├── .gitignore ├── LICENCE ├── README.md ├── data └── timeline_examples │ ├── 0_manual_timeline_examples.json │ ├── 10_manual_timeline_examples.json │ ├── 11_manual_timeline_examples.json │ ├── 12_manual_timeline_examples.json │ ├── 13_manual_timeline_examples.json │ ├── 14_manual_timeline_examples.json │ ├── 15_manual_timeline_examples.json │ ├── 16_manual_timeline_examples.json │ ├── 17_manual_timeline_examples.json │ ├── 18_manual_timeline_examples.json │ ├── 19_manual_timeline_examples.json │ ├── 1_manual_timeline_examples.json │ ├── 20_manual_timeline_examples.json │ ├── 21_manual_timeline_examples.json │ ├── 22_manual_timeline_examples.json │ ├── 23_manual_timeline_examples.json │ ├── 24_manual_timeline_examples.json │ ├── 25_manual_timeline_examples.json │ ├── 26_manual_timeline_examples.json │ ├── 27_manual_timeline_examples.json │ ├── 28_manual_timeline_examples.json │ ├── 29_manual_timeline_examples.json │ ├── 2_manual_timeline_examples.json │ ├── 30_manual_timeline_examples.json │ ├── 31_manual_timeline_examples.json │ ├── 32_manual_timeline_examples.json │ ├── 33_manual_timeline_examples.json │ ├── 34_manual_timeline_examples.json │ ├── 3_manual_timeline_examples.json │ ├── 4_manual_timeline_examples.json │ ├── 5_manual_timeline_examples.json │ ├── 6_manual_timeline_examples.json │ ├── 7_manual_timeline_examples.json │ ├── 8_manual_timeline_examples.json │ ├── 9_manual_timeline_examples.json │ └── all_manual_timeline_examples.json ├── experiments ├── Foresight Metrics MIMIC.ipynb ├── Foresight | MIMIC | Final | Prepare data.ipynb └── Foresight | MIMIC | Train and Test | Final.ipynb ├── foresight ├── __init__.py ├── datasets │ ├── __init__.py │ ├── data_collator.py │ ├── filters.py │ ├── patient_concept_stream.py │ └── utils.py ├── metrics │ ├── __init__.py │ └── next_concept_prediction.py ├── sight.py ├── tokenizers │ └── simple_map_tokenizer.py ├── trainer.py └── utils │ ├── cdb_utils.py │ ├── pickle.py │ └── stream_utils.py └── setup.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | #Directories to be ignored fully 2 | /books/ 3 | /articles/ 4 | /other/ 5 | /output/ 6 | /graphics/ 7 | models/ 8 | static/ 9 | dist/ 10 | tmp/ 11 | logs/ 12 | results/ 13 | wandb/ 14 | *_tmp/ 15 | medgpt.egg-info/ 16 | medflux.egg-info/ 17 | build/ 18 | .idea 19 | venv 20 | db.sqlite3 21 | .ipynb_checkpoints/ 22 | 23 | #tmp and similar files 24 | .nfs* 25 | *.log 26 | *.pyc 27 | *.out 28 | *.swp 29 | *.swn 30 | tmp_* 31 | t_* 32 | tmp_* 33 | *_tmp 34 | *.swo 35 | *.lyx.emergency 36 | *.lyx# 37 | *~ 38 | *hidden* 39 | nohup.out 40 | tmp.py 41 | .DS_Store 42 | *.lock 43 | 44 | # models files 45 | *.dat 46 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Foresight 2 | Deep Generative Modelling of Patient Timelines using Electronic Health Records 3 | 4 | We present a novel deep learning generative model of patients using EHRs that combines natural language processing with longitudinal forecasting, and which has broad utility across many healthcare domains. We anticipate further iterative improvements, as every subcomponent can be improved. Foresight opens the door to digital health twins, synthetic dataset generation, real-world risk estimation, longitudinal research, emulation of virtual trials, medical education and more. 
5 | 6 | Demo at: https://foresight.sites.er.kcl.ac.uk/ 7 | 8 | Paper: https://arxiv.org/abs/2212.08072 9 | -------------------------------------------------------------------------------- /data/timeline_examples/0_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.022", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "10000000 - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.028", "cui": "Unknown", "uid": "165117808938610000000 - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.034", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "10000000 - Male", "count": 10000000, "name": "Male", "saliency": "0.035", "cui": "Male", "uid": "165117809517310000000 - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.041", "cui": "", "uid": "165099335373310000000 - "}, {"id": "38", "label": "10000000 - 38", "count": 10000000, "name": "38", "saliency": "0.043", "cui": "38", "uid": "165117810658810000000 - 38"}, {"id": "59621000", "label": "6885 - Essential hypertension (disorder)", "count": 6885, "name": "Essential hypertension (disorder)", "saliency": "0.095", "cui": "59621000", "uid": "16511781237806885 - Essential hypertension (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.028", "cui": "", "uid": "165167889675610000000 - "}, {"id": "41", "label": "10000000 - 41", "count": 10000000, "name": "41", "saliency": "0.020", "cui": "41", "uid": "165117814727510000000 - 41"}, {"id": "709044004", "label": "17873 - Chronic kidney disease (disorder)", "count": 17873, "name": "Chronic kidney disease (disorder)", "saliency": "0.049", "cui": "709044004", "uid": "165117815504917873 - Chronic kidney disease (disorder)"}, {"id": "", "label": "10000000 
- ", "count": 10000000, "name": "", "saliency": "0.024", "cui": "", "uid": "165167889740410000000 - "}, {"id": "43", "label": "10000000 - 43", "count": 10000000, "name": "43", "saliency": "0.026", "cui": "43", "uid": "165117817659310000000 - 43"}, {"id": "249579002", "label": "238 - Kidney palpable (finding)", "count": 238, "name": "Kidney palpable (finding)", "saliency": "0.145", "cui": "249579002", "uid": "1651178759345238 - Kidney palpable (finding)"}, {"id": "21454007", "label": "9750 - Subarachnoid intracranial hemorrhage (disorder)", "count": 9750, "name": "Subarachnoid intracranial hemorrhage (disorder)", "saliency": "0.034", "cui": "21454007", "uid": "16511781961349750 - Subarachnoid intracranial hemorrhage (disorder)"}, {"id": "128609009", "label": "2216 - Intracranial aneurysm (disorder)", "count": 2216, "name": "Intracranial aneurysm (disorder)", "saliency": "0.061", "cui": "128609009", "uid": "16511785840452216 - Intracranial aneurysm (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.018", "cui": "", "uid": "165167889602210000000 - "}, {"id": "46", "label": "10000000 - 46", "count": 10000000, "name": "46", "saliency": "0.032", "cui": "46", "uid": "165117822098110000000 - 46"}, {"id": "197940006", "label": "8908 - Microscopic hematuria (disorder)", "count": 8908, "name": "Microscopic hematuria (disorder)", "saliency": "0.035", "cui": "197940006", "uid": "16511785001228908 - Microscopic hematuria (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.017", "cui": "", "uid": "165167889499210000000 - "}, {"id": "48", "label": "10000000 - 48", "count": 10000000, "name": "48", "saliency": "0.038", "cui": "48", "uid": "165117824746510000000 - 48"}, {"id": "197941005", "label": "10501 - Frank hematuria (disorder)", "count": 10501, "name": "Frank hematuria (disorder)", "saliency": "0.072", "cui": "197941005", "uid": "165117847946410501 - Frank hematuria (disorder)"}, {"id": 
"271857006", "label": "11263 - Loin pain (finding)", "count": 11263, "name": "Loin pain (finding)", "saliency": "0.101", "cui": "271857006", "uid": "165117848622111263 - Loin pain (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": "123946008"} -------------------------------------------------------------------------------- /data/timeline_examples/10_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.025", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.037", "cui": "Unknown", "uid": "1652559607657Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.037", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.029", "cui": "Male", "uid": "1652559586394Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.038", "cui": "", "uid": "165099335373310000000 - "}, {"id": "16", "label": "16 - 16", "count": 10000000, "name": "16", "saliency": "0.027", "cui": "16", "uid": "165255908410516 - 16"}, {"id": "26416006", "label": "26416006 - Drug abuse (disorder)", "count": 4776, "name": "Drug abuse (disorder)", "saliency": "0.037", "cui": "26416006", "uid": "165255884874426416006 - Drug abuse (disorder)"}, {"id": "24", "label": "24 - 24", "count": 10000000, "name": "24", "saliency": "0.020", "cui": "24", "uid": "165255909015224 - 24"}, {"id": "231477003", "label": "231477003 - Heroin dependence (disorder)", "count": 313, "name": "Heroin dependence (disorder)", "saliency": "0.067", "cui": "231477003", "uid": "1652559264096231477003 - Heroin dependence 
(disorder)"}, {"id": "386661006", "label": "386661006 - Fever (finding)", "count": 203120, "name": "Fever (finding)", "saliency": "0.014", "cui": "386661006", "uid": "1652558909488386661006 - Fever (finding)"}, {"id": "67023009", "label": "67023009 - Lymphocytosis (disorder)", "count": 4894, "name": "Lymphocytosis (disorder)", "saliency": "0.034", "cui": "67023009", "uid": "165255897024067023009 - Lymphocytosis (disorder)"}, {"id": "30", "label": "30 - 30", "count": 10000000, "name": "30", "saliency": "0.022", "cui": "30", "uid": "165255909339230 - 30"}, {"id": "32861005", "label": "32861005 - Erythema nodosum (disorder)", "count": 1014, "name": "Erythema nodosum (disorder)", "saliency": "0.055", "cui": "32861005", "uid": "165255915825632861005 - Erythema nodosum (disorder)"}, {"id": "50711007", "label": "50711007 - Viral hepatitis type C (disorder)", "count": 12774, "name": "Viral hepatitis type C (disorder)", "saliency": "0.030", "cui": "50711007", "uid": "165255939730450711007 - Viral hepatitis type C (disorder)"}, {"id": "35", "label": "35 - 35", "count": 10000000, "name": "35", "saliency": "0.024", "cui": "35", "uid": "165255909829735 - 35"}, {"id": "50563003", "label": "50563003 - Seborrheic dermatitis (disorder)", "count": 3209, "name": "Seborrheic dermatitis (disorder)", "saliency": "0.034", "cui": "50563003", "uid": "165255881027150563003 - Seborrheic dermatitis (disorder)"}, {"id": "30746006", "label": "30746006 - Lymphadenopathy (disorder)", "count": 22415, "name": "Lymphadenopathy (disorder)", "saliency": "0.027", "cui": "30746006", "uid": "165255896450430746006 - Lymphadenopathy (disorder)"}, {"id": "79740000", "label": "79740000 - Candidiasis of mouth (disorder)", "count": 10089, "name": "Candidiasis of mouth (disorder)", "saliency": "0.038", "cui": "79740000", "uid": "165255881476779740000 - Candidiasis of mouth (disorder)"}, {"id": "16294009", "label": "16294009 - Splenomegaly (disorder)", "count": 13325, "name": "Splenomegaly (disorder)", 
"saliency": "0.030", "cui": "16294009", "uid": "165255944836016294009 - Splenomegaly (disorder)"}, {"id": "127034005", "label": "127034005 - Pancytopenia (disorder)", "count": 6427, "name": "Pancytopenia (disorder)", "saliency": "0.042", "cui": "127034005", "uid": "1652559005104127034005 - Pancytopenia (disorder)"}, {"id": "89362005", "label": "89362005 - Weight loss (finding)", "count": 106506, "name": "Weight loss (finding)", "saliency": "0.020", "cui": "89362005", "uid": "165255932599289362005 - Weight loss (finding)"}, {"id": "36", "label": "36 - 36", "count": 10000000, "name": "36", "saliency": "0.024", "cui": "36", "uid": "165255912748136 - 36"}, {"id": "409609008", "label": "409609008 - Radiologic infiltrate of lung (disorder)", "count": 11933, "name": "Radiologic infiltrate of lung (disorder)", "saliency": "0.044", "cui": "409609008", "uid": "1652559389872409609008 - Radiologic infiltrate of lung (disorder)"}, {"id": "233606009", "label": "233606009 - Atypical pneumonia (disorder)", "count": 4775, "name": "Atypical pneumonia (disorder)", "saliency": "0.063", "cui": "233606009", "uid": "1652559052455233606009 - Atypical pneumonia (disorder)"}, {"id": "109385007", "label": "109385007 - Kaposi's sarcoma (disorder)", "count": 249, "name": "Kaposi's sarcoma (disorder)", "saliency": "0.180", "cui": "109385007", "uid": "1652559438528109385007 - Kaposi's sarcoma (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/11_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.038", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", 
"saliency": "0.043", "cui": "Unknown", "uid": "1652559924217Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.049", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.043", "cui": "Male", "uid": "1652559797993Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.064", "cui": "", "uid": "165099335373310000000 - "}, {"id": "44", "label": "44 - 44", "count": 10000000, "name": "44", "saliency": "0.056", "cui": "44", "uid": "165255979420944 - 44"}, {"id": "735938006", "label": "735938006 - Acute headache (finding)", "count": 726, "name": "Acute headache (finding)", "saliency": "0.203", "cui": "735938006", "uid": "1652560110554735938006 - Acute headache (finding)"}, {"id": "11934000", "label": "11934000 - Ptosis of eyelid (disorder)", "count": 9857, "name": "Ptosis of eyelid (disorder)", "saliency": "0.110", "cui": "11934000", "uid": "165255969069011934000 - Ptosis of eyelid (disorder)"}, {"id": "301939004", "label": "301939004 - Constricted pupil (finding)", "count": 1586, "name": "Constricted pupil (finding)", "saliency": "0.123", "cui": "301939004", "uid": "1652559695273301939004 - Constricted pupil (finding)"}, {"id": "13045009", "label": "13045009 - Anisocoria (disorder)", "count": 4378, "name": "Anisocoria (disorder)", "saliency": "0.083", "cui": "13045009", "uid": "165255982356113045009 - Anisocoria (disorder)"}, {"id": "427310006", "label": "427310006 - Pain radiating to neck (finding)", "count": 3338, "name": "Pain radiating to neck (finding)", "saliency": "0.188", "cui": "427310006", "uid": "1652560433283427310006 - Pain radiating to neck (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": ""} -------------------------------------------------------------------------------- 
/data/timeline_examples/12_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.041", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.045", "cui": "Unknown", "uid": "1652561585556Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.063", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.064", "cui": "Male", "uid": "1652561580916Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.085", "cui": "", "uid": "165099335373310000000 - "}, {"id": "19", "label": "19 - 19", "count": 10000000, "name": "19", "saliency": "0.052", "cui": "19", "uid": "165256157706919 - 19"}, {"id": "241411000000101", "label": "241411000000101 - Collapse from cause unknown (finding)", "count": 2327, "name": "Collapse from cause unknown (finding)", "saliency": "0.114", "cui": "241411000000101", "uid": "1652561597028241411000000101 - Collapse from cause unknown (finding)"}, {"id": "427461000", "label": "427461000 - Near syncope (disorder)", "count": 7956, "name": "Near syncope (disorder)", "saliency": "0.077", "cui": "427461000", "uid": "1652561735860427461000 - Near syncope (disorder)"}, {"id": "20", "label": "20 - 20", "count": 10000000, "name": "20", "saliency": "0.053", "cui": "20", "uid": "165256170478820 - 20"}, {"id": "410429000", "label": "410429000 - Cardiac arrest (disorder)", "count": 13078, "name": "Cardiac arrest (disorder)", "saliency": "0.102", "cui": "410429000", "uid": "1652561569044410429000 - Cardiac arrest (disorder)"}, {"id": "240371000000108", "label": "240371000000108 - Return of spontaneous circulation (finding)", "count": 480, "name": "Return of 
spontaneous circulation (finding)", "saliency": "0.305", "cui": "240371000000108", "uid": "1652561573692240371000000108 - Return of spontaneous circulation (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": ""} -------------------------------------------------------------------------------- /data/timeline_examples/13_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.033", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.045", "cui": "Unknown", "uid": "1652561585556Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.039", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.038", "cui": "Male", "uid": "1652561580916Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.048", "cui": "", "uid": "165099335373310000000 - "}, {"id": "19", "label": "19 - 19", "count": 10000000, "name": "19", "saliency": "0.035", "cui": "19", "uid": "165256157706919 - 19"}, {"id": "241411000000101", "label": "241411000000101 - Collapse from cause unknown (finding)", "count": 2327, "name": "Collapse from cause unknown (finding)", "saliency": "0.099", "cui": "241411000000101", "uid": "1652561597028241411000000101 - Collapse from cause unknown (finding)"}, {"id": "427461000", "label": "427461000 - Near syncope (disorder)", "count": 7956, "name": "Near syncope (disorder)", "saliency": "0.045", "cui": "427461000", "uid": "1652561735860427461000 - Near syncope (disorder)"}, {"id": "20", "label": "20 - 20", "count": 10000000, "name": "20", "saliency": "0.025", "cui": "20", 
"uid": "165256170478820 - 20"}, {"id": "410429000", "label": "410429000 - Cardiac arrest (disorder)", "count": 13078, "name": "Cardiac arrest (disorder)", "saliency": "0.067", "cui": "410429000", "uid": "1652561569044410429000 - Cardiac arrest (disorder)"}, {"id": "240371000000108", "label": "240371000000108 - Return of spontaneous circulation (finding)", "count": 480, "name": "Return of spontaneous circulation (finding)", "saliency": "0.148", "cui": "240371000000108", "uid": "1652561573692240371000000108 - Return of spontaneous circulation (finding)"}, {"id": "45007003", "label": "45007003 - Low blood pressure (disorder)", "count": 98370, "name": "Low blood pressure (disorder)", "saliency": "0.035", "cui": "45007003", "uid": "165256189830845007003 - Low blood pressure (disorder)"}, {"id": "371820004", "label": "371820004 - Patient ventilated (finding)", "count": 817, "name": "Patient ventilated (finding)", "saliency": "0.111", "cui": "371820004", "uid": "1652561788204371820004 - Patient ventilated (finding)"}, {"id": "40701008", "label": "40701008 - Echocardiography (procedure)", "count": 159740, "name": "Echocardiography (procedure)", "saliency": "0.054", "cui": "40701008", "uid": "165256188066840701008 - Echocardiography (procedure)"}, {"id": "55827005", "label": "55827005 - Left ventricular hypertrophy (disorder)", "count": 30302, "name": "Left ventricular hypertrophy (disorder)", "saliency": "0.177", "cui": "55827005", "uid": "165256193776455827005 - Left ventricular hypertrophy (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": ""} -------------------------------------------------------------------------------- /data/timeline_examples/14_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.030", "cui": "", 
"uid": "165099333914310000000 - "}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.038", "cui": "", "uid": "165099334731010000000 - "}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.062", "cui": "", "uid": "165099335373310000000 - "}, {"id": "55", "label": "55 - 55", "count": 10000000, "name": "55", "saliency": "0.043", "cui": "55", "uid": "165256266859055 - 55"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.031", "cui": "38341003", "uid": "165256242568538341003 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "73211009", "label": "73211009 - Diabetes mellitus (disorder)", "count": 110552, "name": "Diabetes mellitus (disorder)", "saliency": "0.032", "cui": "73211009", "uid": "165256242123773211009 - Diabetes mellitus (disorder)"}, {"id": "64", "label": "64 - 64", "count": 10000000, "name": "64", "saliency": "0.039", "cui": "64", "uid": "165256268039164 - 64"}, {"id": "23986001", "label": "23986001 - Glaucoma (disorder)", "count": 42785, "name": "Glaucoma (disorder)", "saliency": "0.038", "cui": "23986001", "uid": "165256241669323986001 - Glaucoma (disorder)"}, {"id": "22298006", "label": "22298006 - Myocardial infarction (disorder)", "count": 25315, "name": "Myocardial infarction (disorder)", "saliency": "0.040", "cui": "22298006", "uid": "165256240971022298006 - Myocardial infarction (disorder)"}, {"id": "65", "label": "65 - 65", "count": 10000000, "name": "65", "saliency": "0.020", "cui": "65", "uid": "165256268326265 - 65"}, {"id": "414545008", "label": "414545008 - Ischemic heart disease (disorder)", "count": 49159, "name": "Ischemic heart disease (disorder)", "saliency": "0.023", "cui": "414545008", "uid": "1652562857759414545008 - Ischemic heart disease (disorder)"}, {"id": "71", "label": "71 - 71", "count": 10000000, "name": "71", 
"saliency": "0.043", "cui": "71", "uid": "165256267479071 - 71"}, {"id": "240311000000103", "label": "240311000000103 - Loss of vision (disorder)", "count": 6527, "name": "Loss of vision (disorder)", "saliency": "0.095", "cui": "240311000000103", "uid": "1652562991614240311000000103 - Loss of vision (disorder)"}, {"id": "15203004", "label": "15203004 - Sudden visual loss (disorder)", "count": 1044, "name": "Sudden visual loss (disorder)", "saliency": "0.074", "cui": "15203004", "uid": "165256304055115203004 - Sudden visual loss (disorder)"}, {"id": "301924000", "label": "301924000 - Normal globe (finding)", "count": 159, "name": "Normal globe (finding)", "saliency": "0.132", "cui": "301924000", "uid": "1652562920262301924000 - Normal globe (finding)"}, {"id": "301950001", "label": "301950001 - Pupil normal (finding)", "count": 3275, "name": "Pupil normal (finding)", "saliency": "0.260", "cui": "301950001", "uid": "1652563074886301950001 - Pupil normal (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": "118235002"} -------------------------------------------------------------------------------- /data/timeline_examples/15_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.025", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.029", "cui": "Unknown", "uid": "1652563296239Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.034", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.040", "cui": "Male", "uid": "1652563275263Male - Male"}, {"id": "", "label": "10000000 - ", "count": 
10000000, "name": "", "saliency": "0.035", "cui": "", "uid": "165099335373310000000 - "}, {"id": "75", "label": "75 - 75", "count": 10000000, "name": "75", "saliency": "0.029", "cui": "75", "uid": "165256352111975 - 75"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.020", "cui": "38341003", "uid": "165256351243238341003 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "78", "label": "78 - 78", "count": 10000000, "name": "78", "saliency": "0.023", "cui": "78", "uid": "165256339180778 - 78"}, {"id": "162128006", "label": "162128006 - Poor stream of urine (finding)", "count": 478, "name": "Poor stream of urine (finding)", "saliency": "0.086", "cui": "162128006", "uid": "1652563204415162128006 - Poor stream of urine (finding)"}, {"id": "75088002", "label": "75088002 - Urgent desire to urinate (finding)", "count": 13033, "name": "Urgent desire to urinate (finding)", "saliency": "0.037", "cui": "75088002", "uid": "165256322587975088002 - Urgent desire to urinate (finding)"}, {"id": "279039007", "label": "279039007 - Low back pain (finding)", "count": 86981, "name": "Low back pain (finding)", "saliency": "0.024", "cui": "279039007", "uid": "1652563448375279039007 - Low back pain (finding)"}, {"id": "23056005", "label": "23056005 - Sciatica (disorder)", "count": 29616, "name": "Sciatica (disorder)", "saliency": "0.033", "cui": "23056005", "uid": "165256362723923056005 - Sciatica (disorder)"}, {"id": "81", "label": "81 - 81", "count": 10000000, "name": "81", "saliency": "0.035", "cui": "81", "uid": "165256327853581 - 81"}, {"id": "134407002", "label": "134407002 - Chronic back pain (finding)", "count": 14984, "name": "Chronic back pain (finding)", "saliency": "0.035", "cui": "134407002", "uid": "1652563362071134407002 - Chronic back pain (finding)"}, {"id": "139394000", "label": "139394000 - Nocturia (finding)", 
"count": 31625, "name": "Nocturia (finding)", "saliency": "0.033", "cui": "139394000", "uid": "1652563383063139394000 - Nocturia (finding)"}, {"id": "11441004", "label": "11441004 - Prostatism (disorder)", "count": 1950, "name": "Prostatism (disorder)", "saliency": "0.038", "cui": "11441004", "uid": "165256349429511441004 - Prostatism (disorder)"}, {"id": "85", "label": "85 - 85", "count": 10000000, "name": "85", "saliency": "0.042", "cui": "85", "uid": "165256330664785 - 85"}, {"id": "267064002", "label": "267064002 - Retention of urine (disorder)", "count": 54291, "name": "Retention of urine (disorder)", "saliency": "0.051", "cui": "267064002", "uid": "1652563320176267064002 - Retention of urine (disorder)"}, {"id": "1845001", "label": "1845001 - Paraparesis (disorder)", "count": 1175, "name": "Paraparesis (disorder)", "saliency": "0.115", "cui": "1845001", "uid": "16525633415191845001 - Paraparesis (disorder)"}, {"id": "192970008", "label": "192970008 - Cauda equina syndrome (disorder)", "count": 2442, "name": "Cauda equina syndrome (disorder)", "saliency": "0.235", "cui": "192970008", "uid": "1652563267879192970008 - Cauda equina syndrome (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/16_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.030", "cui": "", "uid": "165099333914310000000 - "}, {"id": "White", "label": "White - White", "count": 10000000, "name": "White", "saliency": "0.038", "cui": "White", "uid": "1652568316080White - White"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.033", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", 
"label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.034", "cui": "Male", "uid": "1652568320327Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.043", "cui": "", "uid": "165099335373310000000 - "}, {"id": "19", "label": "19 - 19", "count": 10000000, "name": "19", "saliency": "0.031", "cui": "19", "uid": "165256831042419 - 19"}, {"id": "11381005", "label": "11381005 - Acne vulgaris (disorder)", "count": 14433, "name": "Acne vulgaris (disorder)", "saliency": "0.041", "cui": "11381005", "uid": "165256890504111381005 - Acne vulgaris (disorder)"}, {"id": "20", "label": "20 - 20", "count": 10000000, "name": "20", "saliency": "0.020", "cui": "20", "uid": "165256831382420 - 20"}, {"id": "24526004", "label": "24526004 - Inflammatory bowel disease (disorder)", "count": 6761, "name": "Inflammatory bowel disease (disorder)", "saliency": "0.052", "cui": "24526004", "uid": "165256887109624526004 - Inflammatory bowel disease (disorder)"}, {"id": "26284000", "label": "26284000 - Ulcer of mouth (disorder)", "count": 13852, "name": "Ulcer of mouth (disorder)", "saliency": "0.048", "cui": "26284000", "uid": "165256867212026284000 - Ulcer of mouth (disorder)"}, {"id": "32861005", "label": "32861005 - Erythema nodosum (disorder)", "count": 1014, "name": "Erythema nodosum (disorder)", "saliency": "0.066", "cui": "32861005", "uid": "165256853213732861005 - Erythema nodosum (disorder)"}, {"id": "21", "label": "21 - 21", "count": 10000000, "name": "21", "saliency": "0.029", "cui": "21", "uid": "165256830708821 - 21"}, {"id": "34000006", "label": "34000006 - Crohn's disease (disorder)", "count": 10125, "name": "Crohn's disease (disorder)", "saliency": "0.044", "cui": "34000006", "uid": "165256828990434000006 - Crohn's disease (disorder)"}, {"id": "95545007", "label": "95545007 - Hemorrhagic diarrhea (disorder)", "count": 2731, "name": "Hemorrhagic diarrhea (disorder)", "saliency": "0.060", "cui": "95545007", "uid": 
"165256829478395545007 - Hemorrhagic diarrhea (disorder)"}, {"id": "12063002", "label": "12063002 - Rectal hemorrhage (disorder)", "count": 51163, "name": "Rectal hemorrhage (disorder)", "saliency": "0.027", "cui": "12063002", "uid": "165256884057612063002 - Rectal hemorrhage (disorder)"}, {"id": "25", "label": "25 - 25", "count": 10000000, "name": "25", "saliency": "0.048", "cui": "25", "uid": "165256890947425 - 25"}, {"id": "34436003", "label": "34436003 - Blood in urine (finding)", "count": 48255, "name": "Blood in urine (finding)", "saliency": "0.038", "cui": "34436003", "uid": "165256855341634436003 - Blood in urine (finding)"}, {"id": "284078000", "label": "284078000 - Purpuric rash (disorder)", "count": 1263, "name": "Purpuric rash (disorder)", "saliency": "0.144", "cui": "284078000", "uid": "1652568489737284078000 - Purpuric rash (disorder)"}, {"id": "725119006", "label": "725119006 - Generalized rash (disorder)", "count": 19190, "name": "Generalized rash (disorder)", "saliency": "0.173", "cui": "725119006", "uid": "1652568604881725119006 - Generalized rash (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/17_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 0, "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "10000000 - Unknown", "count": 10000000, "name": "Unknown", "saliency": 0, "cui": "Unknown", "uid": "165117808938610000000 - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 0, "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "10000000 - Male", "count": 10000000, "name": "Male", "saliency": 0, "cui": 
"Male", "uid": "165117809517310000000 - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 0, "cui": "", "uid": "165099335373310000000 - "}, {"id": "46", "label": "10000000 - 46", "count": 10000000, "name": "46", "saliency": 0, "cui": "46", "uid": "165117983923410000000 - 46"}, {"id": "414916001", "label": "24986 - Obesity (disorder)", "count": 24986, "name": "Obesity (disorder)", "saliency": 0, "cui": "414916001", "uid": "165117976515924986 - Obesity (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 0, "cui": "", "uid": "165167798003110000000 - "}, {"id": "48", "label": "10000000 - 48", "count": 10000000, "name": "48", "saliency": 0, "cui": "48", "uid": "165117985237510000000 - 48"}, {"id": "44054006", "label": "81753 - Diabetes mellitus type 2 (disorder)", "count": 81753, "name": "Diabetes mellitus type 2 (disorder)", "saliency": 0, "cui": "44054006", "uid": "165117977198081753 - Diabetes mellitus type 2 (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 0, "cui": "", "uid": "165167796281410000000 - "}, {"id": "55", "label": "10000000 - 55", "count": 10000000, "name": "55", "saliency": 0, "cui": "55", "uid": "165117988970910000000 - 55"}, {"id": "267032009", "label": "3565 - Tired all the time (finding)", "count": 3565, "name": "Tired all the time (finding)", "saliency": 0, "cui": "267032009", "uid": "16511797833163565 - Tired all the time (finding)"}, {"id": "25064002", "label": "185961 - Headache (finding)", "count": 185961, "name": "Headache (finding)", "saliency": 0, "cui": "25064002", "uid": "1651179812300185961 - Headache (finding)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 0, "cui": "", "uid": "165167795818210000000 - "}, {"id": "58", "label": "10000000 - 58", "count": 10000000, "name": "58", "saliency": 0, "cui": "58", "uid": "165117999935210000000 - 58"}, {"id": "571000119103", "label": "4430 - Daily 
headache (disorder)", "count": 4430, "name": "Daily headache (disorder)", "saliency": 0, "cui": "571000119103", "uid": "16511799851784430 - Daily headache (disorder)"}, {"id": "59621000", "label": "6885 - Essential hypertension (disorder)", "count": 6885, "name": "Essential hypertension (disorder)", "saliency": 0, "cui": "59621000", "uid": "16511800886546885 - Essential hypertension (disorder)"}, {"id": "235595009", "label": "11417 - Gastroesophageal reflux disease (disorder)", "count": 11417, "name": "Gastroesophageal reflux disease (disorder)", "saliency": 0, "cui": "235595009", "uid": "165118031145911417 - Gastroesophageal reflux disease (disorder)"}, {"id": "26329005", "label": "4152 - Poor concentration (finding)", "count": 4152, "name": "Poor concentration (finding)", "saliency": 0, "cui": "26329005", "uid": "16511798059754152 - Poor concentration (finding)"}, {"id": "37796009", "label": "53324 - Migraine (disorder)", "count": 53324, "name": "Migraine (disorder)", "saliency": 0, "cui": "37796009", "uid": "165118011975353324 - Migraine (disorder)"}, {"id": "35489007", "label": "114472 - Depressive disorder (disorder)", "count": 114472, "name": "Depressive disorder (disorder)", "saliency": 0, "cui": "35489007", "uid": "1651180215419114472 - Depressive disorder (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 0, "cui": "", "uid": "165167795448410000000 - "}, {"id": "59", "label": "10000000 - 59", "count": 10000000, "name": "59", "saliency": 0, "cui": "59", "uid": "165117989735710000000 - 59"}, {"id": "230471006", "label": "448 - Chronic tension-type headache (disorder)", "count": 448, "name": "Chronic tension-type headache (disorder)", "saliency": 0, "cui": "230471006", "uid": "1651179821170448 - Chronic tension-type headache (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"]} 
-------------------------------------------------------------------------------- /data/timeline_examples/18_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.050", "cui": "", "uid": "165099333914310000000 - "}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.059", "cui": "", "uid": "165099334731010000000 - "}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.101", "cui": "", "uid": "165099335373310000000 - "}, {"id": "62", "label": "62 - 62", "count": 10000000, "name": "62", "saliency": "0.043", "cui": "62", "uid": "165256794289662 - 62"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.048", "cui": "38341003", "uid": "165256791684738341003 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "65", "label": "65 - 65", "count": 10000000, "name": "65", "saliency": "0.033", "cui": "65", "uid": "165256794029565 - 65"}, {"id": "73211009", "label": "73211009 - Diabetes mellitus (disorder)", "count": 110552, "name": "Diabetes mellitus (disorder)", "saliency": "0.049", "cui": "73211009", "uid": "165256790911973211009 - Diabetes mellitus (disorder)"}, {"id": "76", "label": "76 - 76", "count": 10000000, "name": "76", "saliency": "0.047", "cui": "76", "uid": "165256796453576 - 76"}, {"id": "404640003", "label": "404640003 - Dizziness (finding)", "count": 107584, "name": "Dizziness (finding)", "saliency": "0.070", "cui": "404640003", "uid": "1652567890615404640003 - Dizziness (finding)"}, {"id": "428887009", "label": "428887009 - Asymmetrical sensorineural hearing loss (disorder)", "count": 382, "name": "Asymmetrical sensorineural hearing loss (disorder)", "saliency": "0.308", "cui": "428887009", "uid": 
"1652568094288428887009 - Asymmetrical sensorineural hearing loss (disorder)"}, {"id": "816077007", "label": "816077007 - Mri Brain", "count": 34595, "name": "Mri Brain", "saliency": "0.192", "cui": "816077007", "uid": "1652568161344816077007 - Mri Brain"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/19_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.019", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "10000000 - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.022", "cui": "Unknown", "uid": "165117808938610000000 - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.024", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "10000000 - Male", "count": 10000000, "name": "Male", "saliency": "0.022", "cui": "Male", "uid": "165117809517310000000 - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.025", "cui": "", "uid": "165167898417510000000 - "}, {"id": "55", "label": "10000000 - 55", "count": 10000000, "name": "55", "saliency": "0.016", "cui": "55", "uid": "165117909893710000000 - 55"}, {"id": "25702006", "label": "8530 - Alcohol intoxication (disorder)", "count": 8530, "name": "Alcohol intoxication (disorder)", "saliency": "0.071", "cui": "25702006", "uid": "16511790853118530 - Alcohol intoxication (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.019", "cui": "", "uid": "165167898324010000000 - "}, {"id": "56", "label": "10000000 - 56", "count": 10000000, "name": "56", "saliency": "0.013", "cui": "56", "uid": 
"165117911044810000000 - 56"}, {"id": "15167005", "label": "7088 - Alcohol abuse (disorder)", "count": 7088, "name": "Alcohol abuse (disorder)", "saliency": "0.065", "cui": "15167005", "uid": "16511790725127088 - Alcohol abuse (disorder)"}, {"id": "13645005", "label": "46496 - Chronic obstructive lung disease (disorder)", "count": 46496, "name": "Chronic obstructive lung disease (disorder)", "saliency": "0.018", "cui": "13645005", "uid": "165117920669946496 - Chronic obstructive lung disease (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.016", "cui": "", "uid": "165167896839910000000 - "}, {"id": "59", "label": "10000000 - 59", "count": 10000000, "name": "59", "saliency": "0.012", "cui": "59", "uid": "165117949545610000000 - 59"}, {"id": "66590003", "label": "5028 - Alcohol dependence (disorder)", "count": 5028, "name": "Alcohol dependence (disorder)", "saliency": "0.053", "cui": "66590003", "uid": "16511794686175028 - Alcohol dependence (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.017", "cui": "", "uid": "165167896697010000000 - "}, {"id": "61", "label": "10000000 - 61", "count": 10000000, "name": "61", "saliency": "0.016", "cui": "61", "uid": "165117912250310000000 - 61"}, {"id": "422504002", "label": "6975 - Ischemic stroke (disorder)", "count": 6975, "name": "Ischemic stroke (disorder)", "saliency": "0.101", "cui": "422504002", "uid": "16511791448606975 - Ischemic stroke (disorder)"}, {"id": "49436004", "label": "43296 - Atrial fibrillation (disorder)", "count": 43296, "name": "Atrial fibrillation (disorder)", "saliency": "0.019", "cui": "49436004", "uid": "165117915005143296 - Atrial fibrillation (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.024", "cui": "", "uid": "165167908474210000000 - "}, {"id": "440363007", "label": "15722 - Evaluation of oral and pharyngeal swallowing function (procedure)", "count": 15722, 
"name": "Evaluation of oral and pharyngeal swallowing function (procedure)", "saliency": "0.056", "cui": "440363007", "uid": "165117918664415722 - Evaluation of oral and pharyngeal swallowing function (procedure)"}, {"id": "40739000", "label": "34984 - Dysphagia (disorder)", "count": 34984, "name": "Dysphagia (disorder)", "saliency": "0.030", "cui": "40739000", "uid": "165117917832634984 - Dysphagia (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.040", "cui": "", "uid": "165167908603410000000 - "}, {"id": "130987000", "label": "11073 - Acute confusion (finding)", "count": 11073, "name": "Acute confusion (finding)", "saliency": "0.074", "cui": "130987000", "uid": "165117958474611073 - Acute confusion (finding)"}, {"id": "24982008", "label": "29741 - Diplopia (disorder)", "count": 29741, "name": "Diplopia (disorder)", "saliency": "0.033", "cui": "24982008", "uid": "165117930584629741 - Diplopia (disorder)"}, {"id": "563001", "label": "11274 - Nystagmus (disorder)", "count": 11274, "name": "Nystagmus (disorder)", "saliency": "0.073", "cui": "563001", "uid": "165117936708211274 - Nystagmus (disorder)"}, {"id": "386806002", "label": "36437 - Impaired cognition (finding)", "count": 36437, "name": "Impaired cognition (finding)", "saliency": "0.124", "cui": "386806002", "uid": "165117938063636437 - Impaired cognition (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"]} -------------------------------------------------------------------------------- /data/timeline_examples/1_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.022", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.028", "cui": 
"Unknown", "uid": "1652553297006Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.031", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.034", "cui": "Male", "uid": "1652553292839Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.038", "cui": "", "uid": "165099335373310000000 - "}, {"id": "21", "label": "21 - 21", "count": 10000000, "name": "21", "saliency": "0.029", "cui": "21", "uid": "165255327928721 - 21"}, {"id": "69322001", "label": "69322001 - Psychotic disorder (disorder)", "count": 7926, "name": "Psychotic disorder (disorder)", "saliency": "0.039", "cui": "69322001", "uid": "165255320587969322001 - Psychotic disorder (disorder)"}, {"id": "58214004", "label": "58214004 - Schizophrenia (disorder)", "count": 8920, "name": "Schizophrenia (disorder)", "saliency": "0.043", "cui": "58214004", "uid": "165255567317158214004 - Schizophrenia (disorder)"}, {"id": "24", "label": "24 - 24", "count": 10000000, "name": "24", "saliency": "0.019", "cui": "24", "uid": "165255560437024 - 24"}, {"id": "231496004", "label": "231496004 - Hypomania (disorder)", "count": 305, "name": "Hypomania (disorder)", "saliency": "0.088", "cui": "231496004", "uid": "1652556181547231496004 - Hypomania (disorder)"}, {"id": "13746004", "label": "13746004 - Bipolar disorder (disorder)", "count": 5625, "name": "Bipolar disorder (disorder)", "saliency": "0.034", "cui": "13746004", "uid": "165255321461513746004 - Bipolar disorder (disorder)"}, {"id": "28", "label": "28 - 28", "count": 10000000, "name": "28", "saliency": "0.025", "cui": "28", "uid": "165255560977028 - 28"}, {"id": "68890003", "label": "68890003 - Schizoaffective disorder (disorder)", "count": 1284, "name": "Schizoaffective disorder (disorder)", "saliency": "0.061", "cui": "68890003", "uid": "165255322482368890003 - Schizoaffective disorder 
(disorder)"}, {"id": "386806002", "label": "386806002 - Impaired cognition (finding)", "count": 36437, "name": "Impaired cognition (finding)", "saliency": "0.033", "cui": "386806002", "uid": "1652556145723386806002 - Impaired cognition (finding)"}, {"id": "35", "label": "35 - 35", "count": 10000000, "name": "35", "saliency": "0.032", "cui": "35", "uid": "165255562389035 - 35"}, {"id": "35489007", "label": "35489007 - Depressive disorder (disorder)", "count": 114472, "name": "Depressive disorder (disorder)", "saliency": "0.015", "cui": "35489007", "uid": "165255323144635489007 - Depressive disorder (disorder)"}, {"id": "42", "label": "42 - 42", "count": 10000000, "name": "42", "saliency": "0.022", "cui": "42", "uid": "165255331739042 - 42"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.016", "cui": "38341003", "uid": "165255323906238341003 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "44", "label": "44 - 44", "count": 10000000, "name": "44", "saliency": "0.021", "cui": "44", "uid": "165255563268244 - 44"}, {"id": "91175000", "label": "91175000 - Seizure (finding)", "count": 92812, "name": "Seizure (finding)", "saliency": "0.018", "cui": "91175000", "uid": "165255324512691175000 - Seizure (finding)"}, {"id": "84757009", "label": "84757009 - Epilepsy (disorder)", "count": 53292, "name": "Epilepsy (disorder)", "saliency": "0.024", "cui": "84757009", "uid": "165255325594284757009 - Epilepsy (disorder)"}, {"id": "82271004", "label": "82271004 - Injury of head (disorder)", "count": 80908, "name": "Injury of head (disorder)", "saliency": "0.028", "cui": "82271004", "uid": "165255607314782271004 - Injury of head (disorder)"}, {"id": "52", "label": "52 - 52", "count": 10000000, "name": "52", "saliency": "0.023", "cui": "52", "uid": "165255564253852 - 52"}, {"id": "414545008", "label": "414545008 - Ischemic heart 
disease (disorder)", "count": 49159, "name": "Ischemic heart disease (disorder)", "saliency": "0.018", "cui": "414545008", "uid": "1652553262543414545008 - Ischemic heart disease (disorder)"}, {"id": "161898004", "label": "161898004 - Falls (finding)", "count": 50678, "name": "Falls (finding)", "saliency": "0.034", "cui": "161898004", "uid": "1652555998747161898004 - Falls (finding)"}, {"id": "8517006", "label": "8517006 - Ex-smoker (finding)", "count": 74572, "name": "Ex-smoker (finding)", "saliency": "0.029", "cui": "8517006", "uid": "16525561116118517006 - Ex-smoker (finding)"}, {"id": "59", "label": "59 - 59", "count": 10000000, "name": "59", "saliency": "0.027", "cui": "59", "uid": "165255328771159 - 59"}, {"id": "386805003", "label": "386805003 - Mild cognitive disorder (disorder)", "count": 5017, "name": "Mild cognitive disorder (disorder)", "saliency": "0.168", "cui": "386805003", "uid": "1652553271654386805003 - Mild cognitive disorder (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": "230226000"} -------------------------------------------------------------------------------- /data/timeline_examples/20_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.039", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.050", "cui": "Unknown", "uid": "1652564399497Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.053", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.043", "cui": "Male", "uid": "1652564395841Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, 
"name": "", "saliency": "0.062", "cui": "", "uid": "165099335373310000000 - "}, {"id": "40", "label": "40 - 40", "count": 10000000, "name": "40", "saliency": "0.032", "cui": "40", "uid": "165256438172940 - 40"}, {"id": "424647005", "label": "424647005 - Cramp in limb (finding)", "count": 159, "name": "Cramp in limb (finding)", "saliency": "0.181", "cui": "424647005", "uid": "1652564178009424647005 - Cramp in limb (finding)"}, {"id": "44", "label": "44 - 44", "count": 10000000, "name": "44", "saliency": "0.042", "cui": "44", "uid": "165256438999344 - 44"}, {"id": "8011004", "label": "8011004 - Dysarthria (finding)", "count": 16470, "name": "Dysarthria (finding)", "saliency": "0.122", "cui": "8011004", "uid": "16525641827768011004 - Dysarthria (finding)"}, {"id": "40739000", "label": "40739000 - Dysphagia (disorder)", "count": 34984, "name": "Dysphagia (disorder)", "saliency": "0.082", "cui": "40739000", "uid": "165256418695240739000 - Dysphagia (disorder)"}, {"id": "82470000", "label": "82470000 - Muscle fasciculation (finding)", "count": 4845, "name": "Muscle fasciculation (finding)", "saliency": "0.293", "cui": "82470000", "uid": "165256437719382470000 - Muscle fasciculation (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/21_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.017", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "10000000 - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.025", "cui": "Unknown", "uid": "165117808938610000000 - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.023", "cui": "", "uid": 
"165099334731010000000 - "}, {"id": "Male", "label": "10000000 - Male", "count": 10000000, "name": "Male", "saliency": "0.029", "cui": "Male", "uid": "165117809517310000000 - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.030", "cui": "", "uid": "165099335373310000000 - "}, {"id": "68", "label": "10000000 - 68", "count": 10000000, "name": "68", "saliency": "0.033", "cui": "68", "uid": "165118040188410000000 - 68"}, {"id": "32914008", "label": "6084 - Restless legs (disorder)", "count": 6084, "name": "Restless legs (disorder)", "saliency": "0.071", "cui": "32914008", "uid": "16511804067616084 - Restless legs (disorder)"}, {"id": "38341003", "label": "259443 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.017", "cui": "38341003", "uid": "1651180637077259443 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.020", "cui": "", "uid": "165167917866610000000 - "}, {"id": "70", "label": "10000000 - 70", "count": 10000000, "name": "70", "saliency": "0.026", "cui": "70", "uid": "165118047243310000000 - 70"}, {"id": "12262002", "label": "574 - Restless sleep (finding)", "count": 574, "name": "Restless sleep (finding)", "saliency": "0.098", "cui": "12262002", "uid": "1651180412192574 - Restless sleep (finding)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.016", "cui": "", "uid": "165167918053710000000 - "}, {"id": "71", "label": "10000000 - 71", "count": 10000000, "name": "71", "saliency": "0.024", "cui": "71", "uid": "165118045469210000000 - 71"}, {"id": "193462001", "label": "756 - Insomnia (disorder)", "count": 756, "name": "Insomnia (disorder)", "saliency": "0.067", "cui": "193462001", "uid": "1651180416094756 - Insomnia (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", 
"saliency": "0.015", "cui": "", "uid": "165167917995210000000 - "}, {"id": "74", "label": "10000000 - 74", "count": 10000000, "name": "74", "saliency": "0.025", "cui": "74", "uid": "165118045886610000000 - 74"}, {"id": "443929000", "label": "32438 - Small vessel cerebrovascular disease (disorder)", "count": 32438, "name": "Small vessel cerebrovascular disease (disorder)", "saliency": "0.033", "cui": "443929000", "uid": "165118053842332438 - Small vessel cerebrovascular disease (disorder)"}, {"id": "14760008", "label": "133118 - Constipation (finding)", "count": 133118, "name": "Constipation (finding)", "saliency": "0.016", "cui": "14760008", "uid": "1651180866985133118 - Constipation (finding)"}, {"id": "64269007", "label": "8730 - Visual hallucinations (finding)", "count": 8730, "name": "Visual hallucinations (finding)", "saliency": "0.042", "cui": "64269007", "uid": "16511804396068730 - Visual hallucinations (finding)"}, {"id": "161898004", "label": "50678 - Falls (finding)", "count": 50678, "name": "Falls (finding)", "saliency": "0.035", "cui": "161898004", "uid": "165118068609450678 - Falls (finding)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.017", "cui": "", "uid": "165167917935010000000 - "}, {"id": "76", "label": "10000000 - 76", "count": 10000000, "name": "76", "saliency": "0.033", "cui": "76", "uid": "165118048974210000000 - 76"}, {"id": "64269007", "label": "8730 - Visual hallucinations (finding)", "count": 8730, "name": "Visual hallucinations (finding)", "saliency": "0.039", "cui": "64269007", "uid": "16511807800098730 - Visual hallucinations (finding)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.016", "cui": "", "uid": "165167917785110000000 - "}, {"id": "77", "label": "10000000 - 77", "count": 10000000, "name": "77", "saliency": "0.031", "cui": "77", "uid": "165118079978910000000 - 77"}, {"id": "2776000", "label": "29259 - Delirium (disorder)", "count": 29259, "name": 
"Delirium (disorder)", "saliency": "0.076", "cui": "2776000", "uid": "165118069613329259 - Delirium (disorder)"}, {"id": "386807006", "label": "29972 - Memory impairment (finding)", "count": 29972, "name": "Memory impairment (finding)", "saliency": "0.125", "cui": "386807006", "uid": "165118044717629972 - Memory impairment (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"]} -------------------------------------------------------------------------------- /data/timeline_examples/22_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.032", "cui": "", "uid": "165099333914310000000 - "}, {"id": "White", "label": "10000000 - White", "count": 10000000, "name": "White", "saliency": "0.043", "cui": "White", "uid": "165099334099310000000 - White"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.041", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Female", "label": "10000000 - Female", "count": 10000000, "name": "Female", "saliency": "0.035", "cui": "Female", "uid": "165099334437810000000 - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.044", "cui": "", "uid": "165099335373310000000 - "}, {"id": "45", "label": "10000000 - 45", "count": 10000000, "name": "45", "saliency": "0.041", "cui": "45", "uid": "165099352585610000000 - 45"}, {"id": "44054006", "label": "81753 - Diabetes mellitus type 2 (disorder)", "count": 81753, "name": "Diabetes mellitus type 2 (disorder)", "saliency": "0.046", "cui": "44054006", "uid": "165099362479181753 - Diabetes mellitus type 2 (disorder)"}, {"id": "271681002", "label": "9716 - Stomach ache (finding)", "count": 9716, "name": "Stomach ache (finding)", "saliency": "0.102", "cui": "271681002", "uid": "16509936035889716 - 
Stomach ache (finding)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.044", "cui": "", "uid": "165099359512310000000 - "}, {"id": "38341003", "label": "259443 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.023", "cui": "38341003", "uid": "1650993590679259443 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.035", "cui": "", "uid": "165099353028410000000 - "}, {"id": "46", "label": "10000000 - 46", "count": 10000000, "name": "46", "saliency": "0.039", "cui": "46", "uid": "165099353423310000000 - 46"}, {"id": "44169009", "label": "2980 - Loss of sense of smell (finding)", "count": 2980, "name": "Loss of sense of smell (finding)", "saliency": "0.301", "cui": "44169009", "uid": "16509933622922980 - Loss of sense of smell (finding)"}, {"id": "49727002", "label": "195355 - Cough (finding)", "count": 195355, "name": "Cough (finding)", "saliency": "0.065", "cui": "49727002", "uid": "1650993450761195355 - Cough (finding)"}, {"id": "386661006", "label": "203120 - Fever (finding)", "count": 203120, "name": "Fever (finding)", "saliency": "0.111", "cui": "386661006", "uid": "1650993452046203120 - Fever (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"]} -------------------------------------------------------------------------------- /data/timeline_examples/23_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.037", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.034", "cui": "Unknown", "uid": "1652566196388Unknown - 
Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.051", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.028", "cui": "Female", "uid": "1652566179140Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.062", "cui": "", "uid": "165099335373310000000 - "}, {"id": "35", "label": "35 - 35", "count": 10000000, "name": "35", "saliency": "0.026", "cui": "35", "uid": "165256616238735 - 35"}, {"id": "370143000", "label": "370143000 - Major depressive disorder (disorder)", "count": 2380, "name": "Major depressive disorder (disorder)", "saliency": "0.059", "cui": "370143000", "uid": "1652566050707370143000 - Major depressive disorder (disorder)"}, {"id": "372767007", "label": "372767007 - Fluoxetine (substance)", "count": 13027, "name": "Fluoxetine (substance)", "saliency": "0.023", "cui": "372767007", "uid": "1652566059963372767007 - Fluoxetine (substance)"}, {"id": "43", "label": "43 - 43", "count": 10000000, "name": "43", "saliency": "0.024", "cui": "43", "uid": "165256617288343 - 43"}, {"id": "276853009", "label": "276853009 - Self inflicted injury (disorder)", "count": 139, "name": "Self inflicted injury (disorder)", "saliency": "0.085", "cui": "276853009", "uid": "1652566066972276853009 - Self inflicted injury (disorder)"}, {"id": "42", "label": "42 - 42", "count": 10000000, "name": "42", "saliency": "0.031", "cui": "42", "uid": "165256616910842 - 42"}, {"id": "23971007", "label": "23971007 - Acute vomiting (disorder)", "count": 171, "name": "Acute vomiting (disorder)", "saliency": "0.159", "cui": "23971007", "uid": "165256607250723971007 - Acute vomiting (disorder)"}, {"id": "609558009", "label": "609558009 - Essential tremor (disorder)", "count": 2588, "name": "Essential tremor (disorder)", "saliency": "0.102", "cui": "609558009", "uid": "1652566078075609558009 - Essential tremor 
(disorder)"}, {"id": "409702008", "label": "409702008 - Hyperpyrexia (finding)", "count": 412, "name": "Hyperpyrexia (finding)", "saliency": "0.278", "cui": "409702008", "uid": "1652566228916409702008 - Hyperpyrexia (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/24_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.037", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.049", "cui": "Unknown", "uid": "1652565963075Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.056", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.045", "cui": "Male", "uid": "1652565960244Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.060", "cui": "", "uid": "165099335373310000000 - "}, {"id": "64", "label": "64 - 64", "count": 10000000, "name": "64", "saliency": "0.062", "cui": "64", "uid": "165256595760464 - 64"}, {"id": "386807006", "label": "386807006 - Memory impairment (finding)", "count": 29972, "name": "Memory impairment (finding)", "saliency": "0.089", "cui": "386807006", "uid": "1652565923435386807006 - Memory impairment (finding)"}, {"id": "279992002", "label": "279992002 - Recurrent falls (finding)", "count": 11442, "name": "Recurrent falls (finding)", "saliency": "0.090", "cui": "279992002", "uid": "1652565935971279992002 - Recurrent falls (finding)"}, {"id": "165232002", "label": "165232002 - Urinary incontinence (finding)", "count": 
42399, "name": "Urinary incontinence (finding)", "saliency": "0.060", "cui": "165232002", "uid": "1652565942555165232002 - Urinary incontinence (finding)"}, {"id": "413808003", "label": "413808003 - Cerebral ventriculomegaly (disorder)", "count": 3700, "name": "Cerebral ventriculomegaly (disorder)", "saliency": "0.213", "cui": "413808003", "uid": "1652565947579413808003 - Cerebral ventriculomegaly (disorder)"}, {"id": "277762005", "label": "277762005 - Lumbar puncture (procedure)", "count": 13181, "name": "Lumbar puncture (procedure)", "saliency": "0.238", "cui": "277762005", "uid": "1652565952355277762005 - Lumbar puncture (procedure)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/25_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.037", "cui": "", "uid": "165099333914310000000 - "}, {"id": "White", "label": "White - White", "count": 10000000, "name": "White", "saliency": "0.037", "cui": "White", "uid": "1652565668491White - White"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.051", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.044", "cui": "Male", "uid": "1652565665947Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.057", "cui": "", "uid": "165099335373310000000 - "}, {"id": "18", "label": "18 - 18", "count": 10000000, "name": "18", "saliency": "0.047", "cui": "18", "uid": "165256564688318 - 18"}, {"id": "66760008", "label": "66760008 - Optic neuritis (disorder)", "count": 1913, "name": "Optic neuritis (disorder)", "saliency": 
"0.125", "cui": "66760008", "uid": "165256560191566760008 - Optic neuritis (disorder)"}, {"id": "350449009", "label": "350449009 - Product containing methylprednisolone in oral dose form (medicinal product form)", "count": 650, "name": "Product containing methylprednisolone in oral dose form (medicinal product form)", "saliency": "0.104", "cui": "350449009", "uid": "1652565780858350449009 - Product containing methylprednisolone in oral dose form (medicinal product form)"}, {"id": "22", "label": "22 - 22", "count": 10000000, "name": "22", "saliency": "0.048", "cui": "22", "uid": "165256565045922 - 22"}, {"id": "25064002", "label": "25064002 - Headache (finding)", "count": 185961, "name": "Headache (finding)", "saliency": "0.036", "cui": "25064002", "uid": "165256560801125064002 - Headache (finding)"}, {"id": "25", "label": "25 - 25", "count": 10000000, "name": "25", "saliency": "0.058", "cui": "25", "uid": "165256565385925 - 25"}, {"id": "310481001", "label": "310481001 - Complaining of paresthesia (finding)", "count": 574, "name": "Complaining of paresthesia (finding)", "saliency": "0.179", "cui": "310481001", "uid": "1652565624451310481001 - Complaining of paresthesia (finding)"}, {"id": "24982008", "label": "24982008 - Diplopia (disorder)", "count": 29741, "name": "Diplopia (disorder)", "saliency": "0.175", "cui": "24982008", "uid": "165256573873124982008 - Diplopia (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/26_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.037", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": 
"0.057", "cui": "Unknown", "uid": "1652566697540Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.042", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.043", "cui": "Female", "uid": "1652566677844Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.058", "cui": "", "uid": "165099335373310000000 - "}, {"id": "24", "label": "24 - 24", "count": 10000000, "name": "24", "saliency": "0.027", "cui": "24", "uid": "165256668672424 - 24"}, {"id": "195967001", "label": "195967001 - Asthma (disorder)", "count": 136529, "name": "Asthma (disorder)", "saliency": "0.022", "cui": "195967001", "uid": "1652566947989195967001 - Asthma (disorder)"}, {"id": "10743008", "label": "10743008 - Irritable bowel syndrome (disorder)", "count": 33305, "name": "Irritable bowel syndrome (disorder)", "saliency": "0.032", "cui": "10743008", "uid": "165256685470110743008 - Irritable bowel syndrome (disorder)"}, {"id": "26", "label": "26 - 26", "count": 10000000, "name": "26", "saliency": "0.022", "cui": "26", "uid": "165256715767826 - 26"}, {"id": "248490000", "label": "248490000 - Bloating symptom (finding)", "count": 748, "name": "Bloating symptom (finding)", "saliency": "0.103", "cui": "248490000", "uid": "1652566849324248490000 - Bloating symptom (finding)"}, {"id": "35240004", "label": "35240004 - Iron deficiency (disorder)", "count": 14553, "name": "Iron deficiency (disorder)", "saliency": "0.046", "cui": "35240004", "uid": "165256703355735240004 - Iron deficiency (disorder)"}, {"id": "116289008", "label": "116289008 - Abdominal bloating (finding)", "count": 7917, "name": "Abdominal bloating (finding)", "saliency": "0.040", "cui": "116289008", "uid": "1652566929901116289008 - Abdominal bloating (finding)"}, {"id": "27", "label": "27 - 27", "count": 10000000, "name": "27", "saliency": "0.025", "cui": 
"27", "uid": "165256716768527 - 27"}, {"id": "87522002", "label": "87522002 - Iron deficiency anemia (disorder)", "count": 20832, "name": "Iron deficiency anemia (disorder)", "saliency": "0.037", "cui": "87522002", "uid": "165256686310987522002 - Iron deficiency anemia (disorder)"}, {"id": "34713006", "label": "34713006 - Vitamin D deficiency (disorder)", "count": 18879, "name": "Vitamin D deficiency (disorder)", "saliency": "0.036", "cui": "34713006", "uid": "165256690157334713006 - Vitamin D deficiency (disorder)"}, {"id": "28", "label": "28 - 28", "count": 10000000, "name": "28", "saliency": "0.038", "cui": "28", "uid": "165256714807028 - 28"}, {"id": "418363000", "label": "418363000 - Itching of skin (finding)", "count": 10311, "name": "Itching of skin (finding)", "saliency": "0.076", "cui": "418363000", "uid": "1652567294959418363000 - Itching of skin (finding)"}, {"id": "271807003", "label": "271807003 - Eruption of skin (disorder)", "count": 108088, "name": "Eruption of skin (disorder)", "saliency": "0.037", "cui": "271807003", "uid": "1652567351494271807003 - Eruption of skin (disorder)"}, {"id": "111196000", "label": "111196000 - Dermatitis herpetiformis (disorder)", "count": 194, "name": "Dermatitis herpetiformis (disorder)", "saliency": "0.223", "cui": "111196000", "uid": "1652567335590111196000 - Dermatitis herpetiformis (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": ""} -------------------------------------------------------------------------------- /data/timeline_examples/27_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.038", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.040", "cui": 
"Unknown", "uid": "1652567639271Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.042", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.042", "cui": "Male", "uid": "1652567636078Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.060", "cui": "", "uid": "165099335373310000000 - "}, {"id": "19", "label": "19 - 19", "count": 10000000, "name": "19", "saliency": "0.042", "cui": "19", "uid": "165256770478219 - 19"}, {"id": "36118008", "label": "36118008 - Pneumothorax (disorder)", "count": 9538, "name": "Pneumothorax (disorder)", "saliency": "0.061", "cui": "36118008", "uid": "165256773718236118008 - Pneumothorax (disorder)"}, {"id": "25", "label": "25 - 25", "count": 10000000, "name": "25", "saliency": "0.032", "cui": "25", "uid": "165256760303125 - 25"}, {"id": "25064002", "label": "25064002 - Headache (finding)", "count": 185961, "name": "Headache (finding)", "saliency": "0.027", "cui": "25064002", "uid": "165256761127225064002 - Headache (finding)"}, {"id": "720626009", "label": "720626009 - Dissection of carotid artery (disorder)", "count": 190, "name": "Dissection of carotid artery (disorder)", "saliency": "0.115", "cui": "720626009", "uid": "1652567470767720626009 - Dissection of carotid artery (disorder)"}, {"id": "28", "label": "28 - 28", "count": 10000000, "name": "28", "saliency": "0.048", "cui": "28", "uid": "165256760687928 - 28"}, {"id": "404640003", "label": "404640003 - Dizziness (finding)", "count": 107584, "name": "Dizziness (finding)", "saliency": "0.033", "cui": "404640003", "uid": "1652567614687404640003 - Dizziness (finding)"}, {"id": "230730001", "label": "230730001 - Dissection of vertebral artery (disorder)", "count": 370, "name": "Dissection of vertebral artery (disorder)", "saliency": "0.117", "cui": "230730001", "uid": "1652567466319230730001 - Dissection of 
vertebral artery (disorder)"}, {"id": "248328003", "label": "248328003 - Tall stature (finding)", "count": 363, "name": "Tall stature (finding)", "saliency": "0.137", "cui": "248328003", "uid": "1652567596224248328003 - Tall stature (finding)"}, {"id": "85551004", "label": "85551004 - Hypermobility syndrome (disorder)", "count": 1737, "name": "Hypermobility syndrome (disorder)", "saliency": "0.163", "cui": "85551004", "uid": "165256748243185551004 - Hypermobility syndrome (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/28_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.035", "cui": "", "uid": "165099333914310000000 - "}, {"id": "White", "label": "White - White", "count": 10000000, "name": "White", "saliency": "0.045", "cui": "White", "uid": "1654687239406White - White"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.055", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.053", "cui": "Male", "uid": "1654687236133Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.084", "cui": "", "uid": "165099335373310000000 - "}, {"id": "70", "label": "70 - 70", "count": 10000000, "name": "70", "saliency": "0.030", "cui": "70", "uid": "165468723351670 - 70"}, {"id": "82470000", "label": "82470000 - Muscle fasciculation (finding)", "count": 4845, "name": "Muscle fasciculation (finding)", "saliency": "0.254", "cui": "82470000", "uid": "165468767755282470000 - Muscle fasciculation (finding)"}, {"id": "309086004", "label": "309086004 - Paresthesia of hand (finding)", 
"count": 176, "name": "Paresthesia of hand (finding)", "saliency": "0.218", "cui": "309086004", "uid": "1654687683546309086004 - Paresthesia of hand (finding)"}, {"id": "84229001", "label": "84229001 - Fatigue (finding)", "count": 134026, "name": "Fatigue (finding)", "saliency": "0.082", "cui": "84229001", "uid": "165468769165784229001 - Fatigue (finding)"}, {"id": "48694002", "label": "48694002 - Anxiety (finding)", "count": 168363, "name": "Anxiety (finding)", "saliency": "0.144", "cui": "48694002", "uid": "165468769895948694002 - Anxiety (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/29_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.036", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.049", "cui": "Male", "uid": "1652565368346Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.061", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.047", "cui": "Unknown", "uid": "1652565383698Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.070", "cui": "", "uid": "165099335373310000000 - "}, {"id": "19", "label": "19 - 19", "count": 10000000, "name": "19", "saliency": "0.038", "cui": "19", "uid": "165256537360219 - 19"}, {"id": "409966000", "label": "409966000 - Acute diarrhea (disorder)", "count": 420, "name": "Acute diarrhea (disorder)", "saliency": "0.169", "cui": "409966000", "uid": "1652565297242409966000 - Acute diarrhea (disorder)"}, {"id": 
"310481001", "label": "310481001 - Complaining of paresthesia (finding)", "count": 574, "name": "Complaining of paresthesia (finding)", "saliency": "0.132", "cui": "310481001", "uid": "1652565491970310481001 - Complaining of paresthesia (finding)"}, {"id": "84229001", "label": "84229001 - Fatigue (finding)", "count": 134026, "name": "Fatigue (finding)", "saliency": "0.050", "cui": "84229001", "uid": "165256527632284229001 - Fatigue (finding)"}, {"id": "26544005", "label": "26544005 - Muscle weakness (finding)", "count": 13620, "name": "Muscle weakness (finding)", "saliency": "0.085", "cui": "26544005", "uid": "165256528083426544005 - Muscle weakness (finding)"}, {"id": "1845001", "label": "1845001 - Paraparesis (disorder)", "count": 1175, "name": "Paraparesis (disorder)", "saliency": "0.263", "cui": "1845001", "uid": "16525654726501845001 - Paraparesis (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/2_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.032", "cui": "", "uid": "165099333914310000000 - "}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.037", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.050", "cui": "Unknown", "uid": "1652556701027Unknown - Unknown"}, {"id": "Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.033", "cui": "Female", "uid": "1652556846948Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.059", "cui": "", "uid": "165099335373310000000 - "}, {"id": 
"41", "label": "41 - 41", "count": 10000000, "name": "41", "saliency": "0.054", "cui": "41", "uid": "165255666377141 - 41"}, {"id": "235595009", "label": "235595009 - Gastroesophageal reflux disease (disorder)", "count": 11417, "name": "Gastroesophageal reflux disease (disorder)", "saliency": "0.053", "cui": "235595009", "uid": "1652556603100235595009 - Gastroesophageal reflux disease (disorder)"}, {"id": "46", "label": "46 - 46", "count": 10000000, "name": "46", "saliency": "0.042", "cui": "46", "uid": "165255666894046 - 46"}, {"id": "44018007", "label": "44018007 - Cholestatic jaundice syndrome (disorder)", "count": 404, "name": "Cholestatic jaundice syndrome (disorder)", "saliency": "0.103", "cui": "44018007", "uid": "165255660999644018007 - Cholestatic jaundice syndrome (disorder)"}, {"id": "75694006", "label": "75694006 - Pancreatitis (disorder)", "count": 10113, "name": "Pancreatitis (disorder)", "saliency": "0.058", "cui": "75694006", "uid": "165255661478075694006 - Pancreatitis (disorder)"}, {"id": "48", "label": "48 - 48", "count": 10000000, "name": "48", "saliency": "0.028", "cui": "48", "uid": "165255674206048 - 48"}, {"id": "197441003", "label": "197441003 - Primary sclerosing cholangitis (disorder)", "count": 1822, "name": "Primary sclerosing cholangitis (disorder)", "saliency": "0.085", "cui": "197441003", "uid": "1652556619724197441003 - Primary sclerosing cholangitis (disorder)"}, {"id": "53", "label": "53 - 53", "count": 10000000, "name": "53", "saliency": "0.054", "cui": "53", "uid": "165255669251553 - 53"}, {"id": "409966000", "label": "409966000 - Acute diarrhea (disorder)", "count": 420, "name": "Acute diarrhea (disorder)", "saliency": "0.095", "cui": "409966000", "uid": "1652556624628409966000 - Acute diarrhea (disorder)"}, {"id": "55", "label": "55 - 55", "count": 10000000, "name": "55", "saliency": "0.066", "cui": "55", "uid": "165255668782755 - 55"}, {"id": "95545007", "label": "95545007 - Hemorrhagic diarrhea (disorder)", "count": 2731, 
"name": "Hemorrhagic diarrhea (disorder)", "saliency": "0.152", "cui": "95545007", "uid": "165255663447695545007 - Hemorrhagic diarrhea (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/30_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.030", "cui": "", "uid": "165099333914310000000 - "}, {"id": "White", "label": "White - White", "count": 10000000, "name": "White", "saliency": "0.036", "cui": "White", "uid": "1654687239406White - White"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.043", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.037", "cui": "Male", "uid": "1654687236133Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.052", "cui": "", "uid": "165099335373310000000 - "}, {"id": "21", "label": "21 - 21", "count": 10000000, "name": "21", "saliency": "0.028", "cui": "21", "uid": "165468780678921 - 21"}, {"id": "231494001", "label": "231494001 - Mania (disorder)", "count": 2213, "name": "Mania (disorder)", "saliency": "0.076", "cui": "231494001", "uid": "1654688134556231494001 - Mania (disorder)"}, {"id": "13746004", "label": "13746004 - Bipolar disorder (disorder)", "count": 5625, "name": "Bipolar disorder (disorder)", "saliency": "0.077", "cui": "13746004", "uid": "165468815900913746004 - Bipolar disorder (disorder)"}, {"id": "27", "label": "27 - 27", "count": 10000000, "name": "27", "saliency": "0.026", "cui": "27", "uid": "165468820373327 - 27"}, {"id": "48694002", "label": "48694002 - Anxiety (finding)", "count": 168363, "name": "Anxiety 
(finding)", "saliency": "0.025", "cui": "48694002", "uid": "165468825448648694002 - Anxiety (finding)"}, {"id": "28", "label": "28 - 28", "count": 10000000, "name": "28", "saliency": "0.025", "cui": "28", "uid": "165468826244528 - 28"}, {"id": "231496004", "label": "231496004 - Hypomania (disorder)", "count": 305, "name": "Hypomania (disorder)", "saliency": "0.133", "cui": "231496004", "uid": "1654688166516231496004 - Hypomania (disorder)"}, {"id": "29", "label": "29 - 29", "count": 10000000, "name": "29", "saliency": "0.036", "cui": "29", "uid": "165468820806929 - 29"}, {"id": "271737000", "label": "271737000 - Anemia (disorder)", "count": 63566, "name": "Anemia (disorder)", "saliency": "0.046", "cui": "271737000", "uid": "1654688173781271737000 - Anemia (disorder)"}, {"id": "15771004", "label": "15771004 - Diabetes insipidus (disorder)", "count": 1529, "name": "Diabetes insipidus (disorder)", "saliency": "0.120", "cui": "15771004", "uid": "165468818062415771004 - Diabetes insipidus (disorder)"}, {"id": "85102008", "label": "85102008 - Cerebellar ataxia (disorder)", "count": 743, "name": "Cerebellar ataxia (disorder)", "saliency": "0.210", "cui": "85102008", "uid": "165468819530585102008 - Cerebellar ataxia (disorder)"}], "cboxTypes": ["Medications and Substances"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/31_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.032", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.045", "cui": "Unknown", "uid": "1652564755329Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": 
"0.049", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.048", "cui": "Female", "uid": "1652564730114Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.060", "cui": "", "uid": "165099335373310000000 - "}, {"id": "19", "label": "19 - 19", "count": 10000000, "name": "19", "saliency": "0.033", "cui": "19", "uid": "165256473469019 - 19"}, {"id": "37796009", "label": "37796009 - Migraine (disorder)", "count": 53324, "name": "Migraine (disorder)", "saliency": "0.027", "cui": "37796009", "uid": "165256471738637796009 - Migraine (disorder)"}, {"id": "43116000", "label": "43116000 - Eczema (disorder)", "count": 49833, "name": "Eczema (disorder)", "saliency": "0.037", "cui": "43116000", "uid": "165256488746543116000 - Eczema (disorder)"}, {"id": "237055002", "label": "237055002 - Polycystic ovary syndrome (disorder)", "count": 1399, "name": "Polycystic ovary syndrome (disorder)", "saliency": "0.077", "cui": "237055002", "uid": "1652564722666237055002 - Polycystic ovary syndrome (disorder)"}, {"id": "23", "label": "23 - 23", "count": 10000000, "name": "23", "saliency": "0.021", "cui": "23", "uid": "165256473973823 - 23"}, {"id": "91175000", "label": "91175000 - Seizure (finding)", "count": 92812, "name": "Seizure (finding)", "saliency": "0.019", "cui": "91175000", "uid": "165256469403491175000 - Seizure (finding)"}, {"id": "25", "label": "25 - 25", "count": 10000000, "name": "25", "saliency": "0.021", "cui": "25", "uid": "165256474358525 - 25"}, {"id": "91175000", "label": "91175000 - Seizure (finding)", "count": 92812, "name": "Seizure (finding)", "saliency": "0.020", "cui": "91175000", "uid": "165256469729791175000 - Seizure (finding)"}, {"id": "29", "label": "29 - 29", "count": 10000000, "name": "29", "saliency": "0.023", "cui": "29", "uid": "165256474591329 - 29"}, {"id": "193022009", "label": "193022009 - 
Localization-related(focal)(partial)idiopathic epilepsy and epileptic syndromes with seizures of localized onset (disorder)", "count": 441, "name": "Localization-related(focal)(partial)idiopathic epilepsy and epileptic syndromes with seizures of localized onset (disorder)", "saliency": "0.176", "cui": "193022009", "uid": "1652564704681193022009 - Localization-related(focal)(partial)idiopathic epilepsy and epileptic syndromes with seizures of localized onset (disorder)"}, {"id": "31", "label": "31 - 31", "count": 10000000, "name": "31", "saliency": "0.047", "cui": "31", "uid": "165256474976131 - 31"}, {"id": "162415008", "label": "162415008 - Complaining of a rash (finding)", "count": 188, "name": "Complaining of a rash (finding)", "saliency": "0.264", "cui": "162415008", "uid": "1652564713154162415008 - Complaining of a rash (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": "127334004"} -------------------------------------------------------------------------------- /data/timeline_examples/32_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.026", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.033", "cui": "Unknown", "uid": "1652569169681Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.042", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.036", "cui": "Female", "uid": "1652569159618Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.049", "cui": "", "uid": "165099335373310000000 - "}, {"id": "24", "label": "24 - 24", "count": 10000000, 
"name": "24", "saliency": "0.034", "cui": "24", "uid": "165256916517724 - 24"}, {"id": "7011001", "label": "7011001 - Hallucinations (finding)", "count": 18898, "name": "Hallucinations (finding)", "saliency": "0.049", "cui": "7011001", "uid": "16525690671777011001 - Hallucinations (finding)"}, {"id": "30819006", "label": "30819006 - Dysphoric mood (finding)", "count": 1906, "name": "Dysphoric mood (finding)", "saliency": "0.087", "cui": "30819006", "uid": "165256934945030819006 - Dysphoric mood (finding)"}, {"id": "85949006", "label": "85949006 - Euphoria (finding)", "count": 237, "name": "Euphoria (finding)", "saliency": "0.074", "cui": "85949006", "uid": "165257006735585949006 - Euphoria (finding)"}, {"id": "193462001", "label": "193462001 - Insomnia (disorder)", "count": 756, "name": "Insomnia (disorder)", "saliency": "0.046", "cui": "193462001", "uid": "1652570032346193462001 - Insomnia (disorder)"}, {"id": "102943000", "label": "102943000 - Personality change (finding)", "count": 1867, "name": "Personality change (finding)", "saliency": "0.047", "cui": "102943000", "uid": "1652569491314102943000 - Personality change (finding)"}, {"id": "25", "label": "25 - 25", "count": 10000000, "name": "25", "saliency": "0.026", "cui": "25", "uid": "165256968381825 - 25"}, {"id": "286933003", "label": "286933003 - Confusional state (disorder)", "count": 57113, "name": "Confusional state (disorder)", "saliency": "0.025", "cui": "286933003", "uid": "1652570012194286933003 - Confusional state (disorder)"}, {"id": "246544003", "label": "246544003 - Partial seizure evolving to secondary generalized seizure (disorder)", "count": 1850, "name": "Partial seizure evolving to secondary generalized seizure (disorder)", "saliency": "0.125", "cui": "246544003", "uid": "1652569809826246544003 - Partial seizure evolving to secondary generalized seizure (disorder)"}, {"id": "15802004", "label": "15802004 - Dystonia (disorder)", "count": 3773, "name": "Dystonia (disorder)", "saliency": 
"0.087", "cui": "15802004", "uid": "165257023621915802004 - Dystonia (disorder)"}, {"id": "162704004", "label": "162704004 - On examination - drowsy (finding)", "count": 2371, "name": "On examination - drowsy (finding)", "saliency": "0.214", "cui": "162704004", "uid": "1652569956115162704004 - On examination - drowsy (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": "81308009"} -------------------------------------------------------------------------------- /data/timeline_examples/33_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.041", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.047", "cui": "Unknown", "uid": "1652570920612Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.041", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.060", "cui": "Male", "uid": "1652570916924Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.053", "cui": "", "uid": "165099335373310000000 - "}, {"id": "21", "label": "21 - 21", "count": 10000000, "name": "21", "saliency": "0.075", "cui": "21", "uid": "165257082628421 - 21"}, {"id": "266257000", "label": "266257000 - Transient ischemic attack (disorder)", "count": 32749, "name": "Transient ischemic attack (disorder)", "saliency": "0.123", "cui": "266257000", "uid": "1652571006876266257000 - Transient ischemic attack (disorder)"}, {"id": "386952008", "label": "386952008 - Clopidogrel (substance)", "count": 45436, "name": "Clopidogrel (substance)", "saliency": "0.076", "cui": "386952008", "uid": 
"1652571462972386952008 - Clopidogrel (substance)"}, {"id": "21", "label": "21 - 21", "count": 10000000, "name": "21", "saliency": "0.073", "cui": "21", "uid": "165257154014921 - 21"}, {"id": "89362005", "label": "89362005 - Weight loss (finding)", "count": 106506, "name": "Weight loss (finding)", "saliency": "0.114", "cui": "89362005", "uid": "165257158890189362005 - Weight loss (finding)"}, {"id": "386661006", "label": "386661006 - Fever (finding)", "count": 203120, "name": "Fever (finding)", "saliency": "0.086", "cui": "386661006", "uid": "1652571509845386661006 - Fever (finding)"}, {"id": "31574009", "label": "31574009 - Systolic murmur (finding)", "count": 34203, "name": "Systolic murmur (finding)", "saliency": "0.213", "cui": "31574009", "uid": "165257178794231574009 - Systolic murmur (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": ""} -------------------------------------------------------------------------------- /data/timeline_examples/34_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.034", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.037", "cui": "Unknown", "uid": "1652570920612Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.045", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.039", "cui": "Male", "uid": "1652570916924Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.057", "cui": "", "uid": "165099335373310000000 - "}, {"id": "50", "label": "50 - 50", "count": 10000000, "name": "50", "saliency": "0.022", 
"cui": "50", "uid": "165257195696950 - 50"}, {"id": "13645005", "label": "13645005 - Chronic obstructive lung disease (disorder)", "count": 46496, "name": "Chronic obstructive lung disease (disorder)", "saliency": "0.029", "cui": "13645005", "uid": "165257204987013645005 - Chronic obstructive lung disease (disorder)"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.024", "cui": "38341003", "uid": "165257216063938341003 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "284523002", "label": "284523002 - Persistent cough (finding)", "count": 8821, "name": "Persistent cough (finding)", "saliency": "0.054", "cui": "284523002", "uid": "1652572123598284523002 - Persistent cough (finding)"}, {"id": "53", "label": "53 - 53", "count": 10000000, "name": "53", "saliency": "0.029", "cui": "53", "uid": "165257229629453 - 53"}, {"id": "87433001", "label": "87433001 - Pulmonary emphysema (disorder)", "count": 1011, "name": "Pulmonary emphysema (disorder)", "saliency": "0.082", "cui": "87433001", "uid": "165257228789487433001 - Pulmonary emphysema (disorder)"}, {"id": "55", "label": "55 - 55", "count": 10000000, "name": "55", "saliency": "0.036", "cui": "55", "uid": "165257195246255 - 55"}, {"id": "89362005", "label": "89362005 - Weight loss (finding)", "count": 106506, "name": "Weight loss (finding)", "saliency": "0.046", "cui": "89362005", "uid": "165257158890189362005 - Weight loss (finding)"}, {"id": "66857006", "label": "66857006 - Hemoptysis (finding)", "count": 14602, "name": "Hemoptysis (finding)", "saliency": "0.063", "cui": "66857006", "uid": "165257214379866857006 - Hemoptysis (finding)"}, {"id": "271730003", "label": "271730003 - Horner's syndrome pupil (disorder)", "count": 200, "name": "Horner's syndrome pupil (disorder)", "saliency": "0.219", "cui": "271730003", "uid": "1652571928406271730003 - Horner's syndrome 
pupil (disorder)"}, {"id": "60046008", "label": "60046008 - Pleural effusion (disorder)", "count": 32822, "name": "Pleural effusion (disorder)", "saliency": "0.185", "cui": "60046008", "uid": "165257242711960046008 - Pleural effusion (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": ""} -------------------------------------------------------------------------------- /data/timeline_examples/3_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.019", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.030", "cui": "Unknown", "uid": "1652570920612Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.025", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.033", "cui": "Male", "uid": "1652570916924Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.037", "cui": "", "uid": "165099335373310000000 - "}, {"id": "21", "label": "21 - 21", "count": 10000000, "name": "21", "saliency": "0.039", "cui": "21", "uid": "165257082628421 - 21"}, {"id": "42984000", "label": "42984000 - Night sweats (finding)", "count": 19519, "name": "Night sweats (finding)", "saliency": "0.032", "cui": "42984000", "uid": "165257089514042984000 - Night sweats (finding)"}, {"id": "30746006", "label": "30746006 - Lymphadenopathy (disorder)", "count": 22415, "name": "Lymphadenopathy (disorder)", "saliency": "0.031", "cui": "30746006", "uid": "165257073709230746006 - Lymphadenopathy (disorder)"}, {"id": "16294009", "label": "16294009 - Splenomegaly (disorder)", "count": 13325, 
"name": "Splenomegaly (disorder)", "saliency": "0.032", "cui": "16294009", "uid": "165257087117916294009 - Splenomegaly (disorder)"}, {"id": "24", "label": "24 - 24", "count": 10000000, "name": "24", "saliency": "0.019", "cui": "24", "uid": "165257084063624 - 24"}, {"id": "91861009", "label": "91861009 - Acute myeloid leukemia, disease (disorder)", "count": 2070, "name": "Acute myeloid leukemia, disease (disorder)", "saliency": "0.080", "cui": "91861009", "uid": "165257131493391861009 - Acute myeloid leukemia, disease (disorder)"}, {"id": "443980004", "label": "443980004 - Neutropenic sepsis (disorder)", "count": 5391, "name": "Neutropenic sepsis (disorder)", "saliency": "0.040", "cui": "443980004", "uid": "1652570772756443980004 - Neutropenic sepsis (disorder)"}, {"id": "234646005", "label": "234646005 - Graft-versus-host disease (disorder)", "count": 956, "name": "Graft-versus-host disease (disorder)", "saliency": "0.071", "cui": "234646005", "uid": "1652570651355234646005 - Graft-versus-host disease (disorder)"}, {"id": "43", "label": "43 - 43", "count": 10000000, "name": "43", "saliency": "0.028", "cui": "43", "uid": "165257133684543 - 43"}, {"id": "40930008", "label": "40930008 - Hypothyroidism (disorder)", "count": 43594, "name": "Hypothyroidism (disorder)", "saliency": "0.019", "cui": "40930008", "uid": "165257079224440930008 - Hypothyroidism (disorder)"}, {"id": "233703007", "label": "233703007 - Interstitial lung disease (disorder)", "count": 3355, "name": "Interstitial lung disease (disorder)", "saliency": "0.054", "cui": "233703007", "uid": "1652570658435233703007 - Interstitial lung disease (disorder)"}, {"id": "65", "label": "65 - 65", "count": 10000000, "name": "65", "saliency": "0.048", "cui": "65", "uid": "165257106789365 - 65"}, {"id": "44054006", "label": "44054006 - Diabetes mellitus type 2 (disorder)", "count": 81753, "name": "Diabetes mellitus type 2 (disorder)", "saliency": "0.023", "cui": "44054006", "uid": "165257070114844054006 - Diabetes 
mellitus type 2 (disorder)"}, {"id": "69", "label": "69 - 69", "count": 10000000, "name": "69", "saliency": "0.043", "cui": "69", "uid": "165257107156569 - 69"}, {"id": "266257000", "label": "266257000 - Transient ischemic attack (disorder)", "count": 32749, "name": "Transient ischemic attack (disorder)", "saliency": "0.034", "cui": "266257000", "uid": "1652571006876266257000 - Transient ischemic attack (disorder)"}, {"id": "386952008", "label": "386952008 - Clopidogrel (substance)", "count": 45436, "name": "Clopidogrel (substance)", "saliency": "0.028", "cui": "386952008", "uid": "1652571230309386952008 - Clopidogrel (substance)"}, {"id": "76", "label": "76 - 76", "count": 10000000, "name": "76", "saliency": "0.052", "cui": "76", "uid": "165257083223576 - 76"}, {"id": "286933003", "label": "286933003 - Confusional state (disorder)", "count": 57113, "name": "Confusional state (disorder)", "saliency": "0.055", "cui": "286933003", "uid": "1652571045540286933003 - Confusional state (disorder)"}, {"id": "816077007", "label": "816077007 - Mri Brain", "count": 34595, "name": "Mri Brain", "saliency": "0.128", "cui": "816077007", "uid": "1652570667772816077007 - Mri Brain"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": "81308009"} -------------------------------------------------------------------------------- /data/timeline_examples/4_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.024", "cui": "", "uid": "165099333914310000000 - "}, {"id": "White", "label": "White - White", "count": 10000000, "name": "White", "saliency": "0.030", "cui": "White", "uid": "1652557785253White - White"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.027", "cui": "", "uid": "165099334731010000000 - "}, {"id": 
"Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.024", "cui": "Female", "uid": "1652557790862Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.048", "cui": "", "uid": "165099335373310000000 - "}, {"id": "34", "label": "34 - 34", "count": 10000000, "name": "34", "saliency": "0.021", "cui": "34", "uid": "165255778211834 - 34"}, {"id": "57676002", "label": "57676002 - Joint pain (finding)", "count": 37708, "name": "Joint pain (finding)", "saliency": "0.028", "cui": "57676002", "uid": "165255752557357676002 - Joint pain (finding)"}, {"id": "55235003", "label": "55235003 - C-reactive protein measurement (procedure)", "count": 182, "name": "C-reactive protein measurement (procedure)", "saliency": "0.067", "cui": "55235003", "uid": "165255753099855235003 - C-reactive protein measurement (procedure)"}, {"id": "127062003", "label": "127062003 - Erythrocytosis (disorder)", "count": 4435, "name": "Erythrocytosis (disorder)", "saliency": "0.040", "cui": "127062003", "uid": "1652557648966127062003 - Erythrocytosis (disorder)"}, {"id": "35", "label": "35 - 35", "count": 10000000, "name": "35", "saliency": "0.015", "cui": "35", "uid": "165255782877735 - 35"}, {"id": "271807003", "label": "271807003 - Eruption of skin (disorder)", "count": 108088, "name": "Eruption of skin (disorder)", "saliency": "0.013", "cui": "271807003", "uid": "1652558065263271807003 - Eruption of skin (disorder)"}, {"id": "298266002", "label": "298266002 - Joint problem (finding)", "count": 5095, "name": "Joint problem (finding)", "saliency": "0.029", "cui": "298266002", "uid": "1652557727750298266002 - Joint problem (finding)"}, {"id": "298160000", "label": "298160000 - Inflamed joint (finding)", "count": 641, "name": "Inflamed joint (finding)", "saliency": "0.041", "cui": "298160000", "uid": "1652557546973298160000 - Inflamed joint (finding)"}, {"id": "276330000", "label": "276330000 - Joint hot (finding)", "count": 
114, "name": "Joint hot (finding)", "saliency": "0.084", "cui": "276330000", "uid": "1652557550214276330000 - Joint hot (finding)"}, {"id": "6631009", "label": "6631009 - Thrombocytosis (disorder)", "count": 3173, "name": "Thrombocytosis (disorder)", "saliency": "0.043", "cui": "6631009", "uid": "16525578472866631009 - Thrombocytosis (disorder)"}, {"id": "36", "label": "36 - 36", "count": 10000000, "name": "36", "saliency": "0.018", "cui": "36", "uid": "165255782101436 - 36"}, {"id": "271587009", "label": "271587009 - Stiffness (finding)", "count": 111347, "name": "Stiffness (finding)", "saliency": "0.029", "cui": "271587009", "uid": "1652557745694271587009 - Stiffness (finding)"}, {"id": "65761003", "label": "65761003 - Inflammatory pain (finding)", "count": 560, "name": "Inflammatory pain (finding)", "saliency": "0.061", "cui": "65761003", "uid": "165255767376665761003 - Inflammatory pain (finding)"}, {"id": "38", "label": "38 - 38", "count": 10000000, "name": "38", "saliency": "0.028", "cui": "38", "uid": "165255803880738 - 38"}, {"id": "299130003", "label": "299130003 - Deformity of thumb (finding)", "count": 113, "name": "Deformity of thumb (finding)", "saliency": "0.100", "cui": "299130003", "uid": "1652557984743299130003 - Deformity of thumb (finding)"}, {"id": "367510009", "label": "367510009 - Ulnar deviation of fingers (finding)", "count": 300, "name": "Ulnar deviation of fingers (finding)", "saliency": "0.228", "cui": "367510009", "uid": "1652558003526367510009 - Ulnar deviation of fingers (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/5_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.034", "cui": "", 
"uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.052", "cui": "Unknown", "uid": "1652557192277Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.042", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.048", "cui": "Female", "uid": "1652557196062Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.066", "cui": "", "uid": "165099335373310000000 - "}, {"id": "64", "label": "64 - 64", "count": 10000000, "name": "64", "saliency": "0.041", "cui": "64", "uid": "165255717336564 - 64"}, {"id": "254838004", "label": "254838004 - Carcinoma of breast (disorder)", "count": 3671, "name": "Carcinoma of breast (disorder)", "saliency": "0.087", "cui": "254838004", "uid": "1652557122085254838004 - Carcinoma of breast (disorder)"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.038", "cui": "38341003", "uid": "165255723698038341003 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "65", "label": "65 - 65", "count": 10000000, "name": "65", "saliency": "0.035", "cui": "65", "uid": "165255721573365 - 65"}, {"id": "234097001", "label": "234097001 - Lymphedema (disorder)", "count": 6146, "name": "Lymphedema (disorder)", "saliency": "0.072", "cui": "234097001", "uid": "1652557168221234097001 - Lymphedema (disorder)"}, {"id": "68", "label": "68 - 68", "count": 10000000, "name": "68", "saliency": "0.060", "cui": "68", "uid": "165255717846968 - 68"}, {"id": "300954003", "label": "300954003 - Pain in calf (finding)", "count": 1564, "name": "Pain in calf (finding)", "saliency": "0.183", "cui": "300954003", "uid": "1652557139789300954003 - Pain in calf (finding)"}, 
{"id": "281792000", "label": "281792000 - Swollen calf (finding)", "count": 6484, "name": "Swollen calf (finding)", "saliency": "0.243", "cui": "281792000", "uid": "1652557408893281792000 - Swollen calf (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/6_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.022", "cui": "", "uid": "165099333914310000000 - "}, {"id": "White", "label": "White - White", "count": 10000000, "name": "White", "saliency": "0.031", "cui": "White", "uid": "1653384099548White - White"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.035", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.038", "cui": "Male", "uid": "1653384852628Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.038", "cui": "", "uid": "165099335373310000000 - "}, {"id": "43", "label": "43 - 43", "count": 10000000, "name": "43", "saliency": "0.029", "cui": "43", "uid": "165338409588143 - 43"}, {"id": "310495003", "label": "310495003 - Mild depression (disorder)", "count": 1849, "name": "Mild depression (disorder)", "saliency": "0.072", "cui": "310495003", "uid": "1653384329559310495003 - Mild depression (disorder)"}, {"id": "372720008", "label": "372720008 - Antidepressant (substance)", "count": 16474, "name": "Antidepressant (substance)", "saliency": "0.033", "cui": "372720008", "uid": "1653384332689372720008 - Antidepressant (substance)"}, {"id": "", "label": " - ", "count": 10000000, "name": "", "saliency": "0.026", "cui": "", "uid": "1653384136733 - "}, {"id": "44", 
"label": "44 - 44", "count": 10000000, "name": "44", "saliency": "0.020", "cui": "44", "uid": "165338413383644 - 44"}, {"id": "372720008", "label": "372720008 - Antidepressant (substance)", "count": 16474, "name": "Antidepressant (substance)", "saliency": "0.032", "cui": "372720008", "uid": "1653384413808372720008 - Antidepressant (substance)"}, {"id": "310497006", "label": "310497006 - Severe depression (disorder)", "count": 2826, "name": "Severe depression (disorder)", "saliency": "0.058", "cui": "310497006", "uid": "1653384509775310497006 - Severe depression (disorder)"}, {"id": "162290004", "label": "162290004 - Dry eyes (finding)", "count": 23187, "name": "Dry eyes (finding)", "saliency": "0.028", "cui": "162290004", "uid": "1653383773249162290004 - Dry eyes (finding)"}, {"id": "89362005", "label": "89362005 - Weight loss (finding)", "count": 106506, "name": "Weight loss (finding)", "saliency": "0.025", "cui": "89362005", "uid": "165338378507989362005 - Weight loss (finding)"}, {"id": "193255007", "label": "193255007 - Proximal myopathy (disorder)", "count": 1039, "name": "Proximal myopathy (disorder)", "saliency": "0.107", "cui": "193255007", "uid": "1653383820939193255007 - Proximal myopathy (disorder)"}, {"id": "248490000", "label": "248490000 - Bloating symptom (finding)", "count": 748, "name": "Bloating symptom (finding)", "saliency": "0.083", "cui": "248490000", "uid": "1653384992544248490000 - Bloating symptom (finding)"}, {"id": "", "label": " - ", "count": 10000000, "name": "", "saliency": "0.016", "cui": "", "uid": "1653384350899 - "}, {"id": "45", "label": "45 - 45", "count": 10000000, "name": "45", "saliency": "0.020", "cui": "45", "uid": "165338434587045 - 45"}, {"id": "62315008", "label": "62315008 - Diarrhea (finding)", "count": 123565, "name": "Diarrhea (finding)", "saliency": "0.015", "cui": "62315008", "uid": "165338375650862315008 - Diarrhea (finding)"}, {"id": "87715008", "label": "87715008 - Xerostomia (disorder)", "count": 21356, "name": 
"Xerostomia (disorder)", "saliency": "0.018", "cui": "87715008", "uid": "165338448962087715008 - Xerostomia (disorder)"}, {"id": "415690000", "label": "415690000 - Sweating (finding)", "count": 26923, "name": "Sweating (finding)", "saliency": "0.022", "cui": "415690000", "uid": "1653384915827415690000 - Sweating (finding)"}, {"id": "16932000", "label": "16932000 - Nausea and vomiting (disorder)", "count": 46068, "name": "Nausea and vomiting (disorder)", "saliency": "0.018", "cui": "16932000", "uid": "165338447018416932000 - Nausea and vomiting (disorder)"}, {"id": "21522001", "label": "21522001 - Abdominal pain (finding)", "count": 200591, "name": "Abdominal pain (finding)", "saliency": "0.014", "cui": "21522001", "uid": "165338385371521522001 - Abdominal pain (finding)"}, {"id": "80313002", "label": "80313002 - Palpitations (finding)", "count": 63467, "name": "Palpitations (finding)", "saliency": "0.023", "cui": "80313002", "uid": "165338424736680313002 - Palpitations (finding)"}, {"id": "286933003", "label": "286933003 - Confusional state (disorder)", "count": 57113, "name": "Confusional state (disorder)", "saliency": "0.022", "cui": "286933003", "uid": "1653384441227286933003 - Confusional state (disorder)"}, {"id": "276796006", "label": "276796006 - Atrial tachycardia (disorder)", "count": 3499, "name": "Atrial tachycardia (disorder)", "saliency": "0.153", "cui": "276796006", "uid": "1653384612937276796006 - Atrial tachycardia (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": "387927001"} -------------------------------------------------------------------------------- /data/timeline_examples/7_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.019", "cui": "", "uid": "165099333914310000000 - "}, {"id": 
"", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.034", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Female", "label": "Female - Female", "count": 10000000, "name": "Female", "saliency": "0.033", "cui": "Female", "uid": "1652560826379Female - Female"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.031", "cui": "", "uid": "165099335373310000000 - "}, {"id": "44", "label": "44 - 44", "count": 10000000, "name": "44", "saliency": "0.027", "cui": "44", "uid": "165256077898744 - 44"}, {"id": "414916001", "label": "414916001 - Obesity (disorder)", "count": 24986, "name": "Obesity (disorder)", "saliency": "0.046", "cui": "414916001", "uid": "1652560671467414916001 - Obesity (disorder)"}, {"id": "8517006", "label": "8517006 - Ex-smoker (finding)", "count": 74572, "name": "Ex-smoker (finding)", "saliency": "0.036", "cui": "8517006", "uid": "16525606989478517006 - Ex-smoker (finding)"}, {"id": "13644009", "label": "13644009 - Hypercholesterolemia (disorder)", "count": 99521, "name": "Hypercholesterolemia (disorder)", "saliency": "0.022", "cui": "13644009", "uid": "165256089813813644009 - Hypercholesterolemia (disorder)"}, {"id": "60", "label": "60 - 60", "count": 10000000, "name": "60", "saliency": "0.025", "cui": "60", "uid": "165256084853960 - 60"}, {"id": "73211009", "label": "73211009 - Diabetes mellitus (disorder)", "count": 110552, "name": "Diabetes mellitus (disorder)", "saliency": "0.027", "cui": "73211009", "uid": "165256070497973211009 - Diabetes mellitus (disorder)"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.023", "cui": "38341003", "uid": "165256087830738341003 - Hypertensive disorder, systemic arterial (disorder)"}, {"id": "400047006", "label": "400047006 - Peripheral vascular disease (disorder)", "count": 9094, "name": "Peripheral vascular disease 
(disorder)", "saliency": "0.046", "cui": "400047006", "uid": "1652560710402400047006 - Peripheral vascular disease (disorder)"}, {"id": "68", "label": "68 - 68", "count": 10000000, "name": "68", "saliency": "0.040", "cui": "68", "uid": "165256078551568 - 68"}, {"id": "4855003", "label": "4855003 - Retinopathy co-occurrent and due to diabetes mellitus (disorder)", "count": 11801, "name": "Retinopathy co-occurrent and due to diabetes mellitus (disorder)", "saliency": "0.043", "cui": "4855003", "uid": "16525607409714855003 - Retinopathy co-occurrent and due to diabetes mellitus (disorder)"}, {"id": "29738008", "label": "29738008 - Proteinuria (finding)", "count": 11627, "name": "Proteinuria (finding)", "saliency": "0.061", "cui": "29738008", "uid": "165256075280329738008 - Proteinuria (finding)"}, {"id": "69", "label": "69 - 69", "count": 10000000, "name": "69", "saliency": "0.043", "cui": "69", "uid": "165256084249169 - 69"}, {"id": "52254009", "label": "52254009 - Nephrotic syndrome (disorder)", "count": 1703, "name": "Nephrotic syndrome (disorder)", "saliency": "0.135", "cui": "52254009", "uid": "165256075725152254009 - Nephrotic syndrome (disorder)"}, {"id": "267038008", "label": "267038008 - Edema (finding)", "count": 89332, "name": "Edema (finding)", "saliency": "0.036", "cui": "267038008", "uid": "1652560830251267038008 - Edema (finding)"}, {"id": "102491009", "label": "102491009 - Immobile (finding)", "count": 18629, "name": "Immobile (finding)", "saliency": "0.071", "cui": "102491009", "uid": "1652560927642102491009 - Immobile (finding)"}, {"id": "70", "label": "70 - 70", "count": 10000000, "name": "70", "saliency": "0.044", "cui": "70", "uid": "165256077510670 - 70"}, {"id": "29857009", "label": "29857009 - Chest pain (finding)", "count": 154931, "name": "Chest pain (finding)", "saliency": "0.076", "cui": "29857009", "uid": "165256076119529857009 - Chest pain (finding)"}, {"id": "267036007", "label": "267036007 - Dyspnea (finding)", "count": 213430, "name": 
"Dyspnea (finding)", "saliency": "0.083", "cui": "267036007", "uid": "1652560765979267036007 - Dyspnea (finding)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/8_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.031", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.031", "cui": "Unknown", "uid": "1652561200891Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.033", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.033", "cui": "Male", "uid": "1652561197251Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.038", "cui": "", "uid": "165099335373310000000 - "}, {"id": "53", "label": "53 - 53", "count": 10000000, "name": "53", "saliency": "0.028", "cui": "53", "uid": "165256131333153 - 53"}, {"id": "66590003", "label": "66590003 - Alcohol dependence (disorder)", "count": 5028, "name": "Alcohol dependence (disorder)", "saliency": "0.043", "cui": "66590003", "uid": "165256113586866590003 - Alcohol dependence (disorder)"}, {"id": "56", "label": "56 - 56", "count": 10000000, "name": "56", "saliency": "0.017", "cui": "56", "uid": "165256120739556 - 56"}, {"id": "38341003", "label": "38341003 - Hypertensive disorder, systemic arterial (disorder)", "count": 259443, "name": "Hypertensive disorder, systemic arterial (disorder)", "saliency": "0.018", "cui": "38341003", "uid": "165256146793238341003 - Hypertensive disorder, systemic arterial 
(disorder)"}, {"id": "50325005", "label": "50325005 - Alcoholic fatty liver (disorder)", "count": 309, "name": "Alcoholic fatty liver (disorder)", "saliency": "0.072", "cui": "50325005", "uid": "165256114112450325005 - Alcoholic fatty liver (disorder)"}, {"id": "57", "label": "57 - 57", "count": 10000000, "name": "57", "saliency": "0.017", "cui": "57", "uid": "165256125548357 - 57"}, {"id": "16932000", "label": "16932000 - Nausea and vomiting (disorder)", "count": 46068, "name": "Nausea and vomiting (disorder)", "saliency": "0.049", "cui": "16932000", "uid": "165256133277116932000 - Nausea and vomiting (disorder)"}, {"id": "235875008", "label": "235875008 - Alcoholic hepatitis (disorder)", "count": 1227, "name": "Alcoholic hepatitis (disorder)", "saliency": "0.104", "cui": "235875008", "uid": "1652561146276235875008 - Alcoholic hepatitis (disorder)"}, {"id": "60", "label": "60 - 60", "count": 10000000, "name": "60", "saliency": "0.018", "cui": "60", "uid": "165256126093160 - 60"}, {"id": "422400008", "label": "422400008 - Vomiting (disorder)", "count": 65972, "name": "Vomiting (disorder)", "saliency": "0.035", "cui": "422400008", "uid": "1652561177236422400008 - Vomiting (disorder)"}, {"id": "404640003", "label": "404640003 - Dizziness (finding)", "count": 107584, "name": "Dizziness (finding)", "saliency": "0.029", "cui": "404640003", "uid": "1652561184004404640003 - Dizziness (finding)"}, {"id": "29857009", "label": "29857009 - Chest pain (finding)", "count": 154931, "name": "Chest pain (finding)", "saliency": "0.028", "cui": "29857009", "uid": "165256119385129857009 - Chest pain (finding)"}, {"id": "60", "label": "60 - 60", "count": 10000000, "name": "60", "saliency": "0.042", "cui": "60", "uid": "165256141858860 - 60"}, {"id": "23971007", "label": "23971007 - Acute vomiting (disorder)", "count": 171, "name": "Acute vomiting (disorder)", "saliency": "0.135", "cui": "23971007", "uid": "165256133018823971007 - Acute vomiting (disorder)"}, {"id": "8765009", "label": 
"8765009 - Hematemesis (disorder)", "count": 13780, "name": "Hematemesis (disorder)", "saliency": "0.200", "cui": "8765009", "uid": "16525611885888765009 - Hematemesis (disorder)"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents", "Ignore Children"], "cui_filter": null} -------------------------------------------------------------------------------- /data/timeline_examples/9_manual_timeline_examples.json: -------------------------------------------------------------------------------- 1 | {"concepts": [{"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.018", "cui": "", "uid": "165099333914310000000 - "}, {"id": "Unknown", "label": "Unknown - Unknown", "count": 10000000, "name": "Unknown", "saliency": "0.032", "cui": "Unknown", "uid": "1652558345287Unknown - Unknown"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.044", "cui": "", "uid": "165099334731010000000 - "}, {"id": "Male", "label": "Male - Male", "count": 10000000, "name": "Male", "saliency": "0.042", "cui": "Male", "uid": "1652558342615Male - Male"}, {"id": "", "label": "10000000 - ", "count": 10000000, "name": "", "saliency": "0.062", "cui": "", "uid": "165099335373310000000 - "}, {"id": "38", "label": "38 - 38", "count": 10000000, "name": "38", "saliency": "0.034", "cui": "38", "uid": "165255833321538 - 38"}, {"id": "57676002", "label": "57676002 - Joint pain (finding)", "count": 37708, "name": "Joint pain (finding)", "saliency": "0.029", "cui": "57676002", "uid": "165255828323157676002 - Joint pain (finding)"}, {"id": "473327001", "label": "473327001 - Complaining of erectile dysfunction (finding)", "count": 251, "name": "Complaining of erectile dysfunction (finding)", "saliency": "0.115", "cui": "473327001", "uid": "1652558373464473327001 - Complaining of erectile dysfunction (finding)"}, {"id": "40", "label": "40 - 40", "count": 10000000, "name": "40", "saliency": "0.030", "cui": "40", 
"uid": "165255839183140 - 40"}, {"id": "37796009", "label": "37796009 - Migraine (disorder)", "count": 53324, "name": "Migraine (disorder)", "saliency": "0.028", "cui": "37796009", "uid": "165255830065437796009 - Migraine (disorder)"}, {"id": "43", "label": "43 - 43", "count": 10000000, "name": "43", "saliency": "0.043", "cui": "43", "uid": "165255839756743 - 43"}, {"id": "66590003", "label": "66590003 - Alcohol dependence (disorder)", "count": 5028, "name": "Alcohol dependence (disorder)", "saliency": "0.050", "cui": "66590003", "uid": "165255830715866590003 - Alcohol dependence (disorder)"}, {"id": "46", "label": "46 - 46", "count": 10000000, "name": "46", "saliency": "0.026", "cui": "46", "uid": "165255840962346 - 46"}, {"id": "18165001", "label": "18165001 - Jaundice (finding)", "count": 42009, "name": "Jaundice (finding)", "saliency": "0.048", "cui": "18165001", "uid": "165255855455918165001 - Jaundice (finding)"}, {"id": "47", "label": "47 - 47", "count": 10000000, "name": "47", "saliency": "0.023", "cui": "47", "uid": "165255841902247 - 47"}, {"id": "399187006", "label": "399187006 - Hemochromatosis (disorder)", "count": 2918, "name": "Hemochromatosis (disorder)", "saliency": "0.078", "cui": "399187006", "uid": "1652558320182399187006 - Hemochromatosis (disorder)"}, {"id": "50", "label": "50 - 50", "count": 10000000, "name": "50", "saliency": "0.047", "cui": "50", "uid": "165255842619850 - 50"}, {"id": "49436004", "label": "49436004 - Atrial fibrillation (disorder)", "count": 43296, "name": "Atrial fibrillation (disorder)", "saliency": "0.032", "cui": "49436004", "uid": "165255832459949436004 - Atrial fibrillation (disorder)"}, {"id": "42343007", "label": "42343007 - Congestive heart failure (disorder)", "count": 13521, "name": "Congestive heart failure (disorder)", "saliency": "0.082", "cui": "42343007", "uid": "165255832950342343007 - Congestive heart failure (disorder)"}, {"id": "61", "label": "61 - 61", "count": 10000000, "name": "61", "saliency": 
"0.138", "cui": "61", "uid": "165255833822361 - 61"}], "cboxTypes": ["Disorders"], "temporality": ["New Concepts"], "prediction_filters": ["Ignore Parents"], "cui_filter": "56265001"} -------------------------------------------------------------------------------- /experiments/Foresight Metrics MIMIC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "fba6b486", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Used for development\n", 11 | "#import sys\n", 12 | "#sys.path.insert(0, \"../foresight/\")\n", 13 | "#import sys\n", 14 | "#sys.path.insert(0, \"../MedCAT/\")\n", 15 | "#%load_ext autoreload\n", 16 | "#%autoreload 2" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "dbbfb963", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import pandas as pd\n", 27 | "import sys\n", 28 | "import os\n", 29 | "import pickle\n", 30 | "import datasets\n", 31 | "import numpy as np\n", 32 | "from medcat.cat import CAT\n", 33 | "from datetime import datetime" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "82fa5c28", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "DATASET = 'test'\n", 44 | "DAYS = 1 # Do: 1, 14, 30\n", 45 | "MAX_SEQ_LEN = 256\n", 46 | "TYPES = ['ALL_TYPES']\n", 47 | "\n", 48 | "BASE_NAME = 'annotated_february_2022'\n", 49 | "DATASET_NAME = 'annotations_stream_phase2_v1'\n", 50 | "RUN_NAME = f'{DATASET_NAME}_{DAYS}d_{MAX_SEQ_LEN}_{\"_\".join(TYPES)}'\n", 51 | "DATA_PATH = f\"./data/timecat/mimic/{BASE_NAME}/{DATASET_NAME}.pickle\"\n", 52 | "DATA_PATH_SPLITS = f\"./data/timecat/mimic/{BASE_NAME}/{DATASET_NAME}_split/\"\n", 53 | "TOKENIZER_PATH = f\"./data/timecat/models/gpt/tokenizer_{RUN_NAME}.pickle\"\n", 54 | "ALMOST_PREPARED_DATASET_SPLIT_PATH = 
f\"./data/timecat/mimic/{BASE_NAME}/{RUN_NAME}_almost_prepared_split/\"\n", 55 | "PREPARED_DATASET_SPLIT_PATH = f\"./data/timecat/mimic/{BASE_NAME}/{RUN_NAME}_prepared_split/\"\n", 56 | "JUST_BEFORE_ENCODING_DATASET_SPLIT_PATH = f\"./data/timecat/mimic/{BASE_NAME}/{RUN_NAME}_just_before_encoding/\"\n", 57 | "CAT_PATH = \"./data/models/modelpacks/mc_modelpack_phase2_snomed_190k_february_2022.zip\"\n", 58 | "PT_DOB_PATH = \"./data/mimic/pt2dob_datetime.pickle\"\n", 59 | "PT_DOD_PATH = \"./data/mimic/pt2dod_timestamp.pickle\"\n", 60 | "PT_SEX_PATH = \"./data/mimic/pt2sex.pickle\"\n", 61 | "PT_LNS_PATH = f\"./data/timecat/mimic/{BASE_NAME}/lns_{DATASET_NAME}.pickle\"\n", 62 | "PT_CNTS_PATH = f\"./data/timecat/mimic/{BASE_NAME}/cnts_{DATASET_NAME}.pickle\"\n", 63 | "PT_ETHNICITY_PATH = \"./data/mimic/pt2ethnicity.pickle\"\n", 64 | "TOKEN_TYPES_PATH = f'./data/timecat/mimic/{BASE_NAME}/types_{DATASET_NAME}.pickle'" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "86a91f2f", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "ds_info = open(\"dataset-metrics-\" + DATASET + '-' + RUN_NAME + '.txt', 'w')\n", 75 | "def fprint(*texts):\n", 76 | " for text in texts:\n", 77 | " print(text)\n", 78 | " ds_info.write(str(text) + \"\\n\")" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "7e48922b", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "from foresight.metrics.next_concept_prediction import precision, metrics_data2df, ComputePrecisionHF\n", 89 | "from foresight.tokenizers.simple_map_tokenizer import SimpleMapTokenizer\n", 90 | "TOKENIZER_PATH = f\"/home/wish/data/timecat/models/gpt/tokenizer_{RUN_NAME}.pickle\"" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "4b17b753", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "data = [('Concept Type', 'Time (in days)', 'Top-K', 'Overall (MIMIC)', 'New (MIMIC)', 'Old 
(MIMIC)',\n", 101 | " 'Recall All', 'Recall New', 'Recall Old')]\n", 102 | "tmap = {'T-11': 'Disorders', 'T-55': 'Substances', 'T-18': 'Findings', 'all': \"All Concepts\", 'T-39': 'Procedures'}\n", 103 | "for name in os.listdir(\"./metrics/\"):\n", 104 | " if name.startswith(\"start-0\"):\n", 105 | " m = pickle.load(open(\"./metrics/\" + name, 'rb'))\n", 106 | " p = name.split(\"_\")\n", 107 | " topk = p[1].split(\"-\")[1]\n", 108 | " time = int(p[3].split(\"-\")[1])\n", 109 | " time = int(time)\n", 110 | " types = p[4].split(\".\")[0].split(\"types-\")[1]\n", 111 | " types = tmap[types]\n", 112 | " data.append((types, time, topk, \n", 113 | " \"{:.2f}\".format(m['precision']['all']), \n", 114 | " \"{:.2f}\".format(m['precision']['new']), \n", 115 | " \"{:.2f}\".format(m['precision']['old']),\n", 116 | " \"{:.2f}\".format(m['recall']['all']),\n", 117 | " \"{:.2f}\".format(m['recall']['new']),\n", 118 | " \"{:.2f}\".format(m['recall']['old'])))" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "5668ed16", 125 | "metadata": { 126 | "scrolled": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "df = pd.DataFrame(data[1:], columns=data[0])\n", 131 | "df['Top-K'] = [int(x) for x in df['Top-K'].values]\n", 132 | "df = df.sort_values(by=['Concept Type', 'Time (in days)', 'Top-K'])\n", 133 | "df" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "id": "6af1d523", 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "df.to_csv(\"./summary.csv\")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "94065191", 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "tokenizer = SimpleMapTokenizer.load(TOKENIZER_PATH)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "id": "37d6d42c", 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "# For the standard model 
get top 20 best performing concepts\n", 164 | "m = pickle.load(open('./start-0_topk-1_time_range-30_types-all_types.pickle', 'rb'))" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "id": "328ab29b", 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "df_new = metrics_data2df(m, tkn2name=tokenizer.tkn2name, temporality='new')" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "id": "6cda4510", 181 | "metadata": { 182 | "scrolled": true 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "df_new.head(n=20).to_csv(\"./top_20_cuis_new.csv\")" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "id": "f2fa1496", 193 | "metadata": { 194 | "scrolled": true 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "df_new.head(n=20)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "id": "353b719e", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "df_new[df_new.negatives>100].tail(n=20).to_csv('bottom_20_cuis_with_min_100_negatives_new.csv')" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "id": "46671f82", 215 | "metadata": { 216 | "scrolled": true 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "df_new[df_new.negatives>100].tail(n=20)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "id": "3e9488a7", 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "df_old = metrics_data2df(m, tkn2name=tokenizer.tkn2name, temporality='old')" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "602b9d17", 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "df_old.head(n=20).to_csv(\"./top_20_cuis_old.csv\")" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "id": "4065a3b7", 247 | "metadata": {}, 248 | 
"outputs": [], 249 | "source": [ 250 | "df_old[df_old.negatives>100].tail(n=20).to_csv('bottom_20_cuis_with_min_100_negatives_old.csv')" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "id": "33e16c89", 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "df_new[df_new.positives > 10]" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "id": "125dfbd6", 266 | "metadata": {}, 267 | "source": [ 268 | "# Dataset Metrics" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "id": "5d6036ff", 275 | "metadata": { 276 | "scrolled": true 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "cat = CAT.load_model_pack(CAT_PATH, meta_cat_config_dict={'general': {'device': 'cpu'}})" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "id": "a84ddda2", 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "dataset = datasets.load_from_disk(JUST_BEFORE_ENCODING_DATASET_SPLIT_PATH)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "id": "262b0aec", 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "dataset" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "id": "26b62828", 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "id2inds = {}\n", 311 | "for ind, row in enumerate(dataset[DATASET]):\n", 312 | " if row['patient_id'] in id2inds:\n", 313 | " id2inds[row['patient_id']].append(ind)\n", 314 | " else:\n", 315 | " id2inds[row['patient_id']] = [ind]" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "id": "7c4509e2", 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "data = dataset[DATASET].to_dict()\n", 326 | "for id in id2inds:\n", 327 | " inds = id2inds[id]\n", 328 | " if len(inds) > 1:\n", 329 | " for ind in inds[1:]:\n", 330 | " 
data['stream'][inds[0]].extend(data['stream'][ind])\n", 331 | " data['token_type'][inds[0]].extend(data['token_type'][ind])\n", 332 | " data['time'][inds[0]].extend(data['time'][ind])\n", 333 | " data['patient_id'][ind] = \"SKIP\"\n", 334 | "dataset_combined = datasets.Dataset.from_dict(data)" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "id": "fb192cce", 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "timeline_lens = []\n", 345 | "timeline_len_years = []\n", 346 | "timeline_len_by_sex = {'Female': [], 'Male': [], 'Unknown': []}\n", 347 | "timeline_len_by_sex_y = {'Female': [], 'Male': [], 'Unknown': []}\n", 348 | "timeline_len_by_eth = {}\n", 349 | "timeline_len_by_eth_y = {}\n", 350 | "timeline_len_by_age = {'0-18': [], '18-30': [], '30-41': [], '41-50': [], '51-64': [], '64+': []}\n", 351 | "timeline_len_by_age_y = {'0-18': [], '18-30': [], '30-41': [], '41-50': [], '51-64': [], '64+': []}\n", 352 | "len_per_type = {}\n", 353 | "sex = {'Female': 0, 'Male': 0, 'Unknown': 0}\n", 354 | "age_groups = {'0-18': [], '18-30': [], '30-41': [], '41-50': [], '51-64': [], '64+': []}\n", 355 | "ethnicity = {}\n", 356 | "all_types = set([x for x in tokenizer.token_type2tokens.keys() if x.startswith('T-')])\n", 357 | "\n", 358 | "for e in dataset_combined:\n", 359 | " if e['patient_id'] == 'SKIP':\n", 360 | " continue\n", 361 | " \n", 362 | " t_len = len([x for x in e['token_type'] if x.startswith(\"T-\")])\n", 363 | " timeline_lens.append(t_len)\n", 364 | "\n", 365 | " # Timeline in years\n", 366 | " l_years = (datetime.fromtimestamp(e['time'][-1]) - datetime.fromtimestamp(e['time'][0])).days / 365\n", 367 | " if l_years < 0:\n", 368 | " l_years = 0\n", 369 | " timeline_len_years.append(l_years)\n", 370 | " \n", 371 | " # Years\n", 372 | " inds = [i for i, v in enumerate(e['token_type']) if v == 'age'] \n", 373 | " once = False\n", 374 | " old_age_group = -1\n", 375 | " print(e['patient_id'], inds)\n", 376 | " 
for ind in inds:\n", 377 | " y = int(e['stream'][ind])\n", 378 | " # Use the last ind to determine pts current age\n", 379 | " if ind == inds[-1]:\n", 380 | " once = True\n", 381 | " \n", 382 | " if y <= 18:\n", 383 | " if old_age_group != '0-18':\n", 384 | " age_groups['0-18'].append(y)\n", 385 | " old_age_group = '0-18'\n", 386 | " if once:\n", 387 | " timeline_len_by_age['0-18'].append(t_len)\n", 388 | " timeline_len_by_age_y['0-18'].append(l_years)\n", 389 | " elif y <= 30:\n", 390 | " if old_age_group != '18-30':\n", 391 | " age_groups['18-30'].append(y)\n", 392 | " old_age_group = '18-30'\n", 393 | " if once:\n", 394 | " timeline_len_by_age['18-30'].append(t_len)\n", 395 | " timeline_len_by_age_y['18-30'].append(l_years)\n", 396 | "\n", 397 | " elif y <= 41:\n", 398 | " if old_age_group != '30-41':\n", 399 | " age_groups['30-41'].append(y)\n", 400 | " old_age_group = '30-41'\n", 401 | " if once:\n", 402 | " timeline_len_by_age['30-41'].append(t_len)\n", 403 | " timeline_len_by_age_y['30-41'].append(l_years)\n", 404 | " elif y <= 50:\n", 405 | " if old_age_group != '41-50':\n", 406 | " age_groups['41-50'].append(y)\n", 407 | " old_age_group = '41-50'\n", 408 | " if once:\n", 409 | " timeline_len_by_age['41-50'].append(t_len)\n", 410 | " timeline_len_by_age_y['41-50'].append(l_years)\n", 411 | " elif y <= 64:\n", 412 | " if old_age_group != '51-64':\n", 413 | " age_groups['51-64'].append(y)\n", 414 | " old_age_group = '51-64'\n", 415 | " if once:\n", 416 | " timeline_len_by_age['51-64'].append(t_len)\n", 417 | " timeline_len_by_age_y['51-64'].append(l_years)\n", 418 | " else:\n", 419 | " if old_age_group != '64+':\n", 420 | " age_groups['64+'].append(y)\n", 421 | " old_age_group = '64+'\n", 422 | " if once:\n", 423 | " timeline_len_by_age['64+'].append(t_len)\n", 424 | " timeline_len_by_age_y['64+'].append(l_years)\n", 425 | " once = False\n", 426 | "\n", 427 | " # Sex\n", 428 | " if 'sex' in e['token_type']:\n", 429 | " ind = e['token_type'].index('sex')\n", 
430 | " val = e['stream'][ind]\n", 431 | " if val == 'Female' or val == 'F':\n", 432 | " sex['Female'] += 1\n", 433 | " timeline_len_by_sex['Female'].append(t_len)\n", 434 | " timeline_len_by_sex_y['Female'].append(l_years)\n", 435 | " elif val == 'Male' or val == 'M':\n", 436 | " sex['Male'] += 1\n", 437 | " timeline_len_by_sex['Male'].append(t_len)\n", 438 | " timeline_len_by_sex_y['Male'].append(l_years)\n", 439 | " else:\n", 440 | " sex['Unknown'] += 1\n", 441 | " timeline_len_by_sex['Unknown'].append(t_len)\n", 442 | " timeline_len_by_sex_y['Unknown'].append(l_years)\n", 443 | " else:\n", 444 | " sex['Unknown'] += 1\n", 445 | " timeline_len_by_sex['Unknown'].append(t_len)\n", 446 | " timeline_len_by_sex_y['Unknown'].append(l_years)\n", 447 | " \n", 448 | " # Ethnicity\n", 449 | " if 'ethnicity' in e['token_type']:\n", 450 | " ind = e['token_type'].index('ethnicity')\n", 451 | " val = e['stream'][ind]\n", 452 | " if val in ethnicity:\n", 453 | " ethnicity[val] += 1\n", 454 | " timeline_len_by_eth[val].append(t_len)\n", 455 | " timeline_len_by_eth_y[val].append(l_years)\n", 456 | " else:\n", 457 | " ethnicity[val] = 1\n", 458 | " timeline_len_by_eth[val] = [t_len]\n", 459 | " timeline_len_by_eth_y[val] = [l_years]\n", 460 | " else:\n", 461 | " if 'Unknown' in ethnicity:\n", 462 | " ethnicity['Unknown'] += 1\n", 463 | " timeline_len_by_eth['Unknown'].append(t_len)\n", 464 | " timeline_len_by_eth_y['Unknown'].append(l_years)\n", 465 | " else:\n", 466 | " ethnicity['Unknown'] = 1\n", 467 | " timeline_len_by_eth['Unknown'] = [t_len]\n", 468 | " timeline_len_by_eth_y['Unknown'] = [l_years]\n", 469 | " \n", 470 | " # Concepts per CUI\n", 471 | " #vals = [v for v in e['token_type'] if v.startswith('T-')]\n", 472 | " for val in all_types:\n", 473 | " title = cat.cdb.addl_info['type_id2name'][val].title()\n", 474 | " if title in len_per_type:\n", 475 | " len_per_type[title].append(len([x for x in e['token_type'] if x == val]))\n", 476 | " else:\n", 477 | " 
len_per_type[title] = [len([x for x in e['token_type'] if x == val])]" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "id": "de1cf1aa", 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "# Mean number of concepts of certain type per pt\n", 488 | "fprint(\"Mean number of concepts of certain type per pt\")\n", 489 | "for t in len_per_type:\n", 490 | " fprint(\"{:30} : {}\".format(t, np.mean(len_per_type[t])))\n", 491 | "fprint('\\n')" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "id": "29ae3172", 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "# Mean timeline length by age group\n", 502 | "fprint(\"Mean timeline length by age group\")\n", 503 | "fprint(timeline_len_by_age.keys(), '')\n", 504 | "for age in timeline_len_by_age:\n", 505 | " fprint(\"{:.0f} ({:.1f})\".format(np.mean(timeline_len_by_age[age]), np.mean(timeline_len_by_age_y[age])))\n", 506 | "fprint('\\n')" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": null, 512 | "id": "dd67782c", 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "new_timeline_len_by_eth = {'White': [], 'Black': [], 'Other': [], \n", 517 | " 'Asian': [], 'Unknown': [], 'Mixed': []}\n", 518 | "new_timeline_len_by_eth_y = {'White': [], 'Black': [], 'Other': [], \n", 519 | " 'Asian': [], 'Unknown': [], 'Mixed': []}\n", 520 | "\n", 521 | "for eth in timeline_len_by_eth:\n", 522 | " if 'ASIAN' in eth:\n", 523 | " new_timeline_len_by_eth['Asian'].extend(timeline_len_by_eth[eth])\n", 524 | " new_timeline_len_by_eth_y['Asian'].extend(timeline_len_by_eth_y[eth])\n", 525 | " elif 'BLACK' in eth:\n", 526 | " new_timeline_len_by_eth['Black'].extend(timeline_len_by_eth[eth])\n", 527 | " new_timeline_len_by_eth_y['Black'].extend(timeline_len_by_eth_y[eth])\n", 528 | " elif 'WHITE' in eth:\n", 529 | " new_timeline_len_by_eth['White'].extend(timeline_len_by_eth[eth])\n", 530 | " 
new_timeline_len_by_eth_y['White'].extend(timeline_len_by_eth_y[eth])\n", 531 | " elif 'UNKNOWN' in eth or 'PATIENT DECLINED TO ANSWER' in eth or 'UNABLE TO OBTAIN' in eth:\n", 532 | " new_timeline_len_by_eth['Unknown'].extend(timeline_len_by_eth[eth])\n", 533 | " new_timeline_len_by_eth_y['Unknown'].extend(timeline_len_by_eth_y[eth])\n", 534 | " elif 'MULTI' in eth:\n", 535 | " new_timeline_len_by_eth['Mixed'].extend(timeline_len_by_eth[eth])\n", 536 | " new_timeline_len_by_eth_y['Mixed'].extend(timeline_len_by_eth_y[eth])\n", 537 | " else:\n", 538 | " new_timeline_len_by_eth['Other'].extend(timeline_len_by_eth[eth])\n", 539 | " new_timeline_len_by_eth_y['Other'].extend(timeline_len_by_eth_y[eth])\n", 540 | "\n", 541 | "fprint(\"Mean timeline length by ethnicity\")\n", 542 | "for eth in new_timeline_len_by_eth:\n", 543 | " fprint(\"{:10} : {:.0f} ({:.1f})\".format(eth, np.mean(new_timeline_len_by_eth[eth]), \n", 544 | " np.mean(new_timeline_len_by_eth_y[eth])))\n", 545 | "fprint('\\n')" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "id": "c6e999d8", 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "fprint(\"Mean timeline length by sex\")\n", 556 | "fprint(timeline_len_by_sex.keys(), '')\n", 557 | "for s in timeline_len_by_sex:\n", 558 | " fprint(\"{:.0f} ({:.1f})\".format(np.mean(timeline_len_by_sex[s]), np.mean(timeline_len_by_sex_y[s])))\n", 559 | "fprint('\\n')" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "id": "5a1f4fb3", 566 | "metadata": {}, 567 | "outputs": [], 568 | "source": [ 569 | "fprint(\"Mean timeline len: \", np.mean(timeline_lens))\n", 570 | "fprint('\\n')" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "id": "4aa01a65", 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "# Number of pts by ethnicity\n", 581 | "#fprint(\"Ethnicity: \", ethnicity)\n", 582 | "new_ethnicity = {'White': 0, 
'Black': 0, 'Other': 0, \n", 583 | " 'Asian': 0, 'Unknown': 0, 'Mixed': 0}\n", 584 | "\n", 585 | "for eth in ethnicity:\n", 586 | " if 'ASIAN' in eth:\n", 587 | " new_ethnicity['Asian'] += ethnicity[eth]\n", 588 | " elif 'BLACK' in eth:\n", 589 | " new_ethnicity['Black'] += ethnicity[eth]\n", 590 | " elif 'WHITE' in eth:\n", 591 | " new_ethnicity['White'] += ethnicity[eth]\n", 592 | " elif 'UNKNOWN' in eth or 'PATIENT DECLINED TO ANSWER' in eth or 'UNABLE TO OBTAIN' in eth:\n", 593 | " new_ethnicity['Unknown'] += ethnicity[eth]\n", 594 | " elif 'MULTI' in eth:\n", 595 | " new_ethnicity['Mixed'] += ethnicity[eth]\n", 596 | " else:\n", 597 | " new_ethnicity['Other'] += ethnicity[eth]\n", 598 | "fprint(new_ethnicity)\n", 599 | "fprint('\\n')" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": null, 605 | "id": "c16fe4f2", 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "# Number of pts by sex\n", 610 | "fprint(sex)\n", 611 | "fprint('\\n')" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": null, 617 | "id": "f0aae15a", 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [ 621 | "fprint(\"Total pts for sex: \", sum(sex.values()))\n", 622 | "fprint('\\n')" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "id": "d4401a1c", 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "dataset" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": null, 638 | "id": "3b19d621", 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [ 642 | "# Number of pts by age (note that we are multi counting, if for one pt we have age 27, 28 and 35 that will be three counts)\n", 643 | "t_cnt = 0\n", 644 | "fprint(\"Age group, mean age for group, number of patients in this group (with multi counting)\")\n", 645 | "for g in age_groups:\n", 646 | " fprint('{} - {:.3f} - {}'.format(g, np.mean(age_groups[g]), len(age_groups[g])))\n", 647 | " 
t_cnt += len(age_groups[g])\n", 648 | "fprint('\\n')" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "id": "f14d0451", 655 | "metadata": {}, 656 | "outputs": [], 657 | "source": [ 658 | "# Overall timeline mean length in years \n", 659 | "fprint('Timeline len in years: ', np.mean(timeline_len_years))\n", 660 | "fprint('\\n')" 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "execution_count": null, 666 | "id": "35f6bf1c", 667 | "metadata": {}, 668 | "outputs": [], 669 | "source": [ 670 | "ds_info.close()" 671 | ] 672 | } 673 | ], 674 | "metadata": { 675 | "kernelspec": { 676 | "display_name": "Python 3 (ipykernel)", 677 | "language": "python", 678 | "name": "python3" 679 | }, 680 | "language_info": { 681 | "codemirror_mode": { 682 | "name": "ipython", 683 | "version": 3 684 | }, 685 | "file_extension": ".py", 686 | "mimetype": "text/x-python", 687 | "name": "python", 688 | "nbconvert_exporter": "python", 689 | "pygments_lexer": "ipython3", 690 | "version": "3.8.0" 691 | } 692 | }, 693 | "nbformat": 4, 694 | "nbformat_minor": 5 695 | } 696 | -------------------------------------------------------------------------------- /foresight/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CogStack/Foresight/a152dce0933975e9c9e28668f62f13812ddc3c20/foresight/__init__.py -------------------------------------------------------------------------------- /foresight/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CogStack/Foresight/a152dce0933975e9c9e28668f62f13812ddc3c20/foresight/datasets/__init__.py -------------------------------------------------------------------------------- /foresight/datasets/data_collator.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Dict, List, NewType, Optional, 
InputDataClass = NewType("InputDataClass", Any)


class CollataAndPad(object):
    r''' Arrange the data into the right format + add padding or trim where necessary.

    Calling an instance on a list of features (dicts that hold at least `input_ids`) returns a
    dict of tensors: `input_ids`, `labels`, `attention_mask` and - when enabled -
    `position_ids`, `token_type_ids` or `inputs_embeds`. The features passed in are never modified.

    Args:
        max_seq_len (`int`, `optional`, defaults to -1):
            Upper bound for sequence length. If it is -1 it will be calculated for
            each batch and set to the length of the longest sample, without an upper limit.
        pad_id (`int`, `optional`, defaults to 0):
            What ID will be used to pad the inputs to max_seq_len
        shift_labels:
            If True (and the features do not already have `labels`) labels are the input_ids shifted
            by one position (labels[i] = input_ids[i + 1]) and padded with -100, the last element of each
            sample is dropped from input_ids. If False labels are a copy of the padded input_ids
            where every pad_id is replaced with -100.
        remap_names:
            If a dictionary, the standard output names (input_ids, labels, attention_mask) will be mapped
            to whatever is in the dict.
        mlm (`float`, `optional`, defaults to None):
            Number [0, 1] - Marks the mlm probability. Masking is not implemented, anything
            other than None makes `__call__` raise.
        mlm_mask_id:
            ID of the token that will be used as a mask
        use_position_ids:
            If True `position_ids` are taken from the features, padded with max_seq_len - 1 and added to the batch
        embeddings:
            If not None it is called as `embeddings(input_ids, cntxs=...)` with the padded
            `context_representation` of the features, the result is returned as `inputs_embeds`
            and `input_ids` is removed from the batch.
        use_token_type_ids:
            If True `token_type_id` is taken from the features, padded with pad_id and returned as `token_type_ids`
    '''
    def __init__(self, max_seq_len=-1, pad_id=0, shift_labels=False, remap_names=None, mlm=None, mlm_mask_id=None, use_position_ids=False,
                 embeddings=None, use_token_type_ids=False):
        self.max_seq_len = max_seq_len
        self.pad_id = pad_id
        self.shift_labels = shift_labels
        self.remap_names = remap_names
        self.mlm = mlm
        self.mlm_mask_id = mlm_mask_id
        self.use_position_ids = use_position_ids
        self.use_token_type_ids = use_token_type_ids
        self.embeddings = embeddings

    @staticmethod
    def _fit(values, length, pad_value):
        r''' Trim `values` to `length` elements, or right-pad it with `pad_value` up to `length` '''
        return list(values[0:length]) + [pad_value] * max(0, length - len(values))

    def __call__(self, features: List[InputDataClass]) -> Dict[str, torch.Tensor]:
        batch = {}
        longest = max(len(f['input_ids']) for f in features)
        if self.max_seq_len == -1:
            max_seq_len = longest
        else:
            max_seq_len = min(self.max_seq_len, longest)

        # Local references only, the features of the caller must stay untouched. Trimming them in
        # place would shorten every sample by one more token each time the same features are collated.
        all_input_ids = [f['input_ids'] for f in features]

        if self.shift_labels and 'labels' not in features[0]:
            # Labels do not exist and we should shift, one token per sample is consumed by the shift
            max_seq_len = min(max_seq_len, max(0, longest - 1))
            batch['labels'] = torch.tensor([self._fit(ids[1:], max_seq_len, -100) for ids in all_input_ids],
                                           dtype=torch.long)
            all_input_ids = [ids[0:-1] for ids in all_input_ids]
        elif 'labels' in features[0]:
            # Labels already exist. Equal-length (or scalar) labels are used as they are, ragged
            # label sequences are trimmed/padded with -100 as otherwise no tensor can be built.
            labels = [f['labels'] for f in features]
            if all(isinstance(lbl, (list, tuple)) for lbl in labels) and len({len(lbl) for lbl in labels}) > 1:
                labels = [self._fit(lbl, max_seq_len, -100) for lbl in labels]
            batch['labels'] = torch.tensor(labels, dtype=torch.long)

        batch['input_ids'] = torch.tensor([self._fit(ids, max_seq_len, self.pad_id) for ids in all_input_ids],
                                          dtype=torch.long)

        if self.use_position_ids:
            # Padding for position ids is max_seq_len - 1
            batch['position_ids'] = torch.tensor([self._fit(f['position_ids'], max_seq_len, max_seq_len - 1)
                                                  for f in features], dtype=torch.long)

        if self.use_token_type_ids:
            # Padding for token type ids is pad_id
            batch['token_type_ids'] = torch.tensor([self._fit(f['token_type_id'], max_seq_len, self.pad_id)
                                                    for f in features], dtype=torch.long)

        # Everything that equals pad_id is masked out
        batch['attention_mask'] = batch['input_ids'] != self.pad_id

        if not self.shift_labels and 'labels' not in batch:
            # If we did not shift labels just clone
            batch['labels'] = batch['input_ids'].clone()
            batch['labels'][batch['labels'] == self.pad_id] = -100

        # If we want Masked Language Modeling - mask some tokens
        if self.mlm is not None:
            # NotImplementedError is a subclass of Exception, existing handlers keep working
            raise NotImplementedError("MLM not implemented")

        if self.remap_names is not None:
            batch = {self.remap_names.get(k, k): v for k, v in batch.items()}

        if self.embeddings is not None:
            # Pad or remove
            zero_cntx = [0.0] * len(features[0]['context_representation'][0])
            cntxs = torch.tensor([self._fit(f['context_representation'], max_seq_len, zero_cntx)
                                  for f in features], dtype=torch.float32)
            # Create input embeddings and remove input_ids
            batch['inputs_embeds'] = self.embeddings(batch['input_ids'], cntxs=cntxs)
            del batch['input_ids']

        return batch

    def mask_tokens(self, input_ids):
        r''' Build the matrix of masking probabilities for `input_ids`.

        Masking itself is not implemented (see `__call__`), this only returns a tensor with the
        shape of `input_ids` where every element is the mlm probability.

        Raises:
            ValueError: If the collator was created without `mlm`.
        '''
        if self.mlm is None:
            raise ValueError("mlm probability is not set")
        probability_matrix = torch.full(input_ids.shape, self.mlm)
        return probability_matrix
torch.tensor([f['context_representation'][0:max_seq_len] + [zero_cntx] * max(0, max_seq_len - len(f['context_representation'])) 92 | for f in features], dtype=torch.float32) 93 | # Create input embeddings and remove input_ids 94 | batch['inputs_embeds'] = self.embeddings(batch['input_ids'], cntxs=cntxs) 95 | del batch['input_ids'] 96 | 97 | return batch 98 | 99 | 100 | def mask_tokens(self, input_ids): 101 | probability_matrix = torch.full(input_ids.shape, self.mlm_probability) 102 | 103 | -------------------------------------------------------------------------------- /foresight/datasets/filters.py: -------------------------------------------------------------------------------- 1 | from foresight.datasets.utils import get_all_splits 2 | from collections import defaultdict 3 | 4 | 5 | def filter_by_count(dataset, min_count=5, min_count_global=100, min_length=5, max_length=0, token_cnt=None, num_proc=None): 6 | r''' Filters tokens of a dataset and leaves only the ones with frequencey >= min_count 7 | 8 | Args: 9 | dataset 10 | min_count: 11 | Intra patient count 12 | min_count_global: 13 | Whole dataset count 14 | min_length: 15 | Examples below will be removed, in other words patients with less than min_length concepts 16 | max_length: 17 | Anything longer than this will be trimmed to this 18 | num_proc 19 | ''' 20 | 21 | if min_count_global is not None and min_count_global > 0: 22 | if token_cnt is None: 23 | token_cnt = defaultdict(int) 24 | for _dataset in get_all_splits(dataset): 25 | for stream in _dataset['stream']: 26 | seen_in_stream = set() 27 | for sample in stream: 28 | token = sample['token'] 29 | if token not in seen_in_stream: 30 | token_cnt[token] += 1 31 | seen_in_stream.add(token) 32 | 33 | # First we filter by global count, ie a concept has to have more than min_count_global appearances in the whole dataset 34 | dataset = dataset.map(function=lambda example: {'stream': [sample for sample in example['stream'] if token_cnt[sample['token']] >= 
def filter_by_type(dataset, types_to_keep, num_proc):
    r''' Keep only the stream entries whose `token_type` is in `types_to_keep`.

    Args:
        dataset:
            Object with a HF datasets style `map`, every example has a `stream` list of dicts.
        types_to_keep:
            Container of the token types that stay in the stream.
        num_proc:
            Forwarded to `dataset.map`.
    '''
    def _only_allowed_types(example):
        kept = []
        for entry in example['stream']:
            if entry['token_type'] in types_to_keep:
                kept.append(entry)
        return {'stream': kept}

    return dataset.map(function=_only_allowed_types, load_from_cache_file=False, num_proc=num_proc)
class PatientConceptStream(datasets.GeneratorBasedBuilder):
    """PatientConceptStream: as input takes the patient to stream of concepts.

    Every input file is a pickled dict `{patient_id: [entry, ...]}` where an entry is
    `(token, count_for_patient, timestamp)` optionally followed by `token_type`.

    TODO: Move the preparations scripts out of notebooks
    """

    BUILDER_CONFIGS = [
        PatientConceptStreamConfig(
            name="pickle",
            version=datasets.Version("1.0.0", ""),
            description="Pickled output from Temporal dataset preparation scripts",
        ),
    ]

    def _info(self):
        """Describe the features of one example: a patient id and its stream of tokens."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "patient_id": datasets.Value("string"),
                    "stream": [
                        {
                            'token': datasets.Value('string'),
                            'cnt': datasets.Value('int32'),
                            'time': datasets.Value('int64'),
                            'token_type': datasets.Value('string'),
                            #'context_representation': datasets.Sequence(datasets.Value('float'), length=128),
                        }
                    ],
                }
            ),
            supervised_keys=None,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Returns SplitGenerators: a single train split built from `data_files['train']`."""
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    'filepaths': self.config.data_files['train'],
                },
            ),
        ]

    def _generate_examples(self, filepaths):
        """Returns Pts one by one, as `(patient_id, {'patient_id': ..., 'stream': [...]})`.

        Entries without a timestamp are dropped, the rest is sorted by time (ascending).
        Patients left with an empty stream are skipped.
        """
        for filepath in filepaths:
            logging.info("generating examples from = %s", filepath)
            # NOTE(security): unpickling executes arbitrary code - only load trusted files
            with open(filepath, 'rb') as f:
                pt2stream = pickle.load(f)
            for pt, stream in pt2stream.items():
                # If time == None there is no temporal info, and for this dataset it is required
                timed = sorted((data for data in stream if data[2] is not None), key=lambda data: data[2])
                out_stream = [
                    {
                        'token': data[0],  # Call it token from now on as it does not have to be only CUIs
                        'cnt': data[1],
                        'time': int(data[2]),  # We convert this into int for speed
                        # The documented 3-element entries carry no type, mark them as unknown
                        'token_type': data[3] if len(data) > 3 else 'unk',
                        #'context_representation': np.random.rand(128), #data[4],
                    }
                    for data in timed
                ]
                pt = str(pt)
                if out_stream:  # Skip streams that have zero annotations
                    yield pt, {'patient_id': pt,
                               'stream': out_stream}
def get_duration_separator(separator, start_time, current_time, bucket_size_seconds):
    r''' Return `separator`, or a variant of it tagged with how many buckets have elapsed.

    The tag (`-1` or `-7`) is inserted right before the last character of `separator`;
    when both thresholds are reached the larger one wins.
    '''
    elapsed = current_time - start_time
    reached = [multiple for multiple in (1, 7) if elapsed >= bucket_size_seconds * multiple]
    if not reached:
        return separator

    head, tail = separator[0:-1], separator[-1]
    return f'{head}-{reached[-1]}{tail}'


def bucket_concepts(examples, bucket_size_seconds=365*24*60*60, separator='', duration_separator=False):
    r''' Group every stream into time buckets and keep one entry per token inside a bucket.

    Args:
        examples:
            Batch with a `stream` list; every entity needs `token` and `time`.
        bucket_size_seconds:
            A new bucket starts once an entity is at least this far from the bucket start.
        separator:
            Token placed between two buckets, None for no separator.
        duration_separator:
            If True the separator comes from `get_duration_separator`.
    '''
    # NOTE(review): block nesting was reconstructed from a flattened listing - confirm that
    # the bucket start is meant to be reset on every rollover, with or without a separator.
    for idx, stream in enumerate(examples['stream']):
        bucketed = []
        current = []
        seen = set()
        bucket_start = -1
        for ent in stream:
            when = ent['time']
            if bucket_start == -1:
                bucket_start = when

            if when - bucket_start >= bucket_size_seconds:
                # Close the running bucket
                bucketed.extend(current)
                current, seen = [], set()

                if separator is not None:
                    if duration_separator:
                        # Different separator for different time spans
                        sep_token = get_duration_separator(separator, bucket_start, when, bucket_size_seconds)
                    else:
                        sep_token = separator
                    # The separator is timed one step after the last entry already in the stream
                    bucketed.append(make_example(ent_example=ent, token=sep_token, token_type='sep', cnt=10**6,
                                                 time=bucketed[-1]['time'] + 1))
                bucket_start = when

            token = ent['token']
            if token not in seen:
                current.append(ent)
                seen.add(token)

        bucketed.extend(current)
        examples['stream'][idx] = bucketed

    return examples
def add_age(examples, pt2dob_timestamp, age_prefix='', age_suffix=None, age_normalizer=365.25 * 24 * 60 * 60):
    r''' Insert age tokens into every stream whose patient has a known date of birth.

    An age token is added right before the first entity at which the (integer) age
    changes; negative ages are never added.

    Args:
        examples:
            Batch with `stream` and `patient_id`; every entity needs `time`.
        pt2dob_timestamp:
            Map from patient_id to date of birth in the same unit as `time`. None, or a
            missing patient, leaves the stream unchanged.
        age_prefix:
            Token added before every age token, None to skip it.
        age_suffix:
            Token added after every age token, None to skip it.
        age_normalizer:
            Divisor that turns a time difference into an age (default: seconds per year).
    '''
    for i, stream in enumerate(examples['stream']):
        patient_id = examples['patient_id'][i]
        # The None check has to come first, `in None` raises a TypeError
        has_dob = pt2dob_timestamp is not None and patient_id in pt2dob_timestamp
        last_age_added = -1
        new_stream = []
        for ent in stream:
            if has_dob:
                age = int((ent['time'] - pt2dob_timestamp[patient_id]) / age_normalizer)

                # Age comes a step before the token that caused the change
                if age >= 0 and last_age_added != age:
                    if age_prefix is not None:
                        new_stream.append(make_example(ent_example=ent, token=age_prefix, token_type='age_prefix', cnt=10**6, time=ent['time']))
                    new_stream.append(make_example(ent_example=ent, token=str(age), token_type='age', cnt=10**6, time=ent['time']))
                    last_age_added = age
                    if age_suffix is not None:
                        # NOTE(review): 'age_suffx' is kept as is, other code may match on this exact type
                        new_stream.append(make_example(ent_example=ent, token=age_suffix, token_type='age_suffx', cnt=10**6, time=ent['time']))

            new_stream.append(ent)

        examples['stream'][i] = new_stream

    return examples
def split_stream(examples, max_seq_len=-1):
    r''' Cut every stream into consecutive parts of at most `max_seq_len` elements.

    Each part becomes its own example and inherits the patient_id of the stream it
    came from; an empty stream produces no example. Nothing happens if max_seq_len <= 0.
    '''
    if max_seq_len <= 0:
        return examples

    parts = []
    owners = []
    for ind, stream in enumerate(examples['stream']):
        n_parts = math.ceil(len(stream) / max_seq_len)
        pieces = [stream[k * max_seq_len:(k + 1) * max_seq_len] for k in range(n_parts)]
        parts.extend(pieces)
        owners.extend(examples['patient_id'][ind] for _ in pieces)

    examples['stream'] = parts
    examples['patient_id'] = owners
    return examples
def add_to_stream(examples, pt2tkn, last=False, prefix=None, unk_tkn='unk', token_type='unk'):
    r''' Add information to the patient stream based on patient_id.

    Args:
        examples:
            Batch with `stream` and `patient_id`; every entity needs `time`.
        pt2tkn:
            Map from patient_id to the token that will be added to the stream of that patient.
            Patients that are not in the map are left unchanged.
        last:
            If True the token is added at the end of the stream (with the time of the last
            entity), otherwise at the beginning (with the time of the first entity).
        prefix:
            If not None this token is added right before the new token, its type is
            "prefix_" + token_type.
        unk_tkn:
            Currently unused: no token is added for patients missing from pt2tkn.
        token_type:
            Type assigned to the new token.
    '''
    for i, stream in enumerate(examples['stream']):
        patient_id = examples['patient_id'][i]
        # An empty stream has no entity to take the time from, so it is left as it is
        if not stream or patient_id not in pt2tkn:
            continue

        ent = stream[0]
        # With last=True the new tokens get the time of the last entity, otherwise of the first
        anchor_time = stream[-1 if last else 0]['time']
        to_append = [make_example(ent_example=ent, token=pt2tkn[patient_id], cnt=10**6, time=anchor_time, token_type=token_type)]
        if prefix is not None:
            prefix_token = make_example(ent_example=ent, token=prefix, cnt=10**6,
                                        time=anchor_time, token_type="prefix_" + token_type)
            to_append = [prefix_token] + to_append

        examples['stream'][i] = stream + to_append if last else to_append + stream

    return examples
if not in parents 280 | if tkn not in parents: 281 | new_stream.append(ent) 282 | # Update parents 283 | parents.update(ch2parents[tkn]) 284 | else: 285 | new_stream.append(ent) 286 | 287 | examples['stream'][i] = new_stream 288 | 289 | return examples 290 | 291 | def get_embeddings_for_tokens(dataset=None, cdb=None, context_type='medium', normalize=True, extra_tokens=[''], types=None, concepts=None): 292 | r''' Given a stream of tokens get the embeddings from MedCAT and make the required maps. 293 | 294 | Args: 295 | dataset 296 | cdb 297 | context_type 298 | normalize: 299 | If True the embedding vectors will be normalized 300 | tkn2type: 301 | Dictionary mapping from token to type 302 | types: 303 | All posible token types (e.g. [T-11, T-12, ...] 304 | concepts: 305 | If provided these concepts will also be appened to the tokens and supported by the tokenizer 306 | Returns: 307 | embeddings 308 | tkn2id 309 | id2tkn 310 | id2type 311 | id2type_detailed 312 | ''' 313 | embeddings = [] 314 | tkn2id = {} 315 | id2tkn = {} 316 | 317 | def add_tkn(tkn): 318 | if tkn in cdb.cui2context_vectors and context_type in cdb.cui2context_vectors[tkn]: 319 | vec = cdb.cui2context_vectors[tkn][context_type] 320 | else: 321 | # Token vector is randomly assigned 322 | vec = np.random.rand(300) 323 | 324 | id2tkn[len(embeddings)] = tkn 325 | tkn2id[tkn] = len(embeddings) 326 | 327 | vec = unitvec(vec) if normalize else vec 328 | embeddings.append(vec) 329 | 330 | datasets = get_all_splits(dataset) 331 | for _dataset in datasets: 332 | for stream in _dataset['stream']: 333 | for tkn in stream: 334 | tkn = str(tkn) 335 | if tkn not in tkn2id: 336 | add_tkn(tkn) 337 | # Add concepts if they are provided, this is used to build a general 338 | #tokenizer with all concepts 339 | if concepts is not None: 340 | for concept in concepts: 341 | tkn = str(concept) 342 | if tkn not in tkn2id: 343 | add_tkn(tkn) 344 | 345 | # Add named tokens 346 | for tkn in extra_tokens: 347 | if tkn not in 
tkn2id: 348 | id2tkn[len(embeddings)] = tkn 349 | tkn2id[tkn] = len(embeddings) 350 | if tkn != '': 351 | embeddings.append(np.random.rand(len(embeddings[0]))) 352 | else: 353 | embeddings.append(np.zeros(len(embeddings[0]))) 354 | 355 | # Add type tokens 356 | for tkn in types: 357 | if tkn not in tkn2id: 358 | id2tkn[len(embeddings)] = tkn 359 | tkn2id[tkn] = len(embeddings) 360 | embeddings.append(np.random.rand(len(embeddings[0]))) 361 | 362 | return embeddings, tkn2id, id2tkn 363 | 364 | 365 | def stream_to_separate_examples(examples): 366 | r''' Convert a stream to separate examples that can be used to train 367 | a next concept predictor unable to handle sequences (e.g. random forset). Use with HF datasets map function. 368 | 369 | ''' 370 | out = {} 371 | out['input_ids'] = [input_ids[0:i+1] for input_ids in examples['input_ids'] for i in range(len(input_ids) - 1)] 372 | out['labels'] = [input_ids[i+1] for input_ids in examples['input_ids'] for i in range(len(input_ids) - 1)] 373 | out['labels_all'] = [input_ids[i+1:] for input_ids in examples['input_ids'] for i in range(len(input_ids) - 1)] 374 | out['patient_id'] = [patient_id for ind, patient_id in enumerate(examples['patient_id']) for _ in range(len(examples['input_ids'][ind]) - 1)] 375 | 376 | return out 377 | -------------------------------------------------------------------------------- /foresight/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CogStack/Foresight/a152dce0933975e9c9e28668f62f13812ddc3c20/foresight/metrics/__init__.py -------------------------------------------------------------------------------- /foresight/sight.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import numpy as np 3 | import torch 4 | from foresight.utils.cdb_utils import get_parents_map, get_children_map, get_siblings_map 5 | 6 | class Sight(object): 7 | def 
class SimpleMapTokenizer(object):
    r''' Not even really a tokenizer, will take a list of tokens and
    convert them to IDs

    Args:
        tkn2id:
            Map from token to ID, `id2tkn` is built from it.
        pad_id:
            ID of the padding token, it gets 0 in the attention mask.
        max_len:
            `encode` trims everything to this length.
        tkn2name:
            Optional map from token to a readable name, used when decoding.
        token_type2tokens, embeddings, global_token_cnt:
            Stored as they are and saved together with the tokenizer.
    '''
    def __init__(self, tkn2id=None, pad_id=None, max_len=50, tkn2name=None,
                 token_type2tokens=None, embeddings=None, global_token_cnt=None):
        self.tkn2id = tkn2id
        self.pad_id = pad_id
        self.max_len = max_len
        self.tkn2name = tkn2name
        self.token_type2tokens = token_type2tokens
        self.embeddings = embeddings
        self.global_token_cnt = global_token_cnt
        self.id2tkn = None

        # Create id2tkn
        if tkn2id is not None:
            self.id2tkn = {v: k for k, v in self.tkn2id.items()}

    def __call__(self, text, return_tensors=False, device='cpu', skip_oov=False):
        r''' Convert `text` into `input_ids` and `attention_mask`.

        Args:
            text:
                A list of tokens, or a string of tokens joined with "~~". Any other type
                gives an empty output.
            return_tensors:
                If True the outputs are tensors of shape (1, n_tokens) moved to `device`.
            skip_oov:
                will skip out of vocabulary words, otherwise error (KeyError)
        '''
        out = {'input_ids': [], 'attention_mask': []}

        if isinstance(text, str):
            text = text.split("~~")
        if isinstance(text, list):
            # It is pre_tokenized
            out['input_ids'] = [self.tkn2id[tkn] for tkn in text if not skip_oov or tkn in self.tkn2id]

        out['attention_mask'] = [float(x != self.pad_id) for x in out['input_ids']]

        if return_tensors:
            out = {k: torch.tensor([v]).to(device) for k, v in out.items()}

        return out

    def decode(self, token_ids, get_names=True):
        r''' Convert one ID or a list/tensor of IDs into a space separated string.
        '''
        tkns = self.convert_ids2tokens(token_ids, get_names=get_names)
        if not isinstance(tkns, list):
            tkns = [tkns]
        return " ".join(tkns)

    def convert_ids2tokens(self, token_ids, get_names=True):
        r''' Convert one ID or a list/tensor of IDs into a list of tokens.

        If `get_names` is True and `tkn2name` is available, tokens are replaced with
        their names where one exists. Same as decode, but needed for compatibility with ecco.
        '''
        if isinstance(token_ids, torch.Tensor):
            token_ids = token_ids.tolist()
        # The emptiness check keeps `token_ids[0]` from failing on an empty list
        if isinstance(token_ids, list) and token_ids and isinstance(token_ids[0], torch.Tensor):
            token_ids = [x.tolist() for x in token_ids]

        if not isinstance(token_ids, list):
            token_ids = [token_ids]
        # Convert IDs to tokens
        out = [self.id2tkn[int(token_id)] for token_id in token_ids]

        if get_names and self.tkn2name is not None:
            out = [self.tkn2name.get(tkn, tkn) for tkn in out]

        return out

    def tokens_to_ids(self, tokens):
        r''' Convert tokens to IDs. Raises a KeyError for a token that is not in tkn2id,
        nothing is skipped.
        '''
        out = [self.tkn2id[tkn] for tkn in tokens]

        return out

    def encode(self, examples, trim_to_max_len=('position_ids', 'time', 'token_type')):
        r''' Convert 'stream' in the examples from tokens to IDs, save as 'input_ids'. Use with HF datasets.map

        'token_type' is converted in the same way and saved as 'token_type_id'. Both, and
        every key in `trim_to_max_len`, are trimmed to `max_len`.
        '''
        examples['input_ids'] = [self.tokens_to_ids(stream)[0:self.max_len] for stream in examples['stream']]
        examples['token_type_id'] = [self.tokens_to_ids(token_type)[0:self.max_len] for token_type in examples['token_type']]

        for key in trim_to_max_len:
            examples[key] = [example[0:self.max_len] for example in examples[key]]

        return examples

    def save(self, path):
        r''' Save all attributes of the tokenizer to `path` with dill.
        '''
        with open(path, 'wb') as f:
            dill.dump(self.__dict__, f)

    @classmethod
    def load(cls, path):
        r''' Create a tokenizer from a file written by `save`. Only attributes that
        `__init__` defines are restored.
        '''
        tokenizer = cls()
        # NOTE(security): dill can execute arbitrary code while loading - only load trusted files
        with open(path, 'rb') as f:
            d = dill.load(f)
        for k in tokenizer.__dict__:
            if k in d:
                tokenizer.__dict__[k] = d[k]
        return tokenizer
11 | """ 12 | if self.optimizer is None: 13 | decay_parameters = get_parameter_names(self.model, [nn.LayerNorm]) 14 | decay_parameters = [name for name in decay_parameters if "bias" not in name] 15 | optimizer_grouped_parameters = [ 16 | { 17 | "params": [p for n, p in self.model.named_parameters() if n in decay_parameters], 18 | "weight_decay": self.args.weight_decay, 19 | }, 20 | { 21 | "params": [p for n, p in self.model.named_parameters() if n not in decay_parameters], 22 | "weight_decay": 0.0, 23 | }, 24 | ] 25 | 26 | # Add the extra_params 27 | if hasattr(self, 'extra_params'): 28 | optimizer_grouped_parameters[0]['params'].extend([p for d, p in self.extra_params if d]) 29 | optimizer_grouped_parameters[1]['params'].extend([p for d, p in self.extra_params if not d]) 30 | 31 | optimizer_cls = Adafactor if self.args.adafactor else AdamW 32 | if self.args.adafactor: 33 | optimizer_cls = Adafactor 34 | optimizer_kwargs = {"scale_parameter": False, "relative_step": False} 35 | else: 36 | optimizer_cls = AdamW 37 | optimizer_kwargs = { 38 | "betas": (self.args.adam_beta1, self.args.adam_beta2), 39 | "eps": self.args.adam_epsilon, 40 | } 41 | optimizer_kwargs["lr"] = self.args.learning_rate 42 | if self.sharded_ddp == ShardedDDPOption.SIMPLE: 43 | self.optimizer = OSS( 44 | params=optimizer_grouped_parameters, 45 | optim=optimizer_cls, 46 | **optimizer_kwargs, 47 | ) 48 | else: 49 | self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) 50 | 51 | if is_sagemaker_mp_enabled(): 52 | self.optimizer = smp.DistributedOptimizer(self.optimizer) 53 | 54 | return self.optimizer 55 | 56 | 57 | def add_params_to_be_tracked(self, params, decay=True): 58 | if hasattr(self, 'extra_params'): 59 | self.extra_params.append((decay, params)) 60 | else: 61 | self.extra_params = [(decay, params)] 62 | -------------------------------------------------------------------------------- /foresight/utils/cdb_utils.py: 
def reverse_pt2ch(pt2ch):
    r''' Turn a parent -> children map into a child -> set of parents map.
    '''
    ch2pt = {}
    for pt, children in pt2ch.items():
        for ch in children:
            ch2pt.setdefault(ch, set()).add(pt)
    return ch2pt


def _reachable(start, edges, depth):
    r''' Everything reachable from `start` over `edges` in at most depth + 1 steps
    (a depth below 1 gives only the direct neighbours).
    '''
    found = set(edges.get(start, []))
    frontier = set(found)
    for _ in range(depth):
        step = set()
        for node in frontier:
            step.update(edges.get(node, []))
        # Nodes that were already expanded can not lead to anything new
        frontier = step - found
        found |= step
        if not frontier:
            break
    return found


def get_parents_map(cuis, pt2ch, ch2pt=None, depth=3):
    r''' Get a map from a concept to all of its parents up to the `depth`, meaning parents of parents and so on.

    Args:
        cuis:
            Concepts that will be the keys of the output.
        pt2ch (`Dict`):
            map from parent concept to children (this is
            usually what we have when building a CDB).
        ch2pt (`Dict`, optional):
            map from child concept to parents, built from `pt2ch` if not provided.
        depth (`int`, optional defaults to 3):
            0 means only parents, 1 parents of parents also, and so on.
    '''
    child_to_parents = reverse_pt2ch(pt2ch) if ch2pt is None else ch2pt
    return {cui: _reachable(cui, child_to_parents, depth) for cui in cuis}


def get_children_map(cuis, pt2ch, depth=3):
    r''' Returns a map from a CUI to all children of it until depth (0 means only direct children)
    '''
    return {cui: _reachable(cui, pt2ch, depth) for cui in cuis}
def load(path):
    """Return the object pickled in the file at `path`."""
    # NOTE(security): unpickling executes arbitrary code - only load trusted files
    with open(path, 'rb') as f:
        return pickle.load(f)


def dump(data, path):
    """Pickle `data` into the file at `path`, creating missing parent directories."""
    directory = os.path.dirname(path)
    # A bare file name has no directory part and os.makedirs('') would fail
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(data, f)
the `docs` output of medcat multiprocessing 40 | to a stream of concepts for each patient. 41 | 42 | Args: 43 | docs 44 | doc2pt 45 | doc2time 46 | meta_requirements: 47 | Values for meta_annotaitons that must exist e.g. = {'Presence': True} 48 | ''' 49 | if old_pt2stream is not None: 50 | pt2stream = old_pt2stream 51 | else: 52 | pt2stream = defaultdict(list) 53 | 54 | have_warned = set() 55 | for doc in docs: 56 | for ent in get_entities_for_doc(docs, doc): 57 | if not meta_requirements or \ 58 | all([ent['meta_anns'][name]['value'] == value for name, value in meta_requirements.items()]): 59 | 60 | cui = ent['cui'] 61 | if skip_cuis is None or cui not in skip_cuis: 62 | if doc2time is not None: 63 | timestamp = doc2time[doc] 64 | elif 'document_timestamp' in ent: 65 | timestamp = ent['document_timestamp'] 66 | else: 67 | timestamp = None # Means time is not known, later it will be ignored if necessary 68 | 69 | if not require_time or timestamp is not None: # Skip all where timestamp is None 70 | if historical_meta is not None and timestamp is not None: 71 | # If something is historical then make the timestamp less by 1 because it appeared before 72 | #other things in this document. 
# ---- setup.py ----
import setuptools
from setuptools.command.install import install
from setuptools.command.develop import develop
from setuptools.command.egg_info import egg_info

# Explicit encoding so the README is decoded the same way on every platform
with open("./README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="medgpt",
    version="0.4",
    author="w-is-h",
    author_email="w.kraljevic@gmail.com",
    description="Temporal modeling of patients and diseases",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/w-is-h/medgpt",
    # NOTE(review): the repository tree shows the code under `foresight/`
    # (datasets, metrics, tokenizers, utils) and no `medgpt/` folder - confirm
    # these package names before releasing.
    packages=['medgpt', 'medgpt.datasets', 'medgpt.metrics', 'medgpt.utils',
              'medgpt.models', 'medgpt.tokenizers'],
    install_requires=[
        # Every requirement needs its own trailing comma: adjacent string
        # literals are silently concatenated into one (invalid) requirement.
        'datasets==2.15.0',
        'transformers==4.35.2',
        'flash-attn==2.3.6',
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        # The LICENCE file of the repository is Apache License 2.0
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
)