├── .DS_Store ├── requirements.txt ├── .gitignore ├── app ├── main.py └── ui.py ├── README.md └── data ├── query_log.csv └── schema_map.json /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishsalunkhe/Text2SQL/main/.DS_Store -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai==0.28.1 2 | pandas 3 | numpy 4 | faiss-cpu 5 | tqdm 6 | streamlit 7 | altair 8 | nltk -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore raw CSV data (large and unnecessary for repo) 2 | data/csv/ 3 | 4 | # Ignore full-size database file 5 | data/mimic_iii.db 6 | 7 | # DO NOT ignore: schema_map.json and query_log.csv 8 | 9 | # Secrets (API keys etc.) 10 | .streamlit/secrets.toml 11 | 12 | # Python virtual environments 13 | .venv/ 14 | venv/ 15 | 16 | # OS/system files 17 | .DS_Store 18 | 19 | # Python bytecode/cache 20 | __pycache__/ 21 | *.pyc 22 | -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | # Directory Structure: 2 | # mimic_text_to_sql/ 3 | # ├── data/ 4 | # │ ├── csv/ ← All CSVs here 5 | # │ ├── mimic_iii.db ← DB will be created here 6 | # │ └── schema_map.json ← Generated schema info 7 | # └── app.py ← This script 8 | 9 | import os 10 | import sqlite3 11 | import json 12 | import pandas as pd 13 | import openai 14 | import faiss 15 | import numpy as np 16 | from tqdm import tqdm 17 | import argparse 18 | 19 | # === STEP 1: Path Setup === 20 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 21 | CSV_DIR = os.path.join(BASE_DIR, "data", "csv") 22 | DATA_DIR = os.path.join(BASE_DIR, 
"data") 23 | DB_PATH = os.path.join(DATA_DIR, "mimic_iii.db") 24 | SCHEMA_JSON_PATH = os.path.join(DATA_DIR, "schema_map.json") 25 | 26 | # ✅ Create DB directory if it doesn't exist 27 | os.makedirs(DATA_DIR, exist_ok=True) 28 | 29 | # === STEP 2: Load CSVs into SQLite if DB doesn't exist === 30 | if not os.path.exists(DB_PATH): 31 | print("📦 Creating mimic_iii.db from CSVs...") 32 | conn = sqlite3.connect(DB_PATH) 33 | for file in tqdm(os.listdir(CSV_DIR)): 34 | if file.endswith(".csv"): 35 | table = file.replace(".csv", "").lower() 36 | df = pd.read_csv(os.path.join(CSV_DIR, file), low_memory=False) 37 | df.to_sql(table, conn, if_exists="replace", index=False) 38 | conn.commit() 39 | conn.close() 40 | print("✅ Database created at", DB_PATH) 41 | else: 42 | print("✅ Using existing SQLite DB at", DB_PATH) 43 | 44 | # === STEP 3: Extract Schema and Save JSON === 45 | if not os.path.exists(SCHEMA_JSON_PATH): 46 | print("🔍 Extracting schema to JSON map...") 47 | conn = sqlite3.connect(DB_PATH) 48 | cursor = conn.cursor() 49 | cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") 50 | tables = [row[0] for row in cursor.fetchall()] 51 | 52 | schema_info = {} 53 | for table in tables: 54 | cursor.execute(f"PRAGMA table_info({table});") 55 | columns = cursor.fetchall() 56 | schema_info[table] = { 57 | "columns": [col[1] for col in columns], 58 | "types": {col[1]: col[2] for col in columns} 59 | } 60 | 61 | # Annotate known joins 62 | known_joins = { 63 | "prescriptions": ["subject_id", "hadm_id"], 64 | "diagnoses_icd": ["subject_id", "hadm_id", "icd9_code"], 65 | "procedures_icd": ["subject_id", "hadm_id"], 66 | "microbiologyevents": ["subject_id"], 67 | "icustays": ["subject_id", "hadm_id", "icustay_id"], 68 | "patients": ["subject_id", "hospital_expire_flag"], 69 | "labevents": ["subject_id", "itemid"], 70 | "d_labitems": ["itemid", "label"], 71 | "d_icd_diagnoses": ["icd9_code", "long_title"] 72 | } 73 | 74 | for table, keys in known_joins.items(): 75 | 
def format_schema(schema_json):
    """Render the schema map as plain text for inclusion in the LLM prompt.

    Args:
        schema_json: Mapping of table name to a metadata dict that contains
            a ``"columns"`` list of column-name strings.

    Returns:
        One ``Table: ...`` / ``Columns: ...`` stanza per table, in the
        mapping's iteration order, with stanzas separated by a blank line.
        An empty mapping yields an empty string.
    """
    stanzas = []
    for table_name, table_meta in schema_json.items():
        column_list = ", ".join(table_meta["columns"])
        stanzas.append(f"Table: {table_name}\nColumns: {column_list}")
    return "\n\n".join(stanzas)
def run_sql(query):
    """Execute *query* against the SQLite database at ``DB_PATH``.

    Args:
        query: A single SQL statement (typically the text produced by the
            LLM).

    Returns:
        A ``pandas.DataFrame`` holding every fetched row, with column names
        taken from the cursor description. Statements that produce no result
        set return an empty DataFrame.

    Raises:
        sqlite3.Error: Propagated unchanged when the statement is invalid or
            cannot be executed.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()
        cur.execute(query)
        # `description` is None when the statement yields no result set;
        # iterating it would raise an unhelpful TypeError.
        if cur.description is None:
            return pd.DataFrame()
        cols = [desc[0] for desc in cur.description]
        rows = cur.fetchall()
    finally:
        # Close even when execute()/fetchall() raises, so a bad query does
        # not leave the connection open.
        conn.close()
    return pd.DataFrame(rows, columns=cols)
index=False) 167 | 168 | except Exception as e: 169 | print("❌ SQL execution failed:", e) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🩺 Text-to-SQL System for MIMIC-III Dataset 2 | 3 | **Ashish Salunkhe** 4 | University of Maryland, College Park 5 | **Aryaman Paigankar** 6 | University of Maryland, College Park 7 | 8 | --- 9 | ## How to reproduce this project? 10 | 11 | This guide will help you set up and run the Text-to-SQL system for querying the MIMIC-III dataset using natural language. 12 | 13 | --- 14 | 15 | ### Clone the Repository 16 | 17 | ```bash 18 | git clone https://github.com/your-username/mimic-llm-text2sql.git 19 | cd mimic-llm-text2sql 20 | ``` 21 | 22 | --- 23 | 24 | ### Set Up the Python Environment 25 | 26 | ```bash 27 | python3 -m venv .venv 28 | source .venv/bin/activate # On Windows use: .venv\Scripts\activate 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | --- 33 | 34 | ### Download the Dataset 35 | 36 | * Go to [Kaggle – mimic-iii-10k dataset](https://www.kaggle.com/datasets/bilal1907/mimic-iii-10k) 37 | * Download only the `*_random.csv` files 38 | * Place them in the following directory: 39 | 40 | ```bash 41 | data/csv/ 42 | ``` 43 | 44 | --- 45 | 46 | ### Set OpenAI API Key 47 | 48 | * Create a file named `.streamlit/secrets.toml` at the root of your repo 49 | * Add your OpenAI key as follows: 50 | 51 | ```toml 52 | OPENAI_API_KEY = "your-api-key-here" 53 | ``` 54 | 55 | --- 56 | 57 | ### Build the SQLite DB and Schema Map 58 | 59 | ```bash 60 | python app/main.py --question "What are the most common diagnoses?" 61 | ``` 62 | 63 | This will create `mimic_iii.db` and `schema_map.json` in the `data/` directory. 
64 | 65 | --- 66 | 67 | ### Run the Streamlit UI App 68 | 69 | ```bash 70 | streamlit run app/ui.py 71 | ``` 72 | 73 | This will open a browser where you can ask clinical questions in plain English. 74 | 75 | --- 76 | 77 | ### Run via Command-Line (Optional) 78 | 79 | You can also run the pipeline through CLI: 80 | 81 | ```bash 82 | python app/main.py --question "Which lab tests are common in diabetic patients?" 83 | ``` 84 | 85 | 86 | --- 87 | ## 🚀 Repo Structure 88 | 89 | ``` 90 | mimic_text_to_sql/ 91 | ├── data/ 92 | │ ├── mimic_iii.db # SQLite DB from CSVs 93 | │ ├── schema_map.json # JSON schema metadata 94 | │ └── query_log.csv # Logged questions, SQL, results 95 | ├── app/ 96 | │ ├── main.py # CLI interface 97 | │ ├── ui.py # Streamlit interface 98 | ├── .streamlit/ 99 | │ └── secrets.toml # API keys (ignored) 100 | ├── requirements.txt 101 | └── README.md 102 | ``` 103 | --- 104 | 105 | ## 📌 Problem Formulation 106 | 107 | Large Language Models (LLMs) have shown increasing capability in natural language understanding and structured data reasoning. One practical application is translating natural language questions into SQL queries to access complex medical datasets like MIMIC-III. 108 | 109 | Our project aims to develop a Retrieval-Augmented Generation (RAG) based Text-to-SQL system that enables healthcare professionals or researchers to interact with the MIMIC-III clinical database using plain English queries. 110 | 111 | Key challenges addressed: 112 | 113 | * Schema complexity 114 | * Ambiguity in natural language 115 | * Lack of join/contextual awareness in naive LLMs 116 | 117 | We mitigate these issues via schema-aware metadata retrieval and GPT-based SQL generation. 118 | 119 | --- 120 | 121 | ## 🗃️ Dataset Description 122 | 123 | We used the **mimic-III-10k** dataset — a curated subset of the full MIMIC-III clinical dataset containing \~10,000 patients. 
124 | 125 | * Source: Beth Israel Deaconess Medical Center (via PhysioNet) 126 | * Format: 25 CSV tables (\~6 GB total) 127 | * Relational schema includes: 128 | 129 | * `PATIENTS`: Demographics 130 | * `ADMISSIONS`: Hospital admission logs 131 | * `ICUSTAYS`: ICU-level data 132 | * `DIAGNOSES_ICD`: ICD-9 medical codes 133 | 134 | ### Data Ingestion Pipeline 135 | 136 | * Loaded CSVs into **SQLite** for fast, structured access 137 | * Optionally support **PostgreSQL** for scale 138 | * Explored initial joins using `subject_id`, `hadm_id`, and `icustay_id` 139 | 140 | --- 141 | 142 | ## 📊 Descriptive Analysis 143 | 144 | We began by understanding key patient journeys using 4 main tables: 145 | 146 | * Explored relationships between `PATIENTS`, `ADMISSIONS`, `ICUSTAYS`, and `DIAGNOSES_ICD` 147 | * Identified key identifiers for joins: `subject_id`, `hadm_id` 148 | * Highlighted distribution of diagnoses and ICU visits 149 | 150 | We also set up: 151 | 152 | * SQLite database from CSV 153 | * Initial EDA in Google Colab using Pandas 154 | * Schema inspection for metadata modeling 155 | 156 | --- 157 | 158 | ## 🧠 Methodology: RAG-based LLM System 159 | 160 | ### 🔧 System Steps: 161 | 162 | 1. **Metadata Extraction** 163 | 164 | * Generate JSON summaries of table schemas (columns, types, join keys) 165 | 2. **Embedding Generation** 166 | 167 | * Use `all-MiniLM-L6-v2` from SentenceTransformers 168 | * Encode schema metadata into dense vectors 169 | 3. **Vector DB (ChromaDB)** 170 | 171 | * Store embeddings and enable semantic retrieval 172 | 4. **Retrieval Layer** 173 | 174 | * Given a user question, retrieve top-k relevant table schemas 175 | 5. **Prompt Construction** 176 | 177 | * Inject schema context + user query into GPT-3.5-Turbo prompt 178 | 6. **LLM SQL Generation** 179 | 180 | * Parse GPT output to SQL, execute, and return results 181 | 182 | All components were orchestrated within a modular Python architecture with `main.py` (CLI) and `ui.py` (Streamlit). 
183 | 184 | 185 | --- 186 | 187 | ## 📈 Evaluation Strategy 188 | 189 | ### 🎯 Ground Truth Creation 190 | 191 | * Defined 15 clinical questions with gold SQL and results 192 | * Example: *"What procedures are most common among deceased patients?"* 193 | * Evaluated SQL outputs for correctness and execution success 194 | 195 | ### 📏 Metrics 196 | 197 | | Metric | Description | 198 | | -------------------------- | ---------------------------------------------- | 199 | | Execution Accuracy | % of SQL queries that executed without error | 200 | | Result Overlap (Jaccard) | Match between LLM vs. ground truth results | 201 | | Schema Retrieval Precision | % of correct tables retrieved in top-k context | 202 | | Prompt Token Size | Avg tokens used in prompt to GPT-3.5 | 203 | | Latency / Cost | Time + API cost per query | 204 | 205 | --- 206 | 207 | ## 📊 Results Summary 208 | 209 | * ✅ Execution Accuracy: **87%** (13/15 queries successful) 210 | * ✅ Result Overlap (Jaccard): Avg **0.72** 211 | * ✅ Retrieval hit rate: **90%** relevant tables in top-k 212 | * ⚠️ Common failure: SQL hallucination in JOINs or WHERE clauses 213 | 214 | --- 215 | 216 | ## ⚠️ Challenges & Takeaways 217 | 218 | * Complex schema with repeated identifiers across tables 219 | * Token limit requires prompt compression / top-k filtering 220 | * LLMs occasionally hallucinate JOIN conditions 221 | * Some vague queries required schema-specific disambiguation 222 | 223 | --- 224 | 225 | ## 🔭 Future Work 226 | 227 | * Fine-tune with healthcare-specific SQL data (MimicSQL, Spider) 228 | * Add error-handling and user-guided corrections 229 | * Integrate with PostgreSQL for production-scale queries 230 | * Explore open-source LLMs with local inference (e.g., SQLCoder) 231 | 232 | --- 233 | 234 | ## 📚 Related Work 235 | 236 | * MimicSQL: Fine-tuned Text2SQL on MIMIC (Zhang et al., 2023) 237 | * RAG-based Question Answering (Lewis et al., 2020) 238 | * Spider Benchmark for cross-domain SQL generation (Yu et al., 
2018) 239 | 240 | --- 241 | 242 | ## 🔗 References 243 | 244 | * Johnson, A., Pollard, T., & Mark, R. (2016). MIMIC-III Clinical Database. [https://doi.org/10.13026/C2XW26](https://doi.org/10.13026/C2XW26) 245 | * mimic-III-10k \[Kaggle]. [https://www.kaggle.com/datasets/bilal1907/mimic-iii-10k](https://www.kaggle.com/datasets/bilal1907/mimic-iii-10k) 246 | * Lewis, P. et al. (2020). Retrieval-Augmented Generation. NeurIPS 33 247 | * Yu, T. et al. (2018). Spider Dataset. EMNLP 248 | * Zhang, H. et al. (2023). MimicSQL. ACL. [https://arxiv.org/abs/2305.11921](https://arxiv.org/abs/2305.11921) 249 | 250 | --- 251 | 252 | ## 👥 Authors 253 | 254 | * **Ashish Salunkhe** — [ashishsalunke.com](https://ashishsalunke.com) 255 | * **Aryaman Paigankar** — University of Maryland 256 | 257 | 258 | -------------------------------------------------------------------------------- /app/ui.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import openai 3 | import sqlite3 4 | import pandas as pd 5 | import json 6 | import os 7 | import requests 8 | import nltk 9 | import logging 10 | from nltk.corpus import stopwords 11 | from nltk.tokenize import word_tokenize 12 | 13 | # === Logging Setup === 14 | logging.basicConfig( 15 | level=logging.INFO, 16 | format="%(asctime)s [%(levelname)s] %(message)s", 17 | handlers=[logging.StreamHandler()] 18 | ) 19 | logger = logging.getLogger(__name__) 20 | 21 | # === NLTK Safe Downloader === 22 | try: 23 | nltk.data.find('tokenizers/punkt') 24 | except LookupError: 25 | nltk.download('punkt') 26 | try: 27 | nltk.data.find('corpora/stopwords') 28 | except LookupError: 29 | nltk.download('stopwords') 30 | 31 | # === Paths === 32 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 33 | DATA_DIR = os.path.join(BASE_DIR, "data") 34 | DB_PATH = os.path.join(DATA_DIR, "mimic_iii.db") 35 | SCHEMA_JSON_PATH = os.path.join(DATA_DIR, "schema_map.json") 36 | LOG_PATH = 
@st.cache_data
def load_schema():
    """Load the table/column schema map from ``SCHEMA_JSON_PATH``.

    Returns:
        The parsed JSON content of the schema file.

    Raises:
        OSError: The schema file is missing or cannot be read.
        ValueError: The file is not valid JSON or not valid UTF-8
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses).

    The failure is logged and shown in the UI, then re-raised. Falling off
    the end of the function would return ``None``, which the cache decorator
    would memoise as if it were a real schema, and which callers would then
    trip over with an unrelated ``AttributeError``.
    """
    try:
        with open(SCHEMA_JSON_PATH, encoding="utf-8") as f:
            schema = json.load(f)
    except (OSError, ValueError) as e:
        logger.error(f"Error loading schema: {e}")
        st.error("Failed to load schema.")
        raise
    logger.info("Schema loaded from JSON.")
    return schema
def run_sql(query):
    """Execute *query* against the SQLite database at ``DB_PATH``.

    Args:
        query: A single SQL statement.

    Returns:
        A ``pandas.DataFrame`` with the fetched rows and the cursor's column
        names. Statements that produce no result set return an empty
        DataFrame.

    Raises:
        Exception: Any error raised while connecting or executing is logged
            and re-raised unchanged, so the caller can surface it in the UI.
    """
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cur = conn.cursor()
        cur.execute(query)
        # `description` is None when the statement yields no result set;
        # iterating it would raise an unhelpful TypeError.
        if cur.description is None:
            result = pd.DataFrame()
        else:
            cols = [desc[0] for desc in cur.description]
            result = pd.DataFrame(cur.fetchall(), columns=cols)
        logger.info("SQL query executed successfully.")
        return result
    except Exception as e:
        logger.error(f"SQL execution failed: {e}")
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Close on both success and failure; the connection used to leak
        # whenever execute() raised.
        if conn is not None:
            conn.close()
def fetch_medical_context(terms, timeout=5.0):
    """Fetch short Wikipedia summaries for the given terms (best effort).

    Args:
        terms: Iterable of term strings to look up.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        Dict mapping each term to its summary ``extract`` text. A term is
        included only when the request returned HTTP 200 and the extract is
        longer than 100 characters. Terms that fail are logged and skipped;
        this function does not raise for network or decoding errors.
    """
    from urllib.parse import quote

    summaries = {}
    for term in terms:
        if term in summaries:
            # Repeated word in the question: reuse the earlier result.
            continue
        # Percent-encode so characters such as "/" or "?" cannot alter the
        # request path.
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{quote(term, safe='')}"
        try:
            res = requests.get(url, timeout=timeout)
            if res.status_code != 200:
                continue
            data = res.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            logger.warning(f"Failed to fetch context for {term}: {e}")
            continue
        extract = data.get("extract") if isinstance(data, dict) else None
        if isinstance(extract, str) and len(extract) > 100:
            summaries[term] = extract
            logger.info(f"Fetched context for term: {term}")
    return summaries
# --- Query History ---
# Collapsed-by-default panel that lists logged queries, offers them as a CSV
# download, and re-executes the SQL of one selected entry.
with st.expander("🕘 Query History", expanded=False):
    history_df = load_query_history()
    if not history_df.empty:
        # Force the preview column to text so it is displayed as-is.
        history_df['result_preview'] = history_df['result_preview'].astype(str)
        # Rename to display headers and reverse the row order (highest index
        # first).
        display_df = history_df[["question", "sql_query", "result_preview"]].rename(columns={
            "question": "Question", "sql_query": "SQL Query", "result_preview": "Result"
        }).sort_index(ascending=False)
        st.dataframe(display_df)

        st.download_button("📥 Download Query History", display_df.to_csv(index=False).encode("utf-8"),
                           file_name="query_history.csv", mime="text/csv")

        # NOTE(review): a selectbox presumably always has a selection, so the
        # chosen SQL looks like it runs on every script rerun, not only on an
        # explicit user action — confirm this is intended.
        selected = st.selectbox("🔁 Re-run query", display_df["Question"])
        # If the same question was logged several times, take the first match
        # in display order.
        rerun_row = display_df[display_df["Question"] == selected].iloc[0]
        st.code(rerun_row["SQL Query"], language="sql")
        try:
            df = run_sql(rerun_row["SQL Query"])
            st.dataframe(df)
        except Exception as e:
            # Show the failure inside the panel instead of aborting the page.
            st.error(f"Execution failed: {e}")
    else:
        st.info("No queries logged yet.")
# --- Manual SQL ---
# Lets the user run hand-written SQL. The input is untrusted, so it is
# screened before being passed to run_sql().
with st.expander("🧮 Run Manual SQL Query"):
    manual_sql = st.text_area("Enter your SQL query here (SELECT-only):")
    if manual_sql:
        import re  # local import: the file's import block does not provide `re`

        # Whole-keyword match. The previous substring test rejected harmless
        # identifiers such as "last_updated" or "deleted_flag". "REPLACE" is
        # blocked only as "REPLACE INTO", so the replace() SQL function
        # remains usable inside a SELECT.
        destructive = re.search(
            r"\b(DROP|DELETE|UPDATE|INSERT|REPLACE\s+INTO)\b",
            manual_sql,
            re.IGNORECASE,
        )
        # The field is labelled SELECT-only. Requiring a SELECT/WITH prefix
        # also rejects ALTER, CREATE, ATTACH, PRAGMA and similar statements,
        # which the keyword list alone let through.
        read_only_prefix = re.match(r"\s*(SELECT|WITH)\b", manual_sql, re.IGNORECASE)

        if destructive:
            st.error("❌ Destructive SQL statements are not allowed.")
            logger.warning("Blocked destructive SQL query.")
        elif not read_only_prefix:
            st.error("❌ Only SELECT queries are allowed.")
            logger.warning("Blocked non-SELECT SQL query.")
        else:
            try:
                df = run_sql(manual_sql)
                st.dataframe(df)
                logger.info("Manual SQL executed successfully.")
            except Exception as e:
                st.error(f"Execution failed: {e}")
                logger.error(f"Manual SQL error: {e}")
transfusion"",""PROCEDURE_COUNT"":226}]" 9 | How many people have diabetes?,"SELECT COUNT(DISTINCT SUBJECT_ID) 10 | FROM diagnoses_icd_random 11 | WHERE ICD9_CODE LIKE '250%'","[{""COUNT(DISTINCT SUBJECT_ID)"":2272}]" 12 | How many have diabetes?,"SELECT COUNT(DISTINCT SUBJECT_ID) 13 | FROM diagnoses_icd_random 14 | WHERE ICD9_CODE LIKE '250%'","[{""COUNT(DISTINCT SUBJECT_ID)"":2272}]" 15 | How many people dont have diabetes but are on dialysis?,"SELECT COUNT(DISTINCT p.SUBJECT_ID) 16 | FROM patients_random p 17 | JOIN diagnoses_icd_random d ON p.SUBJECT_ID = d.SUBJECT_ID 18 | JOIN procedures_icd_random pr ON p.SUBJECT_ID = pr.SUBJECT_ID 19 | WHERE d.ICD9_CODE NOT LIKE '250%' 20 | AND pr.ICD9_CODE LIKE '39%' OR pr.ICD9_CODE LIKE '54%';","[{""COUNT(DISTINCT p.SUBJECT_ID)"":126}]" 21 | What are the top 10 diagnoses (by ICD-9 code) among patients who were admitted to the ICU and later died in the hospital?,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 22 | FROM diagnoses_icd_random 23 | WHERE HADM_ID IN ( 24 | SELECT HADM_ID 25 | FROM icustays_random 26 | WHERE SUBJECT_ID IN ( 27 | SELECT SUBJECT_ID 28 | FROM patients_random 29 | WHERE EXPIRE_FLAG = 1 30 | ) 31 | ) 32 | GROUP BY icd9_code 33 | ORDER BY diagnosis_count DESC 34 | LIMIT 10;","[{""ICD9_CODE"":""4019"",""diagnosis_count"":86},{""ICD9_CODE"":""4280"",""diagnosis_count"":75},{""ICD9_CODE"":""42731"",""diagnosis_count"":68},{""ICD9_CODE"":""41401"",""diagnosis_count"":55},{""ICD9_CODE"":""5849"",""diagnosis_count"":46}]" 35 | What are the top 10 diagnoses (by ICD-9 code) among patients who were admitted to the ICU and later died in the hospital?,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 36 | FROM diagnoses_icd_random 37 | WHERE hadm_id IN ( 38 | SELECT DISTINCT hadm_id 39 | FROM icustays_random 40 | WHERE hadm_id IS NOT NULL 41 | ) 42 | AND hadm_id IN ( 43 | SELECT DISTINCT hadm_id 44 | FROM admissions_random 45 | WHERE hospital_expire_flag = 1 46 | ) 47 | GROUP BY icd9_code 48 | ORDER 
BY diagnosis_count DESC 49 | LIMIT 10;","[{""ICD9_CODE"":""4280"",""diagnosis_count"":22},{""ICD9_CODE"":""42731"",""diagnosis_count"":21},{""ICD9_CODE"":""4019"",""diagnosis_count"":20},{""ICD9_CODE"":""5845"",""diagnosis_count"":13},{""ICD9_CODE"":""5849"",""diagnosis_count"":12}]" 50 | What are the top 10 diagnoses (by ICD-9 code) among patients who were admitted to the ICU and later died in the hospital?,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 51 | FROM diagnoses_icd_random 52 | WHERE HADM_ID IN ( 53 | SELECT HADM_ID 54 | FROM icustays_random 55 | WHERE SUBJECT_ID IN ( 56 | SELECT SUBJECT_ID 57 | FROM patients_random 58 | WHERE EXPIRE_FLAG = 1 59 | ) 60 | ) 61 | GROUP BY icd9_code 62 | ORDER BY diagnosis_count DESC 63 | LIMIT 10;","[{""ICD9_CODE"":""4019"",""diagnosis_count"":86},{""ICD9_CODE"":""4280"",""diagnosis_count"":75},{""ICD9_CODE"":""42731"",""diagnosis_count"":68},{""ICD9_CODE"":""41401"",""diagnosis_count"":55},{""ICD9_CODE"":""5849"",""diagnosis_count"":46}]" 64 | What are the top 10 diagnoses (by ICD-9 code) among patients who were admitted to the ICU and later died in the hospital?,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 65 | FROM diagnoses_icd_random 66 | WHERE HADM_ID IN ( 67 | SELECT HADM_ID 68 | FROM icustays_random 69 | WHERE SUBJECT_ID IN ( 70 | SELECT SUBJECT_ID 71 | FROM patients_random 72 | WHERE EXPIRE_FLAG = 1 73 | ) 74 | ) 75 | GROUP BY icd9_code 76 | ORDER BY diagnosis_count DESC 77 | LIMIT 10;","[{""ICD9_CODE"":""4019"",""diagnosis_count"":86},{""ICD9_CODE"":""4280"",""diagnosis_count"":75},{""ICD9_CODE"":""42731"",""diagnosis_count"":68},{""ICD9_CODE"":""41401"",""diagnosis_count"":55},{""ICD9_CODE"":""5849"",""diagnosis_count"":46}]" 78 | What are the top 10 diagnoses (by ICD-9 code) among patients who were admitted to the ICU and later died in the hospital?,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 79 | FROM diagnoses_icd_random 80 | WHERE hadm_id IN ( 81 | SELECT DISTINCT hadm_id 82 | 
FROM icustays_random 83 | WHERE hadm_id IS NOT NULL 84 | ) 85 | AND hadm_id IN ( 86 | SELECT DISTINCT hadm_id 87 | FROM admissions_random 88 | WHERE hospital_expire_flag = 1 89 | ) 90 | GROUP BY icd9_code 91 | ORDER BY diagnosis_count DESC 92 | LIMIT 10;","[{""ICD9_CODE"":""4280"",""diagnosis_count"":22},{""ICD9_CODE"":""42731"",""diagnosis_count"":21},{""ICD9_CODE"":""4019"",""diagnosis_count"":20},{""ICD9_CODE"":""5845"",""diagnosis_count"":13},{""ICD9_CODE"":""5849"",""diagnosis_count"":12}]" 93 | What are the most common ICD-9 diagnoses among ICU patients,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 94 | FROM diagnoses_icd_random 95 | GROUP BY icd9_code 96 | ORDER BY diagnosis_count DESC;","[{""ICD9_CODE"":""4019"",""diagnosis_count"":4386},{""ICD9_CODE"":""4280"",""diagnosis_count"":2821},{""ICD9_CODE"":""42731"",""diagnosis_count"":2808},{""ICD9_CODE"":""41401"",""diagnosis_count"":2711},{""ICD9_CODE"":""25000"",""diagnosis_count"":1925}]" 97 | What are the most common ICD-9 diagnoses among ICU patients,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 98 | FROM diagnoses_icd_random 99 | GROUP BY icd9_code 100 | ORDER BY diagnosis_count DESC;","[{""ICD9_CODE"":""4019"",""diagnosis_count"":4386},{""ICD9_CODE"":""4280"",""diagnosis_count"":2821},{""ICD9_CODE"":""42731"",""diagnosis_count"":2808},{""ICD9_CODE"":""41401"",""diagnosis_count"":2711},{""ICD9_CODE"":""25000"",""diagnosis_count"":1925}]" 101 | What are the most common ICD-9 diagnoses among ICU patients,"SELECT icd9_code, COUNT(icd9_code) AS diagnosis_count 102 | FROM diagnoses_icd_random 103 | GROUP BY icd9_code 104 | ORDER BY diagnosis_count DESC;","[{""ICD9_CODE"":""4019"",""diagnosis_count"":4386},{""ICD9_CODE"":""4280"",""diagnosis_count"":2821},{""ICD9_CODE"":""42731"",""diagnosis_count"":2808},{""ICD9_CODE"":""41401"",""diagnosis_count"":2711},{""ICD9_CODE"":""25000"",""diagnosis_count"":1925}]" 105 | What conditions frequently occur in patients admitted for sepsis?,"SELECT 
d_icd_diagnoses.LONG_TITLE, COUNT(diagnoses_icd_random.HADM_ID) AS frequency 106 | FROM diagnoses_icd_random 107 | JOIN d_icd_diagnoses ON diagnoses_icd_random.ICD9_CODE = d_icd_diagnoses.ICD9_CODE 108 | WHERE diagnoses_icd_random.HADM_ID IN 109 | (SELECT HADM_ID FROM diagnoses_icd_random WHERE ICD9_CODE LIKE '995%') 110 | GROUP BY diagnoses_icd_random.ICD9_CODE 111 | ORDER BY frequency DESC;","[{""LONG_TITLE"":""Severe sepsis"",""frequency"":829},{""LONG_TITLE"":""Unspecified septicemia"",""frequency"":692},{""LONG_TITLE"":""Septic shock"",""frequency"":548},{""LONG_TITLE"":""Acute kidney failure, unspecified"",""frequency"":470},{""LONG_TITLE"":""Acute respiratory failure"",""frequency"":449}]" 112 | What conditions frequently occur in patients admitted for sepsis?,"SELECT d_icd_diagnoses.LONG_TITLE, COUNT(diagnoses_icd_random.HADM_ID) AS frequency 113 | FROM diagnoses_icd_random 114 | JOIN d_icd_diagnoses ON diagnoses_icd_random.ICD9_CODE = d_icd_diagnoses.ICD9_CODE 115 | WHERE diagnoses_icd_random.HADM_ID IN 116 | (SELECT HADM_ID FROM diagnoses_icd_random WHERE ICD9_CODE LIKE '995%') 117 | GROUP BY diagnoses_icd_random.ICD9_CODE 118 | ORDER BY frequency DESC;","[{""LONG_TITLE"":""Severe sepsis"",""frequency"":829},{""LONG_TITLE"":""Unspecified septicemia"",""frequency"":692},{""LONG_TITLE"":""Septic shock"",""frequency"":548},{""LONG_TITLE"":""Acute kidney failure, unspecified"",""frequency"":470},{""LONG_TITLE"":""Acute respiratory failure"",""frequency"":449}]" 119 | What drugs were most prescribed to patients with pneumonia diagnoses?,"SELECT p.DRUG_NAME_GENERIC, COUNT(p.DRUG_NAME_GENERIC) AS prescription_count 120 | FROM prescriptions_random p 121 | JOIN diagnoses_icd_random d ON p.HADM_ID = d.HADM_ID 122 | WHERE d.ICD9_CODE LIKE '486%' 123 | GROUP BY p.DRUG_NAME_GENERIC 124 | ORDER BY prescription_count DESC;","[{""DRUG_NAME_GENERIC"":""Furosemide"",""prescription_count"":1123},{""DRUG_NAME_GENERIC"":""Potassium 
Chloride"",""prescription_count"":633},{""DRUG_NAME_GENERIC"":""Metoprolol"",""prescription_count"":594},{""DRUG_NAME_GENERIC"":""Sodium Chloride 0.9% Flush"",""prescription_count"":486},{""DRUG_NAME_GENERIC"":""Lorazepam"",""prescription_count"":405}]" 125 | What drugs were most prescribed to patients with pneumonia diagnoses?,"SELECT p.DRUG_NAME_GENERIC, COUNT(p.DRUG_NAME_GENERIC) AS prescription_count 126 | FROM prescriptions_random p 127 | JOIN diagnoses_icd_random d ON p.HADM_ID = d.HADM_ID 128 | WHERE d.ICD9_CODE LIKE '486%' 129 | GROUP BY p.DRUG_NAME_GENERIC 130 | ORDER BY prescription_count DESC;","[{""DRUG_NAME_GENERIC"":""Furosemide"",""prescription_count"":1123},{""DRUG_NAME_GENERIC"":""Potassium Chloride"",""prescription_count"":633},{""DRUG_NAME_GENERIC"":""Metoprolol"",""prescription_count"":594},{""DRUG_NAME_GENERIC"":""Sodium Chloride 0.9% Flush"",""prescription_count"":486},{""DRUG_NAME_GENERIC"":""Lorazepam"",""prescription_count"":405}]" 131 | What are the top comorbidities in diabetic patients?,"SELECT d_icd_diagnoses.LONG_TITLE, COUNT(diagnoses_icd_random.SUBJECT_ID) AS num_patients 132 | FROM diagnoses_icd_random 133 | JOIN d_icd_diagnoses ON diagnoses_icd_random.ICD9_CODE = d_icd_diagnoses.ICD9_CODE 134 | WHERE diagnoses_icd_random.ICD9_CODE LIKE '250%' 135 | GROUP BY diagnoses_icd_random.ICD9_CODE 136 | ORDER BY num_patients DESC;","[{""LONG_TITLE"":""Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled"",""num_patients"":1925},{""LONG_TITLE"":""Diabetes with neurological manifestations, type II or unspecified type, not stated as uncontrolled"",""num_patients"":284},{""LONG_TITLE"":""Diabetes with renal manifestations, type II or unspecified type, not stated as uncontrolled"",""num_patients"":225},{""LONG_TITLE"":""Diabetes with neurological manifestations, type I [juvenile type], not stated as uncontrolled"",""num_patients"":121},{""LONG_TITLE"":""Diabetes with ketoacidosis, type I 
[juvenile type], uncontrolled"",""num_patients"":119}]" 137 | What are the top comorbidities in diabetic patients?,"SELECT d_icd_diagnoses.LONG_TITLE, COUNT(diagnoses_icd_random.SUBJECT_ID) AS num_patients 138 | FROM diagnoses_icd_random 139 | JOIN d_icd_diagnoses ON diagnoses_icd_random.ICD9_CODE = d_icd_diagnoses.ICD9_CODE 140 | WHERE diagnoses_icd_random.ICD9_CODE LIKE '250%' 141 | GROUP BY diagnoses_icd_random.ICD9_CODE 142 | ORDER BY num_patients DESC;","[{""LONG_TITLE"":""Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled"",""num_patients"":1925},{""LONG_TITLE"":""Diabetes with neurological manifestations, type II or unspecified type, not stated as uncontrolled"",""num_patients"":284},{""LONG_TITLE"":""Diabetes with renal manifestations, type II or unspecified type, not stated as uncontrolled"",""num_patients"":225},{""LONG_TITLE"":""Diabetes with neurological manifestations, type I [juvenile type], not stated as uncontrolled"",""num_patients"":121},{""LONG_TITLE"":""Diabetes with ketoacidosis, type I [juvenile type], uncontrolled"",""num_patients"":119}]" 143 | What are the top comorbidities in diabetic patients?,"SELECT d_icd_diagnoses.LONG_TITLE, COUNT(diagnoses_icd_random.SUBJECT_ID) AS num_patients 144 | FROM diagnoses_icd_random 145 | JOIN d_icd_diagnoses ON diagnoses_icd_random.ICD9_CODE = d_icd_diagnoses.ICD9_CODE 146 | WHERE diagnoses_icd_random.ICD9_CODE LIKE '250%' 147 | GROUP BY diagnoses_icd_random.ICD9_CODE 148 | ORDER BY num_patients DESC;","[{""LONG_TITLE"":""Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled"",""num_patients"":1925},{""LONG_TITLE"":""Diabetes with neurological manifestations, type II or unspecified type, not stated as uncontrolled"",""num_patients"":284},{""LONG_TITLE"":""Diabetes with renal manifestations, type II or unspecified type, not stated as uncontrolled"",""num_patients"":225},{""LONG_TITLE"":""Diabetes 
with neurological manifestations, type I [juvenile type], not stated as uncontrolled"",""num_patients"":121},{""LONG_TITLE"":""Diabetes with ketoacidosis, type I [juvenile type], uncontrolled"",""num_patients"":119}]" 149 | Which organisms are most resistant to antibiotics?,"SELECT ORG_NAME, COUNT(ORG_NAME) AS ORGANISM_COUNT 150 | FROM microbiologyevents_random 151 | WHERE AB_NAME IS NOT NULL 152 | GROUP BY ORG_NAME 153 | ORDER BY ORGANISM_COUNT DESC;","[{""ORG_NAME"":""STAPH AUREUS COAG +"",""ORGANISM_COUNT"":15647},{""ORG_NAME"":""ESCHERICHIA COLI"",""ORGANISM_COUNT"":15582},{""ORG_NAME"":""KLEBSIELLA PNEUMONIAE"",""ORGANISM_COUNT"":7467},{""ORG_NAME"":""STAPHYLOCOCCUS, COAGULASE NEGATIVE"",""ORGANISM_COUNT"":6962},{""ORG_NAME"":""PSEUDOMONAS AERUGINOSA"",""ORGANISM_COUNT"":6399}]" 154 | What are the most common lab tests for ICU patients diagnosed with sepsis or pneumonia?,"SELECT d_labitems.label, COUNT(*) AS test_count 155 | FROM labevents_random 156 | JOIN d_labitems ON labevents_random.itemid = d_labitems.itemid 157 | WHERE labevents_random.subject_id IN ( 158 | SELECT DISTINCT subject_id 159 | FROM diagnoses_icd_random 160 | WHERE icd9_code LIKE '995%' OR icd9_code LIKE '486%' 161 | ) 162 | GROUP BY d_labitems.label 163 | ORDER BY test_count DESC;","[{""LABEL"":""Glucose"",""test_count"":19827},{""LABEL"":""Hematocrit"",""test_count"":17291},{""LABEL"":""Potassium"",""test_count"":17213},{""LABEL"":""Sodium"",""test_count"":16468},{""LABEL"":""Creatinine"",""test_count"":16379}]" 164 | What are the most common lab tests for ICU patients diagnosed with sepsis or pneumonia?,"SELECT d_labitems.label, COUNT(*) AS test_count 165 | FROM labevents_random 166 | JOIN d_labitems ON labevents_random.itemid = d_labitems.itemid 167 | WHERE labevents_random.subject_id IN ( 168 | SELECT DISTINCT subject_id 169 | FROM diagnoses_icd_random 170 | WHERE icd9_code LIKE '995%' OR icd9_code LIKE '486%' 171 | ) 172 | GROUP BY d_labitems.label 173 | ORDER BY test_count 
DESC;","[{""LABEL"":""Glucose"",""test_count"":19827},{""LABEL"":""Hematocrit"",""test_count"":17291},{""LABEL"":""Potassium"",""test_count"":17213},{""LABEL"":""Sodium"",""test_count"":16468},{""LABEL"":""Creatinine"",""test_count"":16379}]" 174 | What are the most common lab tests for ICU patients diagnosed with sepsis or pneumonia?,"SELECT d_labitems.label, COUNT(*) AS lab_count 175 | FROM labevents_random 176 | JOIN d_labitems ON labevents_random.itemid = d_labitems.itemid 177 | JOIN diagnoses_icd_random ON labevents_random.subject_id = diagnoses_icd_random.subject_id 178 | WHERE diagnoses_icd_random.icd9_code LIKE '995%' OR diagnoses_icd_random.icd9_code LIKE '486%' 179 | GROUP BY d_labitems.label 180 | ORDER BY lab_count DESC;","[{""LABEL"":""Glucose"",""lab_count"":35046},{""LABEL"":""Hematocrit"",""lab_count"":31362},{""LABEL"":""Potassium"",""lab_count"":31255},{""LABEL"":""Sodium"",""lab_count"":29770},{""LABEL"":""Creatinine"",""lab_count"":29524}]" 181 | What are the most common lab tests for ICU patients diagnosed with sepsis or pneumonia?,"SELECT d_labitems.label, COUNT(*) AS test_count 182 | FROM labevents_random 183 | JOIN d_labitems ON labevents_random.itemid = d_labitems.itemid 184 | JOIN diagnoses_icd_random ON labevents_random.subject_id = diagnoses_icd_random.subject_id 185 | WHERE diagnoses_icd_random.icd9_code LIKE '99591%' OR diagnoses_icd_random.icd9_code LIKE '486%' 186 | GROUP BY d_labitems.label 187 | ORDER BY test_count DESC;","[{""LABEL"":""Glucose"",""test_count"":19800},{""LABEL"":""Hematocrit"",""test_count"":17983},{""LABEL"":""Potassium"",""test_count"":17839},{""LABEL"":""Sodium"",""test_count"":16983},{""LABEL"":""Creatinine"",""test_count"":16839}]" 188 | What are the most common lab tests for ICU patients diagnosed with sepsis or pneumonia?,"SELECT d_labitems.label, COUNT(*) AS test_count 189 | FROM labevents_random 190 | JOIN d_labitems ON labevents_random.itemid = d_labitems.itemid 191 | JOIN diagnoses_icd_random ON 
labevents_random.subject_id = diagnoses_icd_random.subject_id 192 | WHERE diagnoses_icd_random.icd9_code LIKE '995%' OR diagnoses_icd_random.icd9_code LIKE '486%' 193 | GROUP BY d_labitems.label 194 | ORDER BY test_count DESC;","[{""LABEL"":""Glucose"",""test_count"":35046},{""LABEL"":""Hematocrit"",""test_count"":31362},{""LABEL"":""Potassium"",""test_count"":31255},{""LABEL"":""Sodium"",""test_count"":29770},{""LABEL"":""Creatinine"",""test_count"":29524}]" 195 | -------------------------------------------------------------------------------- /data/schema_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "patients_random": { 3 | "columns": [ 4 | "ROW_ID", 5 | "SUBJECT_ID", 6 | "GENDER", 7 | "DOB", 8 | "DOD", 9 | "DOD_HOSP", 10 | "DOD_SSN", 11 | "EXPIRE_FLAG" 12 | ], 13 | "types": { 14 | "ROW_ID": "INTEGER", 15 | "SUBJECT_ID": "INTEGER", 16 | "GENDER": "TEXT", 17 | "DOB": "TEXT", 18 | "DOD": "TEXT", 19 | "DOD_HOSP": "TEXT", 20 | "DOD_SSN": "TEXT", 21 | "EXPIRE_FLAG": "INTEGER" 22 | } 23 | }, 24 | "outputevents_random": { 25 | "columns": [ 26 | "ROW_ID", 27 | "SUBJECT_ID", 28 | "HADM_ID", 29 | "ICUSTAY_ID", 30 | "CHARTTIME", 31 | "ITEMID", 32 | "VALUE", 33 | "VALUEUOM", 34 | "STORETIME", 35 | "CGID", 36 | "STOPPED", 37 | "NEWBOTTLE", 38 | "ISERROR" 39 | ], 40 | "types": { 41 | "ROW_ID": "INTEGER", 42 | "SUBJECT_ID": "INTEGER", 43 | "HADM_ID": "REAL", 44 | "ICUSTAY_ID": "REAL", 45 | "CHARTTIME": "TEXT", 46 | "ITEMID": "INTEGER", 47 | "VALUE": "REAL", 48 | "VALUEUOM": "TEXT", 49 | "STORETIME": "TEXT", 50 | "CGID": "INTEGER", 51 | "STOPPED": "REAL", 52 | "NEWBOTTLE": "REAL", 53 | "ISERROR": "REAL" 54 | } 55 | }, 56 | "transfers_random": { 57 | "columns": [ 58 | "ROW_ID", 59 | "SUBJECT_ID", 60 | "HADM_ID", 61 | "ICUSTAY_ID", 62 | "DBSOURCE", 63 | "EVENTTYPE", 64 | "PREV_CAREUNIT", 65 | "CURR_CAREUNIT", 66 | "PREV_WARDID", 67 | "CURR_WARDID", 68 | "INTIME", 69 | "OUTTIME", 70 | "LOS" 71 | ], 72 | "types": { 73 | 
"ROW_ID": "INTEGER", 74 | "SUBJECT_ID": "INTEGER", 75 | "HADM_ID": "INTEGER", 76 | "ICUSTAY_ID": "REAL", 77 | "DBSOURCE": "TEXT", 78 | "EVENTTYPE": "TEXT", 79 | "PREV_CAREUNIT": "TEXT", 80 | "CURR_CAREUNIT": "TEXT", 81 | "PREV_WARDID": "REAL", 82 | "CURR_WARDID": "REAL", 83 | "INTIME": "TEXT", 84 | "OUTTIME": "TEXT", 85 | "LOS": "REAL" 86 | } 87 | }, 88 | "procedures_icd_random": { 89 | "columns": [ 90 | "ROW_ID", 91 | "SUBJECT_ID", 92 | "HADM_ID", 93 | "SEQ_NUM", 94 | "ICD9_CODE" 95 | ], 96 | "types": { 97 | "ROW_ID": "INTEGER", 98 | "SUBJECT_ID": "INTEGER", 99 | "HADM_ID": "INTEGER", 100 | "SEQ_NUM": "INTEGER", 101 | "ICD9_CODE": "INTEGER" 102 | } 103 | }, 104 | "labevents_random": { 105 | "columns": [ 106 | "ROW_ID", 107 | "SUBJECT_ID", 108 | "HADM_ID", 109 | "ITEMID", 110 | "CHARTTIME", 111 | "VALUE", 112 | "VALUENUM", 113 | "VALUEUOM", 114 | "FLAG" 115 | ], 116 | "types": { 117 | "ROW_ID": "INTEGER", 118 | "SUBJECT_ID": "INTEGER", 119 | "HADM_ID": "REAL", 120 | "ITEMID": "INTEGER", 121 | "CHARTTIME": "TEXT", 122 | "VALUE": "TEXT", 123 | "VALUENUM": "REAL", 124 | "VALUEUOM": "TEXT", 125 | "FLAG": "TEXT" 126 | } 127 | }, 128 | "microbiologyevents_random": { 129 | "columns": [ 130 | "ROW_ID", 131 | "SUBJECT_ID", 132 | "HADM_ID", 133 | "CHARTDATE", 134 | "CHARTTIME", 135 | "SPEC_ITEMID", 136 | "SPEC_TYPE_DESC", 137 | "ORG_ITEMID", 138 | "ORG_NAME", 139 | "ISOLATE_NUM", 140 | "AB_ITEMID", 141 | "AB_NAME", 142 | "DILUTION_TEXT", 143 | "DILUTION_COMPARISON", 144 | "DILUTION_VALUE", 145 | "INTERPRETATION" 146 | ], 147 | "types": { 148 | "ROW_ID": "INTEGER", 149 | "SUBJECT_ID": "INTEGER", 150 | "HADM_ID": "INTEGER", 151 | "CHARTDATE": "TEXT", 152 | "CHARTTIME": "TEXT", 153 | "SPEC_ITEMID": "REAL", 154 | "SPEC_TYPE_DESC": "TEXT", 155 | "ORG_ITEMID": "REAL", 156 | "ORG_NAME": "TEXT", 157 | "ISOLATE_NUM": "REAL", 158 | "AB_ITEMID": "REAL", 159 | "AB_NAME": "TEXT", 160 | "DILUTION_TEXT": "TEXT", 161 | "DILUTION_COMPARISON": "TEXT", 162 | "DILUTION_VALUE": "REAL", 163 | 
"INTERPRETATION": "TEXT" 164 | } 165 | }, 166 | "d_cpt": { 167 | "columns": [ 168 | "ROW_ID", 169 | "CATEGORY", 170 | "SECTIONRANGE", 171 | "SECTIONHEADER", 172 | "SUBSECTIONRANGE", 173 | "SUBSECTIONHEADER", 174 | "CODESUFFIX", 175 | "MINCODEINSUBSECTION", 176 | "MAXCODEINSUBSECTION" 177 | ], 178 | "types": { 179 | "ROW_ID": "INTEGER", 180 | "CATEGORY": "INTEGER", 181 | "SECTIONRANGE": "TEXT", 182 | "SECTIONHEADER": "TEXT", 183 | "SUBSECTIONRANGE": "TEXT", 184 | "SUBSECTIONHEADER": "TEXT", 185 | "CODESUFFIX": "TEXT", 186 | "MINCODEINSUBSECTION": "INTEGER", 187 | "MAXCODEINSUBSECTION": "INTEGER" 188 | } 189 | }, 190 | "admissions_random": { 191 | "columns": [ 192 | "ROW_ID", 193 | "SUBJECT_ID", 194 | "HADM_ID", 195 | "ADMITTIME", 196 | "DISCHTIME", 197 | "DEATHTIME", 198 | "ADMISSION_TYPE", 199 | "ADMISSION_LOCATION", 200 | "DISCHARGE_LOCATION", 201 | "INSURANCE", 202 | "LANGUAGE", 203 | "RELIGION", 204 | "MARITAL_STATUS", 205 | "ETHNICITY", 206 | "EDREGTIME", 207 | "EDOUTTIME", 208 | "DIAGNOSIS", 209 | "HOSPITAL_EXPIRE_FLAG", 210 | "HAS_CHARTEVENTS_DATA" 211 | ], 212 | "types": { 213 | "ROW_ID": "INTEGER", 214 | "SUBJECT_ID": "INTEGER", 215 | "HADM_ID": "INTEGER", 216 | "ADMITTIME": "TEXT", 217 | "DISCHTIME": "TEXT", 218 | "DEATHTIME": "TEXT", 219 | "ADMISSION_TYPE": "TEXT", 220 | "ADMISSION_LOCATION": "TEXT", 221 | "DISCHARGE_LOCATION": "TEXT", 222 | "INSURANCE": "TEXT", 223 | "LANGUAGE": "TEXT", 224 | "RELIGION": "TEXT", 225 | "MARITAL_STATUS": "TEXT", 226 | "ETHNICITY": "TEXT", 227 | "EDREGTIME": "TEXT", 228 | "EDOUTTIME": "TEXT", 229 | "DIAGNOSIS": "TEXT", 230 | "HOSPITAL_EXPIRE_FLAG": "INTEGER", 231 | "HAS_CHARTEVENTS_DATA": "INTEGER" 232 | } 233 | }, 234 | "d_items": { 235 | "columns": [ 236 | "ROW_ID", 237 | "ITEMID", 238 | "LABEL", 239 | "ABBREVIATION", 240 | "DBSOURCE", 241 | "LINKSTO", 242 | "CATEGORY", 243 | "UNITNAME", 244 | "PARAM_TYPE", 245 | "CONCEPTID" 246 | ], 247 | "types": { 248 | "ROW_ID": "INTEGER", 249 | "ITEMID": "INTEGER", 250 | "LABEL": 
"TEXT", 251 | "ABBREVIATION": "TEXT", 252 | "DBSOURCE": "TEXT", 253 | "LINKSTO": "TEXT", 254 | "CATEGORY": "TEXT", 255 | "UNITNAME": "TEXT", 256 | "PARAM_TYPE": "TEXT", 257 | "CONCEPTID": "REAL" 258 | } 259 | }, 260 | "caregivers": { 261 | "columns": [ 262 | "ROW_ID", 263 | "CGID", 264 | "LABEL", 265 | "DESCRIPTION" 266 | ], 267 | "types": { 268 | "ROW_ID": "INTEGER", 269 | "CGID": "INTEGER", 270 | "LABEL": "TEXT", 271 | "DESCRIPTION": "TEXT" 272 | } 273 | }, 274 | "callout_random": { 275 | "columns": [ 276 | "ROW_ID", 277 | "SUBJECT_ID", 278 | "HADM_ID", 279 | "SUBMIT_WARDID", 280 | "SUBMIT_CAREUNIT", 281 | "CURR_WARDID", 282 | "CURR_CAREUNIT", 283 | "CALLOUT_WARDID", 284 | "CALLOUT_SERVICE", 285 | "REQUEST_TELE", 286 | "REQUEST_RESP", 287 | "REQUEST_CDIFF", 288 | "REQUEST_MRSA", 289 | "REQUEST_VRE", 290 | "CALLOUT_STATUS", 291 | "CALLOUT_OUTCOME", 292 | "DISCHARGE_WARDID", 293 | "ACKNOWLEDGE_STATUS", 294 | "CREATETIME", 295 | "UPDATETIME", 296 | "ACKNOWLEDGETIME", 297 | "OUTCOMETIME", 298 | "FIRSTRESERVATIONTIME", 299 | "CURRENTRESERVATIONTIME" 300 | ], 301 | "types": { 302 | "ROW_ID": "INTEGER", 303 | "SUBJECT_ID": "INTEGER", 304 | "HADM_ID": "INTEGER", 305 | "SUBMIT_WARDID": "REAL", 306 | "SUBMIT_CAREUNIT": "TEXT", 307 | "CURR_WARDID": "REAL", 308 | "CURR_CAREUNIT": "TEXT", 309 | "CALLOUT_WARDID": "INTEGER", 310 | "CALLOUT_SERVICE": "TEXT", 311 | "REQUEST_TELE": "INTEGER", 312 | "REQUEST_RESP": "INTEGER", 313 | "REQUEST_CDIFF": "INTEGER", 314 | "REQUEST_MRSA": "INTEGER", 315 | "REQUEST_VRE": "INTEGER", 316 | "CALLOUT_STATUS": "TEXT", 317 | "CALLOUT_OUTCOME": "TEXT", 318 | "DISCHARGE_WARDID": "REAL", 319 | "ACKNOWLEDGE_STATUS": "TEXT", 320 | "CREATETIME": "TEXT", 321 | "UPDATETIME": "TEXT", 322 | "ACKNOWLEDGETIME": "TEXT", 323 | "OUTCOMETIME": "TEXT", 324 | "FIRSTRESERVATIONTIME": "TEXT", 325 | "CURRENTRESERVATIONTIME": "TEXT" 326 | } 327 | }, 328 | "procedureevents_mv_random": { 329 | "columns": [ 330 | "ROW_ID", 331 | "SUBJECT_ID", 332 | "HADM_ID", 333 | 
"ICUSTAY_ID", 334 | "STARTTIME", 335 | "ENDTIME", 336 | "ITEMID", 337 | "VALUE", 338 | "VALUEUOM", 339 | "LOCATION", 340 | "LOCATIONCATEGORY", 341 | "STORETIME", 342 | "CGID", 343 | "ORDERID", 344 | "LINKORDERID", 345 | "ORDERCATEGORYNAME", 346 | "SECONDARYORDERCATEGORYNAME", 347 | "ORDERCATEGORYDESCRIPTION", 348 | "ISOPENBAG", 349 | "CONTINUEINNEXTDEPT", 350 | "CANCELREASON", 351 | "STATUSDESCRIPTION", 352 | "COMMENTS_EDITEDBY", 353 | "COMMENTS_CANCELEDBY", 354 | "COMMENTS_DATE" 355 | ], 356 | "types": { 357 | "ROW_ID": "INTEGER", 358 | "SUBJECT_ID": "INTEGER", 359 | "HADM_ID": "INTEGER", 360 | "ICUSTAY_ID": "REAL", 361 | "STARTTIME": "TEXT", 362 | "ENDTIME": "TEXT", 363 | "ITEMID": "INTEGER", 364 | "VALUE": "REAL", 365 | "VALUEUOM": "TEXT", 366 | "LOCATION": "TEXT", 367 | "LOCATIONCATEGORY": "TEXT", 368 | "STORETIME": "TEXT", 369 | "CGID": "INTEGER", 370 | "ORDERID": "INTEGER", 371 | "LINKORDERID": "INTEGER", 372 | "ORDERCATEGORYNAME": "TEXT", 373 | "SECONDARYORDERCATEGORYNAME": "REAL", 374 | "ORDERCATEGORYDESCRIPTION": "TEXT", 375 | "ISOPENBAG": "INTEGER", 376 | "CONTINUEINNEXTDEPT": "INTEGER", 377 | "CANCELREASON": "INTEGER", 378 | "STATUSDESCRIPTION": "TEXT", 379 | "COMMENTS_EDITEDBY": "TEXT", 380 | "COMMENTS_CANCELEDBY": "TEXT", 381 | "COMMENTS_DATE": "TEXT" 382 | } 383 | }, 384 | "d_labitems": { 385 | "columns": [ 386 | "ROW_ID", 387 | "ITEMID", 388 | "LABEL", 389 | "FLUID", 390 | "CATEGORY", 391 | "LOINC_CODE" 392 | ], 393 | "types": { 394 | "ROW_ID": "INTEGER", 395 | "ITEMID": "INTEGER", 396 | "LABEL": "TEXT", 397 | "FLUID": "TEXT", 398 | "CATEGORY": "TEXT", 399 | "LOINC_CODE": "TEXT" 400 | }, 401 | "join_keys": [ 402 | "itemid", 403 | "label" 404 | ] 405 | }, 406 | "icustays_random": { 407 | "columns": [ 408 | "ROW_ID", 409 | "SUBJECT_ID", 410 | "HADM_ID", 411 | "ICUSTAY_ID", 412 | "DBSOURCE", 413 | "FIRST_CAREUNIT", 414 | "LAST_CAREUNIT", 415 | "FIRST_WARDID", 416 | "LAST_WARDID", 417 | "INTIME", 418 | "OUTTIME", 419 | "LOS" 420 | ], 421 | "types": { 422 
| "ROW_ID": "INTEGER", 423 | "SUBJECT_ID": "INTEGER", 424 | "HADM_ID": "INTEGER", 425 | "ICUSTAY_ID": "INTEGER", 426 | "DBSOURCE": "TEXT", 427 | "FIRST_CAREUNIT": "TEXT", 428 | "LAST_CAREUNIT": "TEXT", 429 | "FIRST_WARDID": "INTEGER", 430 | "LAST_WARDID": "INTEGER", 431 | "INTIME": "TEXT", 432 | "OUTTIME": "TEXT", 433 | "LOS": "REAL" 434 | } 435 | }, 436 | "inputevents_cv_random": { 437 | "columns": [ 438 | "ROW_ID", 439 | "SUBJECT_ID", 440 | "HADM_ID", 441 | "ICUSTAY_ID", 442 | "CHARTTIME", 443 | "ITEMID", 444 | "AMOUNT", 445 | "AMOUNTUOM", 446 | "RATE", 447 | "RATEUOM", 448 | "STORETIME", 449 | "CGID", 450 | "ORDERID", 451 | "LINKORDERID", 452 | "STOPPED", 453 | "NEWBOTTLE", 454 | "ORIGINALAMOUNT", 455 | "ORIGINALAMOUNTUOM", 456 | "ORIGINALROUTE", 457 | "ORIGINALRATE", 458 | "ORIGINALRATEUOM", 459 | "ORIGINALSITE" 460 | ], 461 | "types": { 462 | "ROW_ID": "INTEGER", 463 | "SUBJECT_ID": "INTEGER", 464 | "HADM_ID": "REAL", 465 | "ICUSTAY_ID": "REAL", 466 | "CHARTTIME": "TEXT", 467 | "ITEMID": "INTEGER", 468 | "AMOUNT": "REAL", 469 | "AMOUNTUOM": "TEXT", 470 | "RATE": "REAL", 471 | "RATEUOM": "TEXT", 472 | "STORETIME": "TEXT", 473 | "CGID": "REAL", 474 | "ORDERID": "INTEGER", 475 | "LINKORDERID": "INTEGER", 476 | "STOPPED": "TEXT", 477 | "NEWBOTTLE": "REAL", 478 | "ORIGINALAMOUNT": "REAL", 479 | "ORIGINALAMOUNTUOM": "TEXT", 480 | "ORIGINALROUTE": "TEXT", 481 | "ORIGINALRATE": "REAL", 482 | "ORIGINALRATEUOM": "TEXT", 483 | "ORIGINALSITE": "TEXT" 484 | } 485 | }, 486 | "inputevents_mv_random": { 487 | "columns": [ 488 | "ROW_ID", 489 | "SUBJECT_ID", 490 | "HADM_ID", 491 | "ICUSTAY_ID", 492 | "STARTTIME", 493 | "ENDTIME", 494 | "ITEMID", 495 | "AMOUNT", 496 | "AMOUNTUOM", 497 | "RATE", 498 | "RATEUOM", 499 | "STORETIME", 500 | "CGID", 501 | "ORDERID", 502 | "LINKORDERID", 503 | "ORDERCATEGORYNAME", 504 | "SECONDARYORDERCATEGORYNAME", 505 | "ORDERCOMPONENTTYPEDESCRIPTION", 506 | "ORDERCATEGORYDESCRIPTION", 507 | "PATIENTWEIGHT", 508 | "TOTALAMOUNT", 509 | 
"TOTALAMOUNTUOM", 510 | "ISOPENBAG", 511 | "CONTINUEINNEXTDEPT", 512 | "CANCELREASON", 513 | "STATUSDESCRIPTION", 514 | "COMMENTS_EDITEDBY", 515 | "COMMENTS_CANCELEDBY", 516 | "COMMENTS_DATE", 517 | "ORIGINALAMOUNT", 518 | "ORIGINALRATE" 519 | ], 520 | "types": { 521 | "ROW_ID": "INTEGER", 522 | "SUBJECT_ID": "INTEGER", 523 | "HADM_ID": "INTEGER", 524 | "ICUSTAY_ID": "REAL", 525 | "STARTTIME": "TEXT", 526 | "ENDTIME": "TEXT", 527 | "ITEMID": "INTEGER", 528 | "AMOUNT": "REAL", 529 | "AMOUNTUOM": "TEXT", 530 | "RATE": "REAL", 531 | "RATEUOM": "TEXT", 532 | "STORETIME": "TEXT", 533 | "CGID": "INTEGER", 534 | "ORDERID": "INTEGER", 535 | "LINKORDERID": "INTEGER", 536 | "ORDERCATEGORYNAME": "TEXT", 537 | "SECONDARYORDERCATEGORYNAME": "TEXT", 538 | "ORDERCOMPONENTTYPEDESCRIPTION": "TEXT", 539 | "ORDERCATEGORYDESCRIPTION": "TEXT", 540 | "PATIENTWEIGHT": "REAL", 541 | "TOTALAMOUNT": "REAL", 542 | "TOTALAMOUNTUOM": "TEXT", 543 | "ISOPENBAG": "INTEGER", 544 | "CONTINUEINNEXTDEPT": "INTEGER", 545 | "CANCELREASON": "INTEGER", 546 | "STATUSDESCRIPTION": "TEXT", 547 | "COMMENTS_EDITEDBY": "TEXT", 548 | "COMMENTS_CANCELEDBY": "TEXT", 549 | "COMMENTS_DATE": "TEXT", 550 | "ORIGINALAMOUNT": "REAL", 551 | "ORIGINALRATE": "REAL" 552 | } 553 | }, 554 | "services_random": { 555 | "columns": [ 556 | "ROW_ID", 557 | "SUBJECT_ID", 558 | "HADM_ID", 559 | "TRANSFERTIME", 560 | "PREV_SERVICE", 561 | "CURR_SERVICE" 562 | ], 563 | "types": { 564 | "ROW_ID": "INTEGER", 565 | "SUBJECT_ID": "INTEGER", 566 | "HADM_ID": "INTEGER", 567 | "TRANSFERTIME": "TEXT", 568 | "PREV_SERVICE": "TEXT", 569 | "CURR_SERVICE": "TEXT" 570 | } 571 | }, 572 | "prescriptions_random": { 573 | "columns": [ 574 | "ROW_ID", 575 | "SUBJECT_ID", 576 | "HADM_ID", 577 | "ICUSTAY_ID", 578 | "STARTDATE", 579 | "ENDDATE", 580 | "DRUG_TYPE", 581 | "DRUG", 582 | "DRUG_NAME_POE", 583 | "DRUG_NAME_GENERIC", 584 | "FORMULARY_DRUG_CD", 585 | "GSN", 586 | "NDC", 587 | "PROD_STRENGTH", 588 | "DOSE_VAL_RX", 589 | "DOSE_UNIT_RX", 590 | 
"FORM_VAL_DISP", 591 | "FORM_UNIT_DISP", 592 | "ROUTE" 593 | ], 594 | "types": { 595 | "ROW_ID": "INTEGER", 596 | "SUBJECT_ID": "INTEGER", 597 | "HADM_ID": "INTEGER", 598 | "ICUSTAY_ID": "REAL", 599 | "STARTDATE": "TEXT", 600 | "ENDDATE": "TEXT", 601 | "DRUG_TYPE": "TEXT", 602 | "DRUG": "TEXT", 603 | "DRUG_NAME_POE": "TEXT", 604 | "DRUG_NAME_GENERIC": "TEXT", 605 | "FORMULARY_DRUG_CD": "TEXT", 606 | "GSN": "TEXT", 607 | "NDC": "REAL", 608 | "PROD_STRENGTH": "TEXT", 609 | "DOSE_VAL_RX": "TEXT", 610 | "DOSE_UNIT_RX": "TEXT", 611 | "FORM_VAL_DISP": "TEXT", 612 | "FORM_UNIT_DISP": "TEXT", 613 | "ROUTE": "TEXT" 614 | } 615 | }, 616 | "drgcodes_random": { 617 | "columns": [ 618 | "ROW_ID", 619 | "SUBJECT_ID", 620 | "HADM_ID", 621 | "DRG_TYPE", 622 | "DRG_CODE", 623 | "DESCRIPTION", 624 | "DRG_SEVERITY", 625 | "DRG_MORTALITY" 626 | ], 627 | "types": { 628 | "ROW_ID": "INTEGER", 629 | "SUBJECT_ID": "INTEGER", 630 | "HADM_ID": "INTEGER", 631 | "DRG_TYPE": "TEXT", 632 | "DRG_CODE": "INTEGER", 633 | "DESCRIPTION": "TEXT", 634 | "DRG_SEVERITY": "REAL", 635 | "DRG_MORTALITY": "REAL" 636 | } 637 | }, 638 | "diagnoses_icd_random": { 639 | "columns": [ 640 | "ROW_ID", 641 | "SUBJECT_ID", 642 | "HADM_ID", 643 | "SEQ_NUM", 644 | "ICD9_CODE" 645 | ], 646 | "types": { 647 | "ROW_ID": "INTEGER", 648 | "SUBJECT_ID": "INTEGER", 649 | "HADM_ID": "INTEGER", 650 | "SEQ_NUM": "REAL", 651 | "ICD9_CODE": "TEXT" 652 | } 653 | }, 654 | "datetimeevents_cv_random": { 655 | "columns": [ 656 | "ROW_ID", 657 | "SUBJECT_ID", 658 | "HADM_ID", 659 | "ICUSTAY_ID", 660 | "ITEMID", 661 | "CHARTTIME", 662 | "STORETIME", 663 | "CGID", 664 | "VALUE", 665 | "VALUEUOM", 666 | "WARNING", 667 | "ERROR", 668 | "RESULTSTATUS", 669 | "STOPPED" 670 | ], 671 | "types": { 672 | "ROW_ID": "INTEGER", 673 | "SUBJECT_ID": "INTEGER", 674 | "HADM_ID": "REAL", 675 | "ICUSTAY_ID": "REAL", 676 | "ITEMID": "INTEGER", 677 | "CHARTTIME": "TEXT", 678 | "STORETIME": "TEXT", 679 | "CGID": "INTEGER", 680 | "VALUE": "TEXT", 681 | 
"VALUEUOM": "TEXT", 682 | "WARNING": "REAL", 683 | "ERROR": "REAL", 684 | "RESULTSTATUS": "REAL", 685 | "STOPPED": "TEXT" 686 | } 687 | }, 688 | "cptevents_random": { 689 | "columns": [ 690 | "ROW_ID", 691 | "SUBJECT_ID", 692 | "HADM_ID", 693 | "COSTCENTER", 694 | "CHARTDATE", 695 | "CPT_CD", 696 | "CPT_NUMBER", 697 | "CPT_SUFFIX", 698 | "TICKET_ID_SEQ", 699 | "SECTIONHEADER", 700 | "SUBSECTIONHEADER", 701 | "DESCRIPTION" 702 | ], 703 | "types": { 704 | "ROW_ID": "INTEGER", 705 | "SUBJECT_ID": "INTEGER", 706 | "HADM_ID": "INTEGER", 707 | "COSTCENTER": "TEXT", 708 | "CHARTDATE": "TEXT", 709 | "CPT_CD": "TEXT", 710 | "CPT_NUMBER": "REAL", 711 | "CPT_SUFFIX": "TEXT", 712 | "TICKET_ID_SEQ": "REAL", 713 | "SECTIONHEADER": "TEXT", 714 | "SUBSECTIONHEADER": "TEXT", 715 | "DESCRIPTION": "TEXT" 716 | } 717 | }, 718 | "d_icd_diagnoses": { 719 | "columns": [ 720 | "ROW_ID", 721 | "ICD9_CODE", 722 | "SHORT_TITLE", 723 | "LONG_TITLE" 724 | ], 725 | "types": { 726 | "ROW_ID": "INTEGER", 727 | "ICD9_CODE": "TEXT", 728 | "SHORT_TITLE": "TEXT", 729 | "LONG_TITLE": "TEXT" 730 | }, 731 | "join_keys": [ 732 | "icd9_code", 733 | "long_title" 734 | ] 735 | }, 736 | "noteevents_random": { 737 | "columns": [ 738 | "ROW_ID", 739 | "SUBJECT_ID", 740 | "HADM_ID", 741 | "CHARTDATE", 742 | "CHARTTIME", 743 | "STORETIME", 744 | "CATEGORY", 745 | "DESCRIPTION", 746 | "CGID", 747 | "ISERROR", 748 | "TEXT" 749 | ], 750 | "types": { 751 | "ROW_ID": "INTEGER", 752 | "SUBJECT_ID": "INTEGER", 753 | "HADM_ID": "REAL", 754 | "CHARTDATE": "TEXT", 755 | "CHARTTIME": "TEXT", 756 | "STORETIME": "TEXT", 757 | "CATEGORY": "TEXT", 758 | "DESCRIPTION": "TEXT", 759 | "CGID": "REAL", 760 | "ISERROR": "REAL", 761 | "TEXT": "TEXT" 762 | } 763 | }, 764 | "d_icd_procedures": { 765 | "columns": [ 766 | "ROW_ID", 767 | "ICD9_CODE", 768 | "SHORT_TITLE", 769 | "LONG_TITLE" 770 | ], 771 | "types": { 772 | "ROW_ID": "INTEGER", 773 | "ICD9_CODE": "INTEGER", 774 | "SHORT_TITLE": "TEXT", 775 | "LONG_TITLE": "TEXT" 776 | } 777 
| } 778 | } --------------------------------------------------------------------------------