├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── issue_template.md │ └── proposal_template.md └── pull_request_template.md ├── .gitignore ├── LICENSE.md ├── README.md ├── airflow ├── Dockerfile ├── config │ └── airflow.cfg ├── dags │ ├── .gitkeep │ ├── airflow_operator.py │ ├── ashp │ │ └── dag.py │ ├── build_marts │ │ └── dag.py │ ├── cms_part_d │ │ ├── dag.py │ │ ├── load_basic_drugs_formulary_file.sql │ │ ├── load_beneficiary_cost_file.sql │ │ ├── load_excluded_drugs_formulary_file.sql │ │ ├── load_geographic_locator_file.sql │ │ ├── load_indication_based_coverage_formulary_file.sql │ │ ├── load_insulin_beneficiary_cost_file.sql │ │ ├── load_pharmacy_networks_file.sql │ │ ├── load_plan_information_file.sql │ │ └── load_pricing_file.sql │ ├── common_dag_tasks.py │ ├── dailymed │ │ ├── dag.py │ │ └── template.xsl │ ├── dailymed_pharm_class │ │ ├── dag.py │ │ └── load-dailymed_pharm_class.sql │ ├── dailymed_rxnorm │ │ ├── dag.py │ │ └── load-dailymed_rxnorm.sql │ ├── dailymed_zip_file_metadata │ │ ├── dag.py │ │ └── load-dailymed_zip_file_metadata.sql │ ├── dbt_gcp │ │ └── dag.py │ ├── export_marts │ │ └── dag.py │ ├── fda_enforcement │ │ ├── dag.py │ │ └── dag_tasks.py │ ├── fda_excluded │ │ ├── dag.py │ │ ├── load_package.sql │ │ └── load_product.sql │ ├── fda_ndc │ │ ├── dag.py │ │ ├── load_package.sql │ │ └── load_product.sql │ ├── fda_unfinished │ │ ├── dag.py │ │ ├── load_package.sql │ │ └── load_product.sql │ ├── fda_unii │ │ ├── dag.py │ │ └── load_unii.sql │ ├── mccpd │ │ ├── dag.py │ │ └── dag_tasks.py │ ├── nadac │ │ ├── dag.py │ │ └── load_nadac.sql │ ├── orange_book │ │ ├── dag.py │ │ ├── load_exclusivity.sql │ │ ├── load_patent.sql │ │ └── load_products.sql │ ├── purple_book │ │ ├── dag.py │ │ ├── dag_tasks.py │ │ └── load_purple_book.sql │ ├── rxclass │ │ ├── dag.py │ │ └── dag_tasks.py │ ├── rxnorm │ │ ├── dag.py │ │ ├── load_rxnconso.sql │ │ ├── load_rxncui.sql │ │ ├── load_rxncuichanges.sql │ │ ├── load_rxndoc.sql │ │ ├── load_rxnrel.sql │ │ ├── load_rxnrxnatomarchive.sql │ │ ├── load_rxnsab.sql │ │ ├── load_rxnsat.sql │ │ ├── load_rxnsty.sql │ │ └── terms-of-service.md │ ├── rxnorm_historical │ │ ├── dag.py │ │ └── dag_tasks.py │ ├── rxterms │ │ ├── dag.py │ │ ├── load_ingredients.sql │ │ └── load_rxterms.sql │ ├── sagerx.py │ ├── umls │ │ ├── dag.py │ │ └── dag_tasks.py │ ├── user_macros.py │ └── vsac │ │ ├── dag.py │ │ └── dag_tasks.py ├── hidden_dags │ └── meps │ │ ├── meps_medical_conditions_dag.py │ │ ├── meps_population_characteristics_dag.py │ │ └── meps_prescribed_medications_dag.py └── requirements.txt ├── dbt ├── Dockerfile ├── profiles.yml └── sagerx │ ├── .gitignore │ ├── README.md │ ├── analyses │ └── .gitkeep │ ├── dbt_project.yml │ ├── macros │ ├── .gitkeep │ ├── check_data_availability.sql │ ├── get_custom_schema.sql │ ├── ndc_convert.sql │ ├── ndc_format.sql │ └── ndc_to_11.sql │ ├── models │ ├── intermediate │ │ ├── dailymed │ │ │ ├── int_dailymed_image_name_ndcs.sql │ │ │ ├── int_dailymed_image_xml_ndcs.sql │ │ │ ├── int_dailymed_organization_metrics.sql │ │ │ ├── int_dailymed_ranked_package_label_images.sql │ │ │ ├── int_dailymed_ranked_package_label_ndcs.sql │ │ │ └── int_dailymed_validated_package_label_ndcs.sql │ │ ├── fda │ │ │ ├── int_fda_packaging_components.sql │ │ │ ├── int_fda_packaging_parts.sql │ │ │ └── int_fda_packaging_subparts.sql │ │ ├── fda_enforcement │ │ │ └── int_inactive_ingredients_to_fda_enforcement_reports.sql │ │ ├── nadac │ │ │ ├── _int_nadac__models.yml │ │ │ ├── int_nadac_historical_pricing.sql │ │ │ └── int_nadac_pricing.sql │ │ ├── orange_book │ │ │ ├── _int_orange_book__models.yml │ │ │ └── int_fda_ndc_to_te.sql │ │ ├── rxclass │ │ │ ├── _int_rxclass__models.yml │ │ │ ├── int_rxclass_clinical_products_to_atc_class.sql │ │ │ ├── int_rxclass_clinical_products_to_cvx_code.sql │ │ │ ├── int_rxclass_clinical_products_to_schedule.sql │ │ │ └── int_rxclass_clinical_products_to_va_class.sql │ │ ├── rxnorm │ │ │ ├── _int_rxnorm__models.yml │ │ │ ├── int_mthspl_products_to_active_ingredients.sql │ │ │ ├── int_mthspl_products_to_active_moieties.sql │ │ │ ├── int_mthspl_products_to_inactive_ingredients.sql │ │ │ ├── int_rxnorm_all_ndcs_to_product_rxcuis.sql │ │ │ ├── int_rxnorm_clinical_products_to_clinical_product_components.sql │ │ │ ├── int_rxnorm_clinical_products_to_dose_forms.sql │ │ │ ├── int_rxnorm_clinical_products_to_ingredient_components.sql │ │ │ ├── int_rxnorm_clinical_products_to_ingredient_strengths.sql │ │ │ ├── int_rxnorm_clinical_products_to_ingredients.sql │ │ │ ├── int_rxnorm_clinical_products_to_ndcs.sql │ │ │ └── int_rxnorm_ndcs_to_products.sql │ │ └── umls │ │ │ ├── _int_umls__models.yml │ │ │ ├── int_umls_clinical_products_to_crosswalk_codes.sql │ │ │ ├── int_umls_ingredient_components_to_crosswalk_codes.sql │ │ │ ├── int_umls_multiple_ingredients_to_crosswalk_codes.sql │ │ │ └── int_umls_precise_ingredients_to_crosswalk_codes.sql │ ├── marts │ │ ├── classification │ │ │ ├── _classification__models.yml │ │ │ ├── atc_codes_to_rxnorm_products.sql │ │ │ └── clinical_products_to_diseases.sql │ │ ├── fda_excluded │ │ │ └── fda_excluded.sql │ │ ├── ndc │ │ │ ├── _ndc__models.yml │ │ │ ├── all_ndc_descriptions.sql │ │ │ ├── all_ndcs_to_sources.sql │ │ │ ├── gtins.sql │ │ │ ├── ndc_associations.sql │ │ │ ├── ndcs_to_label_images.sql │ │ │ └── pack_size.sql │ │ ├── pricing │ │ │ ├── pricing.sql │ │ │ └── pricing_historical.sql │ │ ├── products │ │ │ ├── _products__models.yml │ │ │ ├── brand_products_with_related_ndcs.sql │ │ │ ├── product_synonyms.sql │ │ │ ├── products.sql │ │ │ └── products_to_inactive_ingredients.sql │ │ └── purdue │ │ │ └── scorecard_data.sql │ └── staging │ │ ├── ashp │ │ ├── _ashp__models.yml │ │ ├── _ashp__sources.yml │ │ ├── stg_ashp__current_drug_shortages.sql │ │ └── stg_ashp__current_drug_shortages_ndcs.sql │ │ ├── dailymed │ │ ├── stg_dailymed__interactions.sql │ │ ├── stg_dailymed__main.sql │ │ ├── stg_dailymed__ndcs.sql │ │ ├── stg_dailymed__organization_activities.sql │ │ ├── stg_dailymed__organization_items.sql │ │ ├── stg_dailymed__organization_texts.sql │ │ ├── stg_dailymed__organizations.sql │ │ ├── stg_dailymed__package_label_section_images.sql │ │ ├── stg_dailymed__package_label_section_ndcs.sql │ │ └── stg_dailymed__package_label_sections.sql │ │ ├── fda_enforcement │ │ ├── _fda_enforcement__models.yml │ │ ├── _fda_enforcement__sources.yml │ │ ├── stg_fda_enforcement__json_ndcs.sql │ │ ├── stg_fda_enforcement__regex_ndcs.sql │ │ └── stg_fda_enforcement__reports.sql │ │ ├── fda_excluded │ │ ├── _fda_excluded__models.yml │ │ ├── _fda_excluded__sources.yml │ │ ├── stg_fda_excluded__classes.sql │ │ ├── stg_fda_excluded__ndcs.sql │ │ └── stg_fda_excluded__substances.sql │ │ ├── fda_ndc │ │ ├── _fda_ndc__models.yml │ │ ├── _fda_ndc__sources.yml │ │ ├── stg_fda_ndc__classes.sql │ │ ├── stg_fda_ndc__ndc_associations.sql │ │ ├── stg_fda_ndc__ndcs.sql │ │ └── stg_fda_ndc__substances.sql │ │ ├── fda_unfinished │ │ ├── _fda_unfinished__models.yml │ │ ├── _fda_unfinished__sources.yml │ │ ├── stg_fda_unfinished__ndcs.sql │ │ └── stg_fda_unfinished__substances.sql │ │ ├── fda_unii │ │ ├── _fda_unii__sources.yml │ │ └── stg_fda_unii__unii_codes.sql │ │ ├── mccpd │ │ └── _mccpd__sources.yml │ │ ├── nadac │ │ ├── _nadac__models.yml │ │ ├── _nadac__sources.yml │ │ └── stg_nadac__nadac.sql │ │ ├── orange_book │ │ ├── _orange_book__models.yml │ │ └── _orange_book__sources.yml │ │ ├── purple_book │ │ ├── _purple_book__models.yml │ │ └── _purple_book__sources.yml │ │ ├── rxclass │ │ ├── _rxclass__models.yml │ │ ├── _rxclass__sources.yml │ │ └── stg_rxclass__rxclass.sql │ │ ├── rxnorm │ │ ├── _rxnorm__models.yml │ │ ├── _rxnorm__sources.yml │ │ ├── stg_rxnorm__all_ndcs.sql │ │ ├── stg_rxnorm__atc_codes.sql │ │ ├── stg_rxnorm__brand_product_component_links.sql │ │ ├── stg_rxnorm__brand_product_components.sql │ │ ├── stg_rxnorm__brand_products.sql │ │ ├── stg_rxnorm__brands.sql │ │ ├── stg_rxnorm__clinical_product_component_links.sql │ │ ├── stg_rxnorm__clinical_product_components.sql │ │ ├── stg_rxnorm__clinical_products.sql │ │ ├── stg_rxnorm__dose_form_group_links.sql │ │ ├── stg_rxnorm__dose_form_groups.sql │ │ ├── stg_rxnorm__dose_forms.sql │ │ ├── stg_rxnorm__hcpcs_codes.sql │ │ ├── stg_rxnorm__ingredient_component_links.sql │ │ ├── stg_rxnorm__ingredient_components.sql │ │ ├── stg_rxnorm__ingredient_strength_links.sql │ │ ├── stg_rxnorm__ingredient_strengths.sql │ │ ├── stg_rxnorm__ingredients.sql │ │ ├── stg_rxnorm__mthspl_ndcs.sql │ │ ├── stg_rxnorm__mthspl_products.sql │ │ ├── stg_rxnorm__mthspl_substances.sql │ │ ├── stg_rxnorm__ndcs.sql │ │ ├── stg_rxnorm__precise_ingredient_links.sql │ │ ├── stg_rxnorm__precise_ingredients.sql │ │ ├── stg_rxnorm__product_rxcuis.sql │ │ └── stg_rxnorm__products.sql │ │ ├── rxnorm_historical │ │ ├── _rxnorm_historical__sources.yml │ │ ├── stg_rxnorm_historical__most_recent_ndcs.sql │ │ └── stg_rxnorm_historical__ndcs.sql │ │ ├── rxterms │ │ ├── _rxterms__models.yml │ │ ├── _rxterms__sources.yml │ │ ├── stg_rxterms__names.sql │ │ └── stg_rxterms__strengths.sql │ │ └── umls │ │ ├── _stg_umls__models.yml │ │ ├── _stg_umls__sources.yml │ │ └── stg_umls__crosswalk_codes.sql │ ├── seeds │ ├── .gitkeep │ ├── _seeds__models.yml │ └── usp_preservatives.csv │ ├── snapshots │ └── .gitkeep │ └── tests │ └── .gitkeep ├── docker-compose.yml ├── docs ├── catalog.json ├── images │ ├── sagerx_airflow_example.png │ └── sagerx_postgres_example.png ├── index.html ├── manifest.json ├── run_results.json └── style_guide.md ├── pgadmin └── servers.json └── postgres ├── 0_pg_stat_statement.sh ├── 1_airflow.sql └── 2_sagerx_setup.sql /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: coderxio # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue 3 | about: Create a new issue 4 | --- 5 | # Problem Statement 6 | [What needs to be done and why] 7 | 8 | # Criteria for Success 9 | [Measureable outcome if possible] 10 | 11 | # Additional Information 12 | [ways one might accomplish this task, links, documentation, alternatives, etc.] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/proposal_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Proposal 3 | about: Propose a new feature or some other changes not related to a direct issue 4 | --- 5 | 6 | # Proposal 7 | [What is the idea] 8 | 9 | # Rationale 10 | [Why should this be implemented] 11 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Resolves #ISSUE NUMBER 2 | 3 | ## Explanation 4 | [What did you change?] 5 | 6 | ## Rationale 7 | [Why did you make the changes mentioned above? What alternatives did you consider?] 8 | 9 | ## Tests 10 | 1. What testing did you do? 11 | 1. Attach testing logs inside a summary block: 12 | 13 |
14 | testing logs 15 | 16 | ``` 17 | 18 | ``` 19 |
20 | 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | notebooks 3 | 4 | # Python files 5 | __pycache__ 6 | venv 7 | 8 | # Airflow Volumes 9 | data 10 | logs 11 | extracts 12 | plugins 13 | 14 | # dbt 15 | .user.yml 16 | 17 | # Desktop Services Store 18 | .DS_Store 19 | 20 | # GCP 21 | gcp.json 22 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2024 CodeRx, LLC 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /airflow/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM apache/airflow:2.5.1 2 | 3 | COPY requirements.txt . 4 | 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | -------------------------------------------------------------------------------- /airflow/dags/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/airflow/dags/.gitkeep -------------------------------------------------------------------------------- /airflow/dags/airflow_operator.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.models.param import Param 3 | 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel 5 | 6 | def create_dag(dag_id,**kwargs) -> DAG: 7 | from airflow.utils.dates import days_ago 8 | from datetime import timedelta 9 | 10 | dag_args ={ 11 | "dag_id":dag_id, 12 | "start_date": days_ago(0), 13 | "schedule": "0 5 * * *", # run at 5am every day 14 | "description": f"Processes {dag_id} source", 15 | } 16 | 17 | default_args = { 18 | "owner": "airflow", 19 | "depends_on_past": False, 20 | "email": ["admin@sagerx.io"], 21 | "email_on_failure": False, 22 | "email_on_retry": False, 23 | "retries": 1, 24 | "retry_delay": timedelta(minutes=5), 25 | "retrieve_dataset_function": get_dataset, 26 | "on_failure_callback": alert_slack_channel, 27 | "dagrun_timeout":60 28 | } 29 | 30 | dag_args.update(kwargs) 31 | default_args.update(kwargs) 32 | 33 | dag = DAG(**dag_args,default_args=default_args) 34 | 35 | return dag -------------------------------------------------------------------------------- /airflow/dags/build_marts/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | 3 | from airflow_operator import create_dag 4 | from common_dag_tasks import get_most_recent_dag_run 5 | from airflow.decorators import dag,task 6 | from airflow.operators.trigger_dagrun import TriggerDagRunOperator 7 | 8 | from common_dag_tasks import run_subprocess_command 9 | 10 | def run_dag_condition(dag_id): 11 | last_run = get_most_recent_dag_run(dag_id) 12 | # if a DAG from the list of dependencies is more than 5 days stale 13 | if last_run is None or (pendulum.now() - last_run.execution_date).days > 5: 14 | if last_run is not None: 15 | print(f'{dag_id} was last run {last_run.execution_date}.') 16 | else: 17 | print(f'{dag_id} has never been run.') 18 | return True 19 | else: 20 | print(f"{dag_id} was last run {last_run.execution_date} and will now skipped.") 21 | return False 22 | 23 | def get_dag_list(): 24 | list_of_dags = [] 25 | dag_dependencies = ["fda_ndc","fda_unfinished","fda_excluded","rxnorm","rxclass","rxnorm_historical"] 26 | for dag in dag_dependencies: 27 | if run_dag_condition(dag): 28 | list_of_dags.append(dag) 29 | print(f'list of dags to run{list_of_dags}') 30 | return list_of_dags 31 | 32 | dag = create_dag( 33 | dag_id="build_marts", 34 | schedule = "0 5 * * 2", #every tuesday at 5:00am 35 | catchup=False, 36 | concurrency=2 37 | ) 38 | with dag: 39 | 40 | # PLEASE NOTE this block will execute each of the DAGs in turn; 41 | # When all are being run consecutively, the process will take in excess of 60 minutes 42 | 43 | @task 44 | def execute_external_dag_list(**kwargs): 45 | dags_list = get_dag_list() 46 | for ex_dag in dags_list: 47 | print(f'triggering {ex_dag}') 48 | dag_task = TriggerDagRunOperator( 49 | task_id=f"{ex_dag}_task", 50 | trigger_dag_id=ex_dag, 51 | conf={"source_dag_id": "build_marts"}, 52 | wait_for_completion=True) 53 | dag_task.execute(context=kwargs) 54 | 55 | # Once DBT freshness metrics are implemented, this task can be updated 56 | @task 57 | def transform_tasks(): 58 | run_subprocess_command(['docker', 'exec', 'dbt', 'dbt', 'seed'], cwd='/dbt/sagerx') 59 | run_subprocess_command(['docker', 'exec', 'dbt', 'dbt', 'run', '--select', '+models/marts/ndc'], cwd='/dbt/sagerx') 60 | run_subprocess_command(['docker', 'exec', 'dbt', 'dbt', 'run', '--select', '+models/marts/classification'], cwd='/dbt/sagerx') 61 | run_subprocess_command(['docker', 'exec', 'dbt', 'dbt', 'run', '--select', '+models/marts/products'], cwd='/dbt/sagerx') 62 | 63 | execute_external_dag_list() >> transform_tasks() 64 | -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_basic_drugs_formulary_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_basic_drugs_formulary */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_basic_drugs_formulary CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_basic_drugs_formulary ( 5 | formulary_id VARCHAR(8) NOT NULL, 6 | formulary_version VARCHAR(5) NOT NULL, 7 | contract_year VARCHAR(4), 8 | rxcui VARCHAR(8), 9 | ndc VARCHAR(11), 10 | tier_level_value TEXT, 11 | quantity_limit_yn VARCHAR(1), 12 | quantity_limit_amount VARCHAR(7), 13 | quantity_limit_days VARCHAR(3), 14 | prior_authorization_yn VARCHAR(1), 15 | step_therapy_yn VARCHAR(1) 16 | ); 17 | 18 | COPY sagerx_lake.cms_basic_drugs_formulary 19 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/basic drugs formulary file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;; 20 | -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_beneficiary_cost_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_beneficiary_cost */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_beneficiary_cost CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_beneficiary_cost ( 5 | contract_id VARCHAR(5) NOT NULL, 6 | plan_id VARCHAR(3) NOT NULL, 7 | segment_id VARCHAR(3) NOT NULL, 8 | coverage_level SMALLINT, 9 | tier SMALLINT, 10 | days_supply SMALLINT, 11 | 12 | cost_type_pref SMALLINT, 13 | cost_amt_pref NUMERIC(14,2), 14 | cost_min_amt_pref VARCHAR(12), 15 | cost_max_amt_pref NUMERIC(14,2), 16 | 17 | cost_type_nonpref SMALLINT, 18 | cost_amt_nonpref NUMERIC(14,2), 19 | cost_min_amt_nonpref VARCHAR(12), 20 | cost_max_amt_nonpref NUMERIC(14,2), 21 | 22 | cost_type_mail_pref SMALLINT, 23 | cost_amt_mail_pref NUMERIC(14,2), 24 | cost_min_amt_mail_pref VARCHAR(12), 25 | cost_max_amt_mail_pref NUMERIC(14,2), 26 | 27 | cost_type_mail_nonpref VARCHAR(1), 28 | cost_amt_mail_nonpref NUMERIC(14,2), 29 | cost_min_amt_mail_nonpref VARCHAR(12), 30 | cost_max_amt_mail_nonpref NUMERIC(14,2), 31 | 32 | tier_specialty_yn VARCHAR(1), 33 | ded_applies_yn VARCHAR(1) 34 | ); 35 | 36 | COPY sagerx_lake.cms_beneficiary_cost 37 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/beneficiary cost file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;; 38 | -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_excluded_drugs_formulary_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_excluded_drugs_formulary */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_excluded_drugs_formulary CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_excluded_drugs_formulary ( 5 | contract_id VARCHAR(5) NOT NULL, 6 | plan_id VARCHAR(3) NOT NULL, 7 | rxcui VARCHAR(8), 8 | tier TEXT, 9 | quantity_limit_yn VARCHAR(5), 10 | quantity_limit_amount VARCHAR(8), 11 | quantity_limit_days VARCHAR(3), 12 | prior_auth_yn VARCHAR(1), 13 | step_therapy_yn VARCHAR(1), 14 | capped_benefit_yn VARCHAR(1) 15 | ); 16 | 17 | COPY sagerx_lake.cms_excluded_drugs_formulary 18 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/excluded drugs formulary file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;; -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_geographic_locator_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_geographic_locator */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_geographic_locator CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_geographic_locator ( 5 | county_code VARCHAR(5) NOT NULL, 6 | statename VARCHAR(30) NOT NULL, 7 | county VARCHAR(50), 8 | ma_region_code VARCHAR(2), 9 | ma_region VARCHAR(150), 10 | pdp_region_code VARCHAR(2), 11 | pdp_region VARCHAR(150) 12 | ); 13 | 14 | COPY sagerx_lake.cms_geographic_locator 15 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/geographic locator file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;; 16 | -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_indication_based_coverage_formulary_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_indication_based_coverage_formulary */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_indication_based_coverage_formulary CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_indication_based_coverage_formulary ( 5 | contract_id VARCHAR(5) NOT NULL, 6 | plan_id VARCHAR(3) NOT NULL, 7 | rxcui VARCHAR(8), 8 | disease VARCHAR(100) 9 | ); 10 | 11 | COPY sagerx_lake.cms_indication_based_coverage_formulary 12 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/Indication Based Coverage Formulary File PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;; 13 | -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_insulin_beneficiary_cost_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_insulin_beneficiary_cost */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_insulin_beneficiary_cost CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_insulin_beneficiary_cost ( 5 | contract_id TEXT NOT NULL, 6 | plan_id TEXT NOT NULL, 7 | segment_id TEXT, 8 | tier TEXT, 9 | days_supply TEXT, 10 | copay_amt_pref_insln TEXT, 11 | copay_amt_nonpref_insln TEXT, 12 | copay_amt_mail_pref_insln TEXT, 13 | copay_amt_mail_nonpref_insln TEXT 14 | ); 15 | 16 | COPY sagerx_lake.cms_insulin_beneficiary_cost 17 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/insulin beneficiary cost file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;; -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_pharmacy_networks_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_pharmacy_networks */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_pharmacy_networks CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_pharmacy_networks ( 5 | contract_id VARCHAR(5) NOT NULL, 6 | plan_id VARCHAR(3) NOT NULL, 7 | segment_id VARCHAR(3), 8 | pharmacy_number VARCHAR(12), 9 | pharmacy_zipcode VARCHAR(5), 10 | preferred_status_retail VARCHAR(1), 11 | preferred_status_mail VARCHAR(1), 12 | pharmacy_retail VARCHAR(1), 13 | pharmacy_mail VARCHAR(1), 14 | in_area_flag TEXT, 15 | floor_price TEXT, 16 | brand_dispensing_fee_30 TEXT, 17 | brand_dispensing_fee_60 TEXT, 18 | brand_dispensing_fee_90 TEXT, 19 | generic_dispensing_fee_30 TEXT, 20 | generic_dispensing_fee_60 TEXT, 21 | generic_dispensing_fee_90 TEXT 22 | ); 23 | 24 | COPY sagerx_lake.cms_pharmacy_networks 25 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file PPUF_{{params.year}}Q{{params.quarter}} part 1.txt' DELIMITER '|' CSV HEADER;; 26 | 27 | COPY sagerx_lake.cms_pharmacy_networks 28 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file PPUF_{{params.year}}Q{{params.quarter}} part 2.txt' DELIMITER '|' CSV HEADER;; 29 | 30 | COPY sagerx_lake.cms_pharmacy_networks 31 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file PPUF_{{params.year}}Q{{params.quarter}} part 3.txt' DELIMITER '|' CSV HEADER;; 32 | 33 | COPY sagerx_lake.cms_pharmacy_networks 34 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file PPUF_{{params.year}}Q{{params.quarter}} part 4.txt' DELIMITER '|' CSV HEADER;; 35 | 36 | COPY sagerx_lake.cms_pharmacy_networks 37 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file PPUF_{{params.year}}Q{{params.quarter}} part 5.txt' DELIMITER '|' CSV HEADER;; 38 | 39 | COPY sagerx_lake.cms_pharmacy_networks 40 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file PPUF_{{params.year}}Q{{params.quarter}} part 6.txt' DELIMITER '|' CSV HEADER;; 41 | -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_plan_information_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_plan_information */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_plan_information CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_plan_information ( 5 | contract_id VARCHAR(5) NOT NULL, 6 | plan_id VARCHAR(3) NOT NULL, 7 | segment_id VARCHAR(3), 8 | contract_name VARCHAR(100), 9 | plan_name VARCHAR(80), 10 | formulary_id VARCHAR(8), 11 | premium TEXT, 12 | deductible TEXT, 13 | ma_region_code VARCHAR(2), 14 | pdp_region_code VARCHAR(2), 15 | state VARCHAR(2), 16 | county_code VARCHAR(5), 17 | snp VARCHAR(1), 18 | plan_suppressed_yn VARCHAR(1) 19 | ); 20 | 21 | COPY sagerx_lake.cms_plan_information 22 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/plan information PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER ENCODING 'WIN1252';; -------------------------------------------------------------------------------- /airflow/dags/cms_part_d/load_pricing_file.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.cms_pricing */ 2 | DROP TABLE IF EXISTS sagerx_lake.cms_pricing CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.cms_pricing ( 5 | contract_id VARCHAR(5) NOT NULL, 6 | plan_id VARCHAR(3) NOT NULL, 7 | segment_id VARCHAR(3), 8 | ndc VARCHAR(11), 9 | days_supply TEXT, 10 | unit_cost TEXT 11 | ); 12 | 13 | COPY sagerx_lake.cms_pricing 14 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pricing file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;; 15 | -------------------------------------------------------------------------------- /airflow/dags/dailymed_pharm_class/dag.py: -------------------------------------------------------------------------------- 1 | from airflow_operator import create_dag 2 | from airflow.utils.helpers import chain 3 | 4 | from common_dag_tasks import extract, get_ordered_sql_tasks, get_ds_folder 5 | from sagerx import read_sql_file 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator 7 | 8 | 9 | dag_id = "dailymed_pharm_class" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule= "0 5 * * *", # run at 5am every day 14 | max_active_runs=1, 15 | concurrency=2, 16 | ) 17 | 18 | with dag: 19 | url = "https://dailymed-data.nlm.nih.gov/public-release-files/pharmacologic_class_mappings.zip" 20 | ds_folder = get_ds_folder(dag_id) 21 | 22 | extract_task = extract(dag_id,url) 23 | 24 | task_list = [extract_task] 25 | for sql in get_ordered_sql_tasks(dag_id): 26 | sql_path = ds_folder / sql 27 | task_id = sql[:-4] #remove .sql 28 | 29 | sql_task = PostgresOperator( 30 | task_id=task_id, 31 | postgres_conn_id="postgres_default", 32 | sql=read_sql_file(sql_path).format(data_path=extract_task), 33 | dag=dag 34 | ) 35 | task_list.append(sql_task) 36 | 37 | chain(*task_list) 38 | -------------------------------------------------------------------------------- /airflow/dags/dailymed_pharm_class/load-dailymed_pharm_class.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.dailymed_pharm_class */ 2 | DROP TABLE IF EXISTS sagerx_lake.dailymed_pharm_class; 3 | 4 | CREATE TABLE sagerx_lake.dailymed_pharm_class ( 5 | spl_setid TEXT, 6 | spl_version TEXT, 7 | pharma_setid TEXT, 8 | pharma_version TEXT 9 | ); 10 | 11 | COPY sagerx_lake.dailymed_pharm_class 12 | FROM '{data_path}/pharmacologic_class_mappings.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER; 13 | -------------------------------------------------------------------------------- /airflow/dags/dailymed_rxnorm/dag.py: -------------------------------------------------------------------------------- 1 | from airflow_operator import create_dag 2 | from airflow.utils.helpers import chain 3 | 4 | from common_dag_tasks import extract, get_ordered_sql_tasks, get_ds_folder 5 | from sagerx import read_sql_file 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator 7 | 8 | 9 | dag_id = "dailymed_rxnorm" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule= "0 5 * * *", # run at 5am every day 14 | max_active_runs=1, 15 | concurrency=2, 16 | ) 17 | 18 | with dag: 19 | url = "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip" 20 | ds_folder = get_ds_folder(dag_id) 21 | 22 | extract_task = extract(dag_id,url) 23 | 24 | task_list = [extract_task] 25 | for sql in get_ordered_sql_tasks(dag_id): 26 | sql_path = ds_folder / sql 27 | task_id = sql[:-4] #remove .sql 28 | 29 | sql_task = PostgresOperator( 30 | task_id=task_id, 31 | postgres_conn_id="postgres_default", 32 | sql=read_sql_file(sql_path).format(data_path=extract_task), 33 | dag=dag 34 | ) 35 | task_list.append(sql_task) 36 | 37 | chain(*task_list) 38 | -------------------------------------------------------------------------------- /airflow/dags/dailymed_rxnorm/load-dailymed_rxnorm.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.dailymed_rxnorm */ 2 | DROP TABLE IF EXISTS sagerx_lake.dailymed_rxnorm CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.dailymed_rxnorm ( 5 | setid TEXT, 6 | spl_version TEXT, 7 | rxcui TEXT, 8 | rxstr TEXT, 9 | rxtty TEXT 10 | ); 11 | 12 | COPY sagerx_lake.dailymed_rxnorm 13 | FROM '{data_path}/rxnorm_mappings.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER; 14 | -------------------------------------------------------------------------------- /airflow/dags/dailymed_zip_file_metadata/dag.py: -------------------------------------------------------------------------------- 1 | from airflow_operator import create_dag 2 | from airflow.utils.helpers import chain 3 | 4 | from common_dag_tasks import extract, get_ordered_sql_tasks, get_ds_folder 5 | from sagerx import read_sql_file 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator 7 | 8 | 9 | dag_id = "dailymed_zip_file_metadata" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule= "0 5 * * *", # run at 5am every day 14 | max_active_runs=1, 15 | concurrency=2, 16 | ) 17 | 18 | with dag: 19 | url = "https://dailymed-data.nlm.nih.gov/public-release-files/dm_spl_zip_files_meta_data.zip" 20 | ds_folder = get_ds_folder(dag_id) 21 | 22 | extract_task = extract(dag_id,url) 23 | 24 | task_list = [extract_task] 25 | for sql in get_ordered_sql_tasks(dag_id): 26 | sql_path = ds_folder / sql 27 | task_id = sql[:-4] #remove .sql 28 | 29 | sql_task = PostgresOperator( 30 | task_id=task_id, 31 | postgres_conn_id="postgres_default", 32 | sql=read_sql_file(sql_path).format(data_path=extract_task), 33 | dag=dag 34 | ) 35 | task_list.append(sql_task) 36 | 37 | chain(*task_list) 38 | -------------------------------------------------------------------------------- /airflow/dags/dailymed_zip_file_metadata/load-dailymed_zip_file_metadata.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.dailymed_zip_file_metadata */ 2 | DROP TABLE IF EXISTS sagerx_lake.dailymed_zip_file_metadata; 3 | 4 | CREATE TABLE sagerx_lake.dailymed_zip_file_metadata ( 5 | setid TEXT, 6 | zip_file_name TEXT, 7 | upload_date TEXT, 8 | spl_version TEXT, 9 | title TEXT 10 | ); 11 | 12 | COPY sagerx_lake.dailymed_zip_file_metadata 13 | FROM '{data_path}/dm_spl_zip_files_meta_data.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER; 14 | -------------------------------------------------------------------------------- /airflow/dags/export_marts/dag.py: -------------------------------------------------------------------------------- 1 | import sqlalchemy 2 | import pandas as pd 3 | import boto3 4 | 5 | from io import StringIO 6 | from os import environ 7 | from airflow_operator import create_dag 8 | from airflow.decorators import dag,task 9 | from airflow.operators.trigger_dagrun import TriggerDagRunOperator 10 | from airflow.hooks.subprocess import SubprocessHook 11 | from airflow.hooks.postgres_hook import PostgresHook 12 | from airflow.models import Variable 13 | 14 | 15 | 16 | dag = create_dag( 17 | dag_id="export_marts", 18 | schedule = "0 7 * * 2", #every tuesday at 7:00am 19 | catchup=False, 20 | concurrency=2 21 | ) 22 | 23 | with dag: 24 | 25 | @task 26 | def export_marts(): 27 | pg_hook = PostgresHook(postgres_conn_id="postgres_default") 28 | engine = pg_hook.get_sqlalchemy_engine() 29 | marts_list = ["all_ndc_descriptions","atc_codes_to_rxnorm_products","all_ndcs_to_sources","products_to_inactive_ingredients","products","brand_products_with_related_ndcs"] 30 | mart_dfs={} 31 | with engine.connect() as connection: 32 | for mart in marts_list: 33 | if sqlalchemy.inspect(engine).has_table(mart, schema='sagerx_dev'): 34 | print(f'{mart} exists and will be exported') 35 | df = pd.read_sql(f"SELECT * FROM sagerx_dev.{mart};", con=connection) 36 | mart_dfs[mart] = df 37 | 38 | access_key = environ.get("AWS_ACCESS_KEY_ID") 39 | secret_key = environ.get("AWS_SECRET_ACCESS_KEY") 40 | dest_bucket = environ.get("AWS_DEST_BUCKET") 41 | 42 | s3_resource = boto3.resource( 43 | 's3', 44 | aws_access_key_id= access_key, 45 | aws_secret_access_key= secret_key 46 | ) 47 | 48 | for k in list(mart_dfs.keys()): 49 | print(f'putting {k}') 50 | csv_buffer = StringIO() 51 | mart_dfs[k].to_csv(csv_buffer, index=False) 52 | 53 | s3_resource.Object(dest_bucket, f'{k}.csv').put(Body=csv_buffer.getvalue()) 54 | 55 | export_marts() 56 | -------------------------------------------------------------------------------- /airflow/dags/fda_enforcement/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | from airflow_operator import create_dag 3 | from common_dag_tasks import extract,transform, get_ds_folder 4 | from fda_enforcement.dag_tasks import load_json 5 | # from airflow.operators.python import ShortCircuitOperator 6 | # from airflow.providers.postgres.operators.postgres import PostgresOperator 7 | # from sagerx import read_sql_file 8 | 9 | dag_id = "fda_enforcement" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule="0 4 * * 3", 14 | start_date=pendulum.yesterday(), 15 | max_active_runs=1, 16 | concurrency=2, 17 | ) 18 | 19 | 20 | with dag: 21 | url = "https://download.open.fda.gov/drug/enforcement/drug-enforcement-0001-of-0001.json.zip" 22 | ds_folder = get_ds_folder(dag_id) 23 | file_name = "/drug-enforcement-0001-of-0001.json" 24 | 25 | extract_task = extract(dag_id,url) 26 | 27 | load_task = load_json(str(extract_task)+file_name) 28 | 29 | transform_staging_task = transform.override(task_id='transform-staging')(dag_id) 30 | transform_intermediate_task = transform.override(task_id='transform-intermediate')(dag_id,'intermediate') 31 | 32 | extract_task >> load_task >> transform_staging_task >> transform_intermediate_task -------------------------------------------------------------------------------- /airflow/dags/fda_enforcement/dag_tasks.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import task 2 | from common_dag_tasks import url_request 3 | from sagerx import read_json_file, load_df_to_pg 4 | 5 | # Task to download data from web location 6 | @task(task_id='extract') 7 | def fda_enf_extract(data_interval_start=None, data_interval_end=None): 8 | import pandas as pd 9 | import logging 10 | 11 | start_date = data_interval_start.format("YYYYMMDD") 12 | end_date = data_interval_end.format("YYYYMMDD") 13 | print(f"Start date: {start_date}, End date: {end_date}") 14 | 15 | url = f"https://api.fda.gov/drug/enforcement.json?search=report_date:[{start_date}+TO+{end_date}]&limit=1000" 16 | logging.info(url) 17 | 18 | response = url_request(url) 19 | 20 | json_object = response.json()["results"] 21 | 22 | df = pd.DataFrame(json_object) 23 | df.set_index("recall_number") 24 | 25 | return df 26 | 27 | @task 28 | def load_json(data_path): 29 | import pandas as pd 30 | print(f"JSON path: {data_path}") 31 | json_object = read_json_file(data_path) 32 | df = pd.DataFrame(json_object["results"]) 33 | df.set_index("recall_number") 34 | print(f"Dataframe loaded. Number of rows: {len(df)}") 35 | load_df_to_pg(df,"sagerx_lake","fda_enforcement","replace",dtype_name="openfda") 36 | -------------------------------------------------------------------------------- /airflow/dags/fda_excluded/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | 3 | from airflow_operator import create_dag 4 | from airflow.utils.helpers import chain 5 | 6 | from common_dag_tasks import extract, transform, get_ordered_sql_tasks, get_ds_folder 7 | from sagerx import read_sql_file 8 | from airflow.decorators import dag,task 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator 10 | 11 | 12 | dag_id = "fda_excluded" 13 | 14 | dag = create_dag( 15 | dag_id=dag_id, 16 | schedule= "30 4 * * *", # run at 4:30am every day 17 | start_date=pendulum.yesterday(), 18 | catchup=False, 19 | max_active_runs=1, 20 | concurrency=2, 21 | ) 22 | 23 | with dag: 24 | url = "https://www.accessdata.fda.gov/cder/ndc_excluded.zip" 25 | ds_folder = get_ds_folder(dag_id) 26 | 27 | extract_task = extract(dag_id,url) 28 | transform_task = transform(dag_id) 29 | 30 | sql_tasks = [] 31 | for sql in get_ordered_sql_tasks(dag_id): 32 | sql_path = ds_folder / sql 33 | task_id = sql[:-4] #remove .sql 34 | sql_task = PostgresOperator( 35 | task_id=task_id, 36 | postgres_conn_id="postgres_default", 37 | sql=read_sql_file(sql_path).format(data_path=extract_task), 38 | dag=dag 39 | ) 40 | sql_tasks.append(sql_task) 41 | 42 | extract_task >> sql_tasks >> transform_task -------------------------------------------------------------------------------- /airflow/dags/fda_excluded/load_package.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.fda_excluded_package */ 2 | DROP TABLE IF EXISTS sagerx_lake.fda_excluded_package CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.fda_excluded_package ( 5 | productid TEXT NOT NULL, 6 | productndc TEXT NOT NULL, 7 | ndcpackagecode TEXT, 8 | packagedescription TEXT, 9 | startmarketingdate TEXT, 10 | endmarketingdate TEXT, 11 | ndc_exclude_flag TEXT, 12 | sample_package TEXT 13 | ); 14 | 15 | COPY sagerx_lake.fda_excluded_package 16 | FROM '{data_path}/Packages_excluded.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';; 17 | 18 | CREATE INDEX IF NOT EXISTS x_productid 19 | ON sagerx_lake.fda_excluded_package(productid); -------------------------------------------------------------------------------- /airflow/dags/fda_excluded/load_product.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.fda_excluded_package */ 2 | DROP TABLE IF EXISTS sagerx_lake.fda_excluded_product CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.fda_excluded_product ( 5 | productid TEXT, 6 | productndc TEXT, 7 | producttypename TEXT, 8 | proprietaryname TEXT, 9 | proprietarynamesuffix TEXT, 10 | nonproprietaryname TEXT, 11 | dosageformname TEXT, 12 | routename TEXT, 13 | startmarketingdate TEXT, 14 | endmarketingdate TEXT, 15 | marketingcategoryname TEXT, 16 | applicationnumber TEXT, 17 | labelername TEXT, 18 | substancename TEXT, 19 | active_numerator_strength TEXT, 20 | active_ingred_unit TEXT, 21 | pharm_classes TEXT, 22 | deaschedule TEXT, 23 | ndc_exclude_flag TEXT, 24 | listing_record_certified_through TEXT, 25 | PRIMARY KEY (productid) 26 | ); 27 | 28 | COPY sagerx_lake.fda_excluded_product 29 | FROM '{data_path}/Products_excluded.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';; 30 | 31 | CREATE INDEX IF NOT EXISTS x_productid 32 | ON sagerx_lake.fda_excluded_product(productid); -------------------------------------------------------------------------------- /airflow/dags/fda_ndc/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | 3 | from airflow_operator import create_dag 4 | from airflow.providers.postgres.operators.postgres import PostgresOperator 5 | 6 | from common_dag_tasks import extract, transform, generate_sql_list, get_ds_folder 7 | from sagerx import read_sql_file 8 | 9 | dag_id = "fda_ndc" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule="0 4 * * *", 14 | start_date=pendulum.yesterday(), 15 | catchup=False, 16 | concurrency=2, 17 | ) 18 | 19 | with dag: 20 | url= "https://www.accessdata.fda.gov/cder/ndctext.zip" 21 | ds_folder = get_ds_folder(dag_id) 22 | 23 | extract_task = extract(dag_id,url) 24 | transform_task = transform(dag_id) 25 | 26 | sql_tasks = [] 27 | for sql in generate_sql_list(dag_id): 28 | sql_path = ds_folder / sql 29 | task_id = sql[:-4] #remove .sql 30 | sql_task = PostgresOperator( 31 | task_id=task_id, 32 | postgres_conn_id="postgres_default", 33 | sql=read_sql_file(sql_path).format(data_path=extract_task), 34 | dag=dag 35 | ) 36 | sql_tasks.append(sql_task) 37 | 38 | extract_task >> sql_tasks >> transform_task 39 | -------------------------------------------------------------------------------- /airflow/dags/fda_ndc/load_package.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.fda_ndc_package */ 2 | DROP TABLE IF EXISTS sagerx_lake.fda_ndc_package CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.fda_ndc_package ( 5 | productid TEXT NOT NULL, 6 | productndc TEXT NOT NULL, 7 | ndcpackagecode TEXT, 8 | packagedescription TEXT, 9 | startmarketingdate TEXT, 10 | endmarketingdate TEXT, 11 | ndc_exclude_flag TEXT, 12 | sample_package TEXT 13 | ); 14 | 15 | COPY sagerx_lake.fda_ndc_package 16 | FROM '{data_path}/package.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';; 17 | 18 | CREATE INDEX IF NOT EXISTS x_productid 19 | ON sagerx_lake.fda_ndc_package(productid); -------------------------------------------------------------------------------- /airflow/dags/fda_ndc/load_product.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.fda_ndc_product */ 2 | DROP TABLE IF EXISTS sagerx_lake.fda_ndc_product CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.fda_ndc_product ( 5 | productid TEXT, 6 | productndc TEXT, 7 | producttypename TEXT, 8 | proprietaryname TEXT, 9 | proprietarynamesuffix TEXT, 10 | nonproprietaryname TEXT, 11 | dosageformname TEXT, 12 | routename TEXT, 13 | startmarketingdate TEXT, 14 | endmarketingdate TEXT, 15 | marketingcategoryname TEXT, 16 | applicationnumber TEXT, 17 | labelername TEXT, 18 | substancename TEXT, 19 | active_numerator_strength TEXT, 20 | active_ingred_unit TEXT, 21 | pharm_classes TEXT, 22 | deaschedule TEXT, 23 | ndc_exclude_flag TEXT, 24 | listing_record_certified_through TEXT, 25 | PRIMARY KEY (productid) 26 | ); 27 | 28 | COPY sagerx_lake.fda_ndc_product 29 | FROM '{data_path}/product.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';; 30 | 31 | CREATE INDEX IF NOT EXISTS x_productid 32 | ON sagerx_lake.fda_ndc_product(productid); -------------------------------------------------------------------------------- /airflow/dags/fda_unfinished/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | 3 | from airflow_operator import create_dag 4 | from airflow.utils.helpers import chain 5 | 6 | from common_dag_tasks import extract, transform, get_ordered_sql_tasks, get_ds_folder 7 | from sagerx import read_sql_file 8 | from airflow.providers.postgres.operators.postgres import PostgresOperator 9 | 10 | 11 | dag_id = "fda_unfinished" 12 | 13 | dag = create_dag( 14 | dag_id=dag_id, 15 | schedule= "0 4 * * *", # run a 4:15am every day 16 | start_date=pendulum.yesterday(), 17 | catchup=False, 18 | max_active_runs=1, 19 | concurrency=2, 20 | ) 21 | 22 | with dag: 23 | url = "https://www.accessdata.fda.gov/cder/ndc_unfinished.zip" 24 | ds_folder = get_ds_folder(dag_id) 25 | 26 | extract_task = extract(dag_id,url) 27 | transform_task = transform(dag_id) 28 | 29 | sql_tasks = [] 30 | for sql in get_ordered_sql_tasks(dag_id): 31 | sql_path = ds_folder / sql 32 | task_id = sql[:-4] #remove .sql 33 | sql_task = PostgresOperator( 34 | task_id=task_id, 35 | postgres_conn_id="postgres_default", 36 | sql=read_sql_file(sql_path).format(data_path=extract_task), 37 | dag=dag 38 | ) 39 | sql_tasks.append(sql_task) 40 | 41 | extract_task >> sql_tasks >> transform_task 42 | -------------------------------------------------------------------------------- /airflow/dags/fda_unfinished/load_package.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.fda_unfinished_package */ 2 | DROP TABLE IF EXISTS sagerx_lake.fda_unfinished_package CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.fda_unfinished_package ( 5 | productid TEXT NOT NULL, 6 | productndc TEXT NOT NULL, 7 | ndcpackagecode TEXT, 8 | packagedescription TEXT, 9 | startmarketingdate TEXT, 10 | endmarketingdate TEXT 11 | ); 12 | 13 | COPY sagerx_lake.fda_unfinished_package 14 | FROM '{data_path}/unfinished_package.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252'; 15 | 16 | CREATE INDEX IF NOT EXISTS x_productid 17 | ON sagerx_lake.fda_unfinished_package(productid); -------------------------------------------------------------------------------- /airflow/dags/fda_unfinished/load_product.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.fda_unfinished_product*/ 2 | DROP TABLE IF EXISTS sagerx_lake.fda_unfinished_product CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.fda_unfinished_product ( 5 | productid TEXT, 6 | productndc TEXT, 7 | producttypename TEXT, 8 | nonproprietaryname TEXT, 9 | dosageformname TEXT, 10 | startmarketingdate TEXT, 11 | endmarketingdate TEXT, 12 | marketingcategoryname TEXT, 13 | labelername TEXT, 14 | substancename TEXT, 15 | active_numerator_strength TEXT, 16 | active_ingred_unit TEXT, 17 | deaschedule TEXT, 18 | listing_record_certified_through TEXT, 19 | PRIMARY KEY (productid) 20 | ); 21 | 22 | COPY sagerx_lake.fda_unfinished_product FROM '{data_path}/unfinished_product.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252'; 23 | 24 | CREATE INDEX IF NOT EXISTS x_productid 25 | ON sagerx_lake.fda_unfinished_product(productid); -------------------------------------------------------------------------------- /airflow/dags/fda_unii/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | 3 | from airflow_operator import create_dag 4 | from airflow.providers.postgres.operators.postgres import PostgresOperator 5 | from airflow.decorators import task 6 | 7 | from common_dag_tasks import extract, transform, generate_sql_list, get_ds_folder 8 | from sagerx import read_sql_file 9 | 10 | dag_id = "fda_unii" 11 | 12 | dag = create_dag( 13 | dag_id=dag_id, 14 | schedule="0 4 * * *", 15 | start_date=pendulum.yesterday(), 16 | catchup=False, 17 | concurrency=2, 18 | ) 19 | 20 | with dag: 21 | url= "https://precision.fda.gov/uniisearch/archive/latest/UNII_Data.zip" 22 | ds_folder = get_ds_folder(dag_id) 23 | 24 | extract_task = extract(dag_id,url) 25 | transform_task = transform(dag_id) 26 | 27 | @task 28 | def get_file_name(data_path) -> str: 29 | import re 30 | import os 31 | import logging 32 | 33 | logging.info(f'Data path: {data_path}') 34 | 35 | file_name = '' 36 | # note: extract_task contains the path to /opt/data/fda_unii/UNII_Data/ 37 | # example file_name: UNII_Records_22Jun2024.txt 38 | for subfile in os.listdir(data_path): 39 | if re.match("UNII_Records", subfile): 40 | file_name = subfile 41 | 42 | if file_name == '': 43 | logging.error('Could not find file_name.') 44 | 45 | return file_name 46 | 47 | file_name_task = get_file_name(extract_task) 48 | 49 | sql_tasks = [] 50 | for sql in generate_sql_list(dag_id): 51 | sql_path = ds_folder / sql 52 | task_id = sql[:-4] #remove .sql 53 | sql_task = PostgresOperator( 54 | task_id=task_id, 55 | postgres_conn_id="postgres_default", 56 | sql=read_sql_file(sql_path).format( 57 | data_path=extract_task, 58 | file_name=file_name_task 59 | ), 60 | dag=dag 61 | ) 62 | sql_tasks.append(sql_task) 63 | 64 | file_name_task >> sql_tasks >> transform_task 65 | -------------------------------------------------------------------------------- /airflow/dags/fda_unii/load_unii.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.fda_unii */ 2 | DROP TABLE IF EXISTS sagerx_lake.fda_unii CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.fda_unii ( 5 | unii TEXT NOT NULL, 6 | display_name TEXT, 7 | rn TEXT, 8 | ec TEXT, 9 | ncit TEXT, 10 | rxcui TEXT, 11 | pubchem TEXT, 12 | epa_comptox TEXT, 13 | smsid TEXT, 14 | catalogue_of_life TEXT, 15 | itis TEXT, 16 | ncbi TEXT, 17 | plants TEXT, 18 | powo TEXT, 19 | grin TEXT, 20 | mpns TEXT, 21 | inn_id TEXT, 22 | usan_id TEXT, 23 | mf TEXT, 24 | inchikey TEXT, 25 | smiles TEXT, 26 | ingredient_type TEXT, 27 | substance_type TEXT, 28 | uuid TEXT, 29 | dailymed TEXT 30 | ); 31 | 32 | COPY sagerx_lake.fda_unii 33 | FROM '{data_path}/{file_name}' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';; 34 | -------------------------------------------------------------------------------- /airflow/dags/mccpd/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | from airflow.decorators import dag 3 | from mccpd.dag_tasks import extract, load 4 | from common_dag_tasks import transform 5 | 6 | dag_id = "mccpd" 7 | 8 | @dag( 9 | dag_id=dag_id, 10 | schedule_interval="0 3 15 * *", # Runs on the 15th of each month at 3 AM 11 | start_date=pendulum.today('UTC').add(days=-1), 12 | catchup=False 13 | ) 14 | def mccpd(): 15 | extract_task = extract(dag_id) 16 | load_task = load(extract_task) 17 | transform_task = transform(dag_id, models_subdir=['staging', 'intermediate']) 18 | 19 | extract_task >> load_task >> transform_task 20 | 21 | dag = mccpd() 22 | -------------------------------------------------------------------------------- /airflow/dags/mccpd/dag_tasks.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import task 2 | import pandas as pd 3 | import re 4 | from sagerx import fetch_json, camel_to_snake, get_rxcuis, load_df_to_pg, get_concurrent_api_results, write_json_file, read_json_file, create_path 5 | from common_dag_tasks import url_request, get_data_folder 6 | import logging 7 | from airflow.models import Variable 8 | from airflow.hooks.postgres_hook import PostgresHook 9 | 10 | @task 11 | def extract(dag_id:str) -> str: 12 | url = 'https://us-central1-costplusdrugs-publicapi.cloudfunctions.net/main' 13 | results = fetch_json(url) 14 | print(results) 15 | 16 | data_folder = get_data_folder(dag_id) 17 | file_path = create_path(data_folder) / 'data.json' 18 | file_path_str = file_path.resolve().as_posix() 19 | 20 | write_json_file(file_path_str, results) 21 | 22 | print(f"Extraction Completed! Data saved to file: {file_path_str}") 23 | 24 | return file_path_str 25 | 26 | 27 | @task 28 | def load(file_path_str:str): 29 | results = read_json_file(file_path_str) 30 | 31 | # Create a DataFrame directly from JSON 32 | df = pd.json_normalize( 33 | results, 34 | record_path=["results"] 35 | ) 36 | 37 | print(f'Dataframe created of {len(df)} length.') 38 | load_df_to_pg(df,"sagerx_lake","mccpd","replace",index=False) 39 | -------------------------------------------------------------------------------- /airflow/dags/nadac/load_nadac.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.nadac */ 2 | CREATE TABLE IF NOT EXISTS sagerx_lake.nadac ( 3 | ndc_description TEXT NOT NULL, 4 | ndc VARCHAR (11) NOT NULL, 5 | nadac_per_unit NUMERIC (12,5), 6 | effective_date DATE NOT NULL, 7 | pricing_unit TEXT, 8 | pharmacy_type_indicator TEXT, 9 | otc TEXT, 10 | explanation_code TEXT, 11 | classification_for_rate_setting TEXT, 12 | corresponding_generic_drug_nadac_per_unit TEXT, 13 | corresponding_generic_drug_effective_date DATE, 14 | as_of_date DATE 15 | ); 16 | 17 | TRUNCATE sagerx_lake.nadac; 18 | 19 | COPY sagerx_lake.nadac 20 | FROM '{{ ti.xcom_pull(task_ids='extract') }}' CSV HEADER; -------------------------------------------------------------------------------- /airflow/dags/orange_book/dag.py: -------------------------------------------------------------------------------- 1 | from airflow_operator import create_dag 2 | from airflow.utils.helpers import chain 3 | 4 | from common_dag_tasks import extract, get_ordered_sql_tasks, get_ds_folder 5 | from sagerx import read_sql_file 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator 7 | 8 | 9 | dag_id = "orange_book" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule= "15 0 24 1 *", # runs once monthly on the 24th day at 00:15 14 | max_active_runs=1, 15 | concurrency=2, 16 | ) 17 | 18 | with dag: 19 | url = "https://www.fda.gov/media/76860/download" 20 | ds_folder = get_ds_folder(dag_id) 21 | 22 | extract_task = extract(dag_id,url) 23 | 24 | task_list = [extract_task] 25 | for sql in get_ordered_sql_tasks(dag_id): 26 | sql_path = ds_folder / sql 27 | task_id = sql[:-4] #remove .sql 28 | 29 | sql_task = PostgresOperator( 30 | task_id=task_id, 31 | postgres_conn_id="postgres_default", 32 | sql=read_sql_file(sql_path).format(data_path=extract_task), 33 | dag=dag 34 | ) 35 | task_list.append(sql_task) 36 | 37 | chain(*task_list) 38 | -------------------------------------------------------------------------------- /airflow/dags/orange_book/load_exclusivity.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.orange_book_exlusivity */ 2 | DROP TABLE IF EXISTS sagerx_lake.orange_book_exlusivity; 3 | 4 | CREATE TABLE sagerx_lake.orange_book_exlusivity ( 5 | appl_type TEXT, 6 | appl_no TEXT, 7 | product_no TEXT, 8 | exclusivity_code TEXT, 9 | exclusivity_date TEXT 10 | ); 11 | 12 | COPY sagerx_lake.orange_book_exlusivity 13 | FROM '{data_path}/exclusivity.txt' DELIMITER '~' CSV HEADER; -------------------------------------------------------------------------------- /airflow/dags/orange_book/load_patent.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.orange_book_patent */ 2 | DROP TABLE IF EXISTS sagerx_lake.orange_book_patent; 3 | 4 | CREATE TABLE sagerx_lake.orange_book_patent ( 5 | appl_type TEXT, 6 | appl_no TEXT, 7 | product_no TEXT, 8 | patent_no TEXT, 9 | patent_expire_date_text TEXT, 10 | drug_substance_flag TEXT, 11 | drug_product_flag TEXT, 12 | patent_use_code TEXT, 13 | delist_flag TEXT, 14 | submission_date TEXT 15 | ); 16 | 17 | COPY sagerx_lake.orange_book_patent 18 | FROM '{data_path}/patent.txt' DELIMITER '~' CSV HEADER; -------------------------------------------------------------------------------- /airflow/dags/orange_book/load_products.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.orange_book_products */ 2 | DROP TABLE IF EXISTS sagerx_lake.orange_book_products; 3 | 4 | CREATE TABLE sagerx_lake.orange_book_products ( 5 | ingredient TEXT, 6 | df_route TEXT, 7 | trade_name TEXT, 8 | applicant TEXT, 9 | strength TEXT, 10 | appl_type TEXT, 11 | appl_no TEXT, 12 | product_no TEXT, 13 | te_code TEXT, 14 | approval_date TEXT, 15 | rld TEXT, 16 | rs TEXT, 17 | type TEXT, 18 | applicant_full_name TEXT 19 | ); 20 | 21 | COPY sagerx_lake.orange_book_products 22 | FROM '{data_path}/products.txt' DELIMITER '~' CSV HEADER; -------------------------------------------------------------------------------- /airflow/dags/purple_book/dag.py: -------------------------------------------------------------------------------- 1 | from airflow_operator import create_dag 2 | from airflow.utils.helpers import chain 3 | 4 | from common_dag_tasks import extract, get_ordered_sql_tasks, get_ds_folder 5 | from sagerx import read_sql_file 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator 7 | from purple_book.dag_tasks import modify_csv 8 | 9 | dag_id = "purple_book" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule= "15 0 24 1 *", # runs once monthly on the 23rd 14 | max_active_runs=1, 15 | concurrency=2, 16 | ) 17 | 18 | with dag: 19 | file_name = "{{ (execution_date - macros.dateutil.relativedelta.relativedelta(months=1)).strftime('%Y') }}/purplebook-search-{{ (execution_date - macros.dateutil.relativedelta.relativedelta(months=1)).strftime('%B').lower() }}-data-download.csv" 20 | url = f"https://purplebooksearch.fda.gov/files/{file_name}" 21 | ds_folder = get_ds_folder(dag_id) 22 | 23 | extract_task = extract(dag_id,url) 24 | modify_task = modify_csv(extract_task) 25 | 26 | task_list = [extract_task,modify_task] 27 | 28 | for sql in get_ordered_sql_tasks(dag_id): 29 | sql_path = ds_folder / sql 30 | task_id = sql[:-4] #remove .sql 31 | 32 | sql_task = PostgresOperator( 33 | task_id=task_id, 34 | postgres_conn_id="postgres_default", 35 | sql=read_sql_file(sql_path).format(data_path=extract_task, file_name=file_name), 36 | dag=dag 37 | ) 38 | task_list.append(sql_task) 39 | 40 | chain(*task_list) 41 | -------------------------------------------------------------------------------- /airflow/dags/purple_book/dag_tasks.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import task 2 | import csv 3 | 4 | @task 5 | def modify_csv(file_path): 6 | print(f"Modifying CSV file at {file_path}") 7 | 8 | with open(file_path, newline='') as file: 9 | csvreader = csv.reader(file) 10 | 11 | for _ in range(3): 12 | next(csvreader,None) # skips the first 3 rows 13 | 14 | """ 15 | Skip the top section by checking if the row is empty (signifying the end of the top section) 16 | 17 | The bottom section of each report contains all products in the Purple Book database for that month, 18 | including the products listed in the top section that were added or changed. 19 | """ 20 | 21 | for row in csvreader: 22 | if not any(row): 23 | break 24 | 25 | rows = [] 26 | for row in csvreader: 27 | rows.append(row) 28 | 29 | with open(file_path, 'w', encoding='UTF8', newline='') as f: 30 | writer = csv.writer(f) 31 | writer.writerows(rows) 32 | -------------------------------------------------------------------------------- /airflow/dags/purple_book/load_purple_book.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.purple_book */ 2 | DROP TABLE IF EXISTS sagerx_lake.purple_book; 3 | 4 | CREATE TABLE sagerx_lake.purple_book ( 5 | nru TEXT, 6 | applicant TEXT, 7 | bla_number TEXT, 8 | proprietary_name TEXT, 9 | proper_name TEXT, 10 | bla_type TEXT, 11 | strength TEXT, 12 | dosage_form TEXT, 13 | route_of_administration TEXT, 14 | product_presentation TEXT, 15 | status TEXT, 16 | licensure TEXT, 17 | approval_date TEXT, 18 | ref_product_proper_name TEXT, 19 | ref_Product_proprietary_name TEXT, 20 | supplement_number TEXT, 21 | submission_type TEXT, 22 | license_number TEXT, 23 | product_number TEXT, 24 | center TEXT, 25 | date_of_first_licensure TEXT, 26 | exclusivity_expiration_date TEXT, 27 | first_interchangeable_exclusivity_exp_date TEXT, 28 | ref_product_exclusivity_exp_date TEXT, 29 | orphan_exclusivity_exp_date TEXT 30 | ); 31 | 32 | COPY sagerx_lake.purple_book 33 | FROM '{data_path}' DELIMITER ',' CSV HEADER QUOTE '"'; 34 | -------------------------------------------------------------------------------- /airflow/dags/rxclass/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | 3 | from airflow.decorators import dag 4 | 5 | from rxclass.dag_tasks import extract, load 6 | 7 | from common_dag_tasks import transform 8 | 9 | 10 | dag_id = "rxclass" 11 | 12 | @dag( 13 | dag_id=dag_id, 14 | schedule_interval="0 3 15 * *", # Runs on the 15th of each month at 3 AM 15 | start_date=pendulum.today('UTC').add(days=-1), 16 | catchup=False 17 | ) 18 | def rxclass(): 19 | # Main processing task 20 | extract_task = extract(dag_id) 21 | load_task = load(extract_task) 22 | transform_task = transform(dag_id, models_subdir=['staging', 'intermediate']) 23 | 24 | extract_task >> load_task >> transform_task 25 | 26 | # Instantiate the DAG 27 | dag = rxclass() 28 | -------------------------------------------------------------------------------- /airflow/dags/rxclass/dag_tasks.py: -------------------------------------------------------------------------------- 1 | from airflow.decorators import task 2 | import pandas as pd 3 | from sagerx import get_rxcuis, load_df_to_pg, get_concurrent_api_results, write_json_file, read_json_file, create_path 4 | from common_dag_tasks import get_data_folder 5 | import logging 6 | 7 | def create_url_list(rxcui_list:list)-> list: 8 | urls=[] 9 | 10 | for rxcui in rxcui_list: 11 | urls.append(f"https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json?rxcui={rxcui}") 12 | return urls 13 | 14 | @task 15 | def extract(dag_id:str) -> str: 16 | """ 17 | Retrieves RxClass concepts from RxNav for the EPC class type, 18 | processes them concurrently, and loads results into Postgres. 19 | """ 20 | logging.info("Starting data retrieval for RxClass...") 21 | 22 | # 1. Fetch the list of concepts 23 | tty_list = ['IN','PIN','MIN','SCDC','SCDF','SCDFP','SCDG','SCDGP','SCD','GPCK','BN','SBDC','SBDF','SBDFP','SBDG','SBD','BPCK'] 24 | #tty_list = ['SCD', 'SBD', 'GPCK', 'BPCK'] 25 | #tty_list = ['BPCK'] 26 | rxcui_list = get_rxcuis(tty_list, active_only = True) 27 | logging.info(f"Fetched {len(rxcui_list)} RXCUIs.") 28 | 29 | # 1.5. Create list of urls 30 | url_list = create_url_list(rxcui_list) 31 | 32 | results = get_concurrent_api_results(url_list) 33 | 34 | data_folder = get_data_folder(dag_id) 35 | file_path = create_path(data_folder) / 'data.json' 36 | file_path_str = file_path.resolve().as_posix() 37 | 38 | write_json_file(file_path_str, results) 39 | 40 | print(f"Extraction Completed! Data saved to file: {file_path_str}") 41 | 42 | return file_path_str 43 | 44 | 45 | @task 46 | def load(file_path_str:str): 47 | results = read_json_file(file_path_str) 48 | 49 | classes = [] 50 | for result in results: 51 | response = result['response'] 52 | if 'rxclassDrugInfoList' in response: 53 | for drug_info in response["rxclassDrugInfoList"]["rxclassDrugInfo"]: 54 | classes.append( 55 | dict( 56 | rxcui = drug_info["minConcept"].get("rxcui"), 57 | name = drug_info["minConcept"].get("name",""), 58 | tty = drug_info["minConcept"].get("tty",""), 59 | rela = drug_info.get("rela",""), 60 | class_id = drug_info["rxclassMinConceptItem"].get("classId",""), 61 | class_name = drug_info["rxclassMinConceptItem"].get("className",""), 62 | class_type = drug_info["rxclassMinConceptItem"].get("classType",""), 63 | rela_source = drug_info.get("relaSource","") 64 | ) 65 | ) 66 | df = pd.DataFrame(classes).drop_duplicates() 67 | print(f'Dataframe created of {len(df)} length.') 68 | load_df_to_pg(df,"sagerx_lake","rxclass","replace",index=False) 69 | -------------------------------------------------------------------------------- /airflow/dags/rxnorm/dag.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pendulum 3 | 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel 5 | 6 | from airflow.decorators import dag, task 7 | 8 | from airflow.operators.python import get_current_context 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator 10 | from airflow.hooks.postgres_hook import PostgresHook 11 | from airflow.models import Variable 12 | 13 | from common_dag_tasks import extract, transform, run_subprocess_command 14 | 15 | 16 | @dag( 17 | schedule="0 0 10 * *", 18 | start_date=pendulum.datetime(2005, 1, 1), 19 | catchup=False, 20 | ) 21 | def rxnorm(): 22 | dag_id = "rxnorm" 23 | api_key = Variable.get("umls_api") 24 | ds_url = f"https://uts-ws.nlm.nih.gov/download?url=https://download.nlm.nih.gov/umls/kss/rxnorm/RxNorm_full_current.zip&apiKey={api_key}" 25 | 26 | extract_task = extract(dag_id, ds_url) 27 | 28 | # Task to load data into source db schema 29 | load = [] 30 | ds_folder = Path("/opt/airflow/dags") / dag_id 31 | for sql in get_sql_list("load", ds_folder): 32 | sql_path = ds_folder / sql 33 | task_id = sql[:-4] 34 | load.append( 35 | PostgresOperator( 36 | task_id=task_id, 37 | postgres_conn_id="postgres_default", 38 | sql=read_sql_file(sql_path), 39 | ) 40 | ) 41 | 42 | transform_task = transform(dag_id, models_subdir=['staging', 'intermediate']) 43 | 44 | extract_task >> load >> transform_task 45 | 46 | rxnorm() 47 | -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxnconso.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxnconso */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnconso CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.rxnorm_rxnconso ( 5 | rxcui VARCHAR(8) NOT NULL, 6 | lat VARCHAR (3) DEFAULT 'ENG' NOT NULL, 7 | ts VARCHAR (1), 8 | lui VARCHAR(8), 9 | stt VARCHAR (3), 10 | sui VARCHAR (8), 11 | ispref VARCHAR (1), 12 | rxaui VARCHAR(8) NOT NULL, 13 | saui VARCHAR (50), 14 | scui VARCHAR (50), 15 | sdui VARCHAR (50), 16 | sab VARCHAR (20) NOT NULL, 17 | tty VARCHAR (20) NOT NULL, 18 | code VARCHAR (50) NOT NULL, 19 | str TEXT NOT NULL, 20 | srl VARCHAR (10), 21 | suppress VARCHAR (1), 22 | cvf VARCHAR(50), 23 | blank TEXT 24 | ); 25 | 26 | COPY sagerx_lake.rxnorm_rxnconso FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNCONSO.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 27 | --ESCAPE and QOUTE characters are dummy to remove default 28 | 29 | CREATE INDEX IF NOT EXISTS rxnconso_str 30 | ON sagerx_lake.rxnorm_rxnconso(str); 31 | 32 | 33 | CREATE INDEX IF NOT EXISTS rxnconso_rxcui 34 | ON sagerx_lake.rxnorm_rxnconso(rxcui); 35 | 36 | 37 | CREATE INDEX IF NOT EXISTS rxnconso_tty 38 | ON sagerx_lake.rxnorm_rxnconso(tty); 39 | 40 | 41 | CREATE INDEX IF NOT EXISTS rxnconso_code 42 | ON sagerx_lake.rxnorm_rxnconso(code); 43 | --IF NOT EXISTS added if in future table is not always dropped first 44 | -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxncui.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxncui */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxncui CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.rxnorm_rxncui ( 5 | cui1 varchar(8), 6 | ver_start varchar(40), 7 | ver_end varchar(40), 8 | cardinality varchar(8), 9 | cui2 varchar(8), 10 | blank TEXT 11 | ); 12 | 13 | COPY sagerx_lake.rxnorm_rxncui 14 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNCUI.RRF'CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 15 | --ESCAPE and QOUTE characters are dummy to remove default -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxncuichanges.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxncuichanges */ 2 | 3 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxncuichanges CASCADE; 4 | 5 | CREATE TABLE sagerx_lake.rxnorm_rxncuichanges ( 6 | rxaui varchar(8), 7 | code varchar(50), 8 | sab varchar(20), 9 | tty varchar(20), 10 | str varchar(3000), 11 | old_rxcui varchar(8) not null, 12 | new_rxcui varchar(8) NOT NULL, 13 | blank TEXT 14 | ); 15 | 16 | COPY sagerx_lake.rxnorm_rxncuichanges 17 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNCUICHANGES.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 18 | --ESCAPE and QOUTE characters are dummy to remove default 19 | -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxndoc.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxndoc */ 2 | 3 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxndoc CASCADE; 4 | 5 | CREATE TABLE sagerx_lake.rxnorm_rxndoc ( 6 | dockey varchar(50) NOT NULL, 7 | value varchar(1000), 8 | type varchar(50) NOT NULL, 9 | expl varchar(1000), 10 | blank TEXT 11 | ); 12 | 13 | COPY sagerx_lake.rxnorm_rxndoc 14 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNDOC.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 15 | --ESCAPE and QOUTE characters are dummy to remove default 16 | -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxnrel.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxnrel */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnrel CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.rxnorm_rxnrel ( 5 | rxcui1 varchar(8) , 6 | rxaui1 varchar(8), 7 | stype1 varchar(50), 8 | rel varchar(4) , 9 | rxcui2 varchar(8) , 10 | rxaui2 varchar(8), 11 | stype2 varchar(50), 12 | rela varchar(100) , 13 | rui varchar(10), 14 | srui varchar(50), 15 | sab varchar(20) NOT NULL, 16 | sl varchar(1000), 17 | dir varchar(1), 18 | rg varchar(10), 19 | suppress varchar(1), 20 | cvf varchar(50), 21 | blank TEXT 22 | ); 23 | 24 | COPY sagerx_lake.rxnorm_rxnrel 25 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNREL.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 26 | --ESCAPE and QOUTE characters are dummy to remove default 27 | 28 | CREATE INDEX IF NOT EXISTS rxnrel_rxcui1 29 | ON sagerx_lake.rxnorm_rxnrel(rxcui1); 30 | 31 | CREATE INDEX IF NOT EXISTS rxnrel_rxcui2 32 | ON sagerx_lake.rxnorm_rxnrel(rxcui2); 33 | 34 | CREATE INDEX IF NOT EXISTS rxnrel_rela 35 | ON sagerx_lake.rxnorm_rxnrel(rela); 36 | -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxnrxnatomarchive.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxnatomarchive */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnatomarchive CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.rxnorm_rxnatomarchive ( 5 | rxaui varchar(8) not null, 6 | aui varchar(10), 7 | str varchar(4000) not null, 8 | archive_timestamp varchar(280) not null, 9 | created_timestamp varchar(280) not null, 10 | updated_timestamp varchar(280) not null, 11 | code varchar(50), 12 | is_brand varchar(1), 13 | lat varchar(3), 14 | last_released varchar(30), 15 | saui varchar(50), 16 | vsab varchar(40), 17 | rxcui varchar(8), 18 | sab varchar(20), 19 | tty varchar(20), 20 | merged_to_rxcui varchar(8), 21 | blank TEXT 22 | ); 23 | 24 | COPY sagerx_lake.rxnorm_rxnatomarchive 25 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNATOMARCHIVE.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 26 | --ESCAPE and QOUTE characters are dummy to remove default 27 | 28 | CREATE INDEX IF NOT EXISTS rxnrel_rxaui 29 | ON sagerx_lake.rxnorm_rxnatomarchive(rxaui); 30 | 31 | CREATE INDEX IF NOT EXISTS rxnrel_rxcui 32 | ON sagerx_lake.rxnorm_rxnatomarchive(rxcui); 33 | 34 | CREATE INDEX IF NOT EXISTS rxnrel_mergedcui 35 | ON sagerx_lake.rxnorm_rxnatomarchive(merged_to_rxcui); -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxnsab.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxnsab */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnsab CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.rxnorm_rxnsab ( 5 | vcui varchar (8), 6 | rcui varchar (8), 7 | vsab varchar (40), 8 | rsab varchar (20) NOT NULL, 9 | son varchar (3000), 10 | sf varchar (20), 11 | sver varchar (20), 12 | vstart varchar (10), 13 | vend varchar (10), 14 | imeta varchar (10), 15 | rmeta varchar (10), 16 | slc varchar (1000), 17 | scc varchar (1000), 18 | srl integer, 19 | tfr integer, 20 | cfr integer, 21 | cxty varchar (50), 22 | ttyl varchar (300), 23 | atnl varchar (1000), 24 | lat varchar (3), 25 | cenc varchar (20), 26 | curver varchar (1), 27 | sabin varchar (1), 28 | ssn varchar (3000), 29 | scit varchar (4000), 30 | blank TEXT 31 | ); 32 | 33 | COPY sagerx_lake.rxnorm_rxnsab 34 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNSAB.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 35 | --ESCAPE and QOUTE characters are dummy to remove default -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxnsat.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxnsat */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnsat CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.rxnorm_rxnsat ( 5 | rxcui varchar(8) , 6 | lui varchar(8), 7 | sui varchar(8), 8 | rxaui varchar(8), 9 | stype varchar (50), 10 | code varchar (50), 11 | atui varchar(11), 12 | satui varchar (50), 13 | atn varchar (1000) NOT NULL, 14 | sab varchar (20) NOT NULL, 15 | atv varchar (7000), 16 | suppress varchar (1), 17 | cvf varchar (50), 18 | blank TEXT 19 | ); 20 | 21 | COPY sagerx_lake.rxnorm_rxnsat 22 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNSAT.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 23 | --ESCAPE and QOUTE characters are dummy to remove default 24 | 25 | CREATE INDEX IF NOT EXISTS rxnsat_rxcui 26 | ON sagerx_lake.rxnorm_rxnsat(rxcui); 27 | 28 | CREATE INDEX IF NOT EXISTS rxnsat_atv 29 | ON sagerx_lake.rxnorm_rxnsat(atv); 30 | 31 | CREATE INDEX IF NOT EXISTS rxnsat_atn 32 | ON sagerx_lake.rxnorm_rxnsat(atn); 33 | -------------------------------------------------------------------------------- /airflow/dags/rxnorm/load_rxnsty.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxnorm_rxnsty */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnsty CASCADE; 3 | 4 | CREATE TABLE sagerx_lake.rxnorm_rxnsty ( 5 | rxcui varchar(8) NOT NULL, 6 | tui varchar (4), 7 | stn varchar (100), 8 | sty varchar (50), 9 | atui varchar (11), 10 | cvf varchar (50), 11 | blank TEXT 12 | ); 13 | 14 | COPY sagerx_lake.rxnorm_rxnsty 15 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNSTY.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b'; 16 | --ESCAPE and QOUTE characters are dummy to remove default -------------------------------------------------------------------------------- /airflow/dags/rxnorm/terms-of-service.md: -------------------------------------------------------------------------------- 1 | # RxNorm Terms of Service 2 | 3 | ``` 4 | This product uses publicly available data courtesy of the U.S. National Library of Medicine (NLM), National Institutes of Health, Department of Health and Human Services; NLM is not responsible for the product and does not endorse or recommend this or any other product. 5 | ``` 6 | 7 | More information can be found at https://www.nlm.nih.gov/research/umls/rxnorm/docs/termsofservice.html. -------------------------------------------------------------------------------- /airflow/dags/rxnorm_historical/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | 3 | from airflow.decorators import dag 4 | 5 | from rxnorm_historical.dag_tasks import extract, load 6 | 7 | 8 | dag_id = "rxnorm_historical" 9 | 10 | @dag( 11 | dag_id=dag_id, 12 | schedule_interval="0 3 15 * *", # Runs on the 15th of each month at 3 AM 13 | start_date=pendulum.today('UTC').add(days=-1), 14 | catchup=False 15 | ) 16 | def rxnorm_historical(): 17 | # Main processing task 18 | extract_task = extract(dag_id) 19 | load_task = load(extract_task) 20 | 21 | extract_task >> load_task 22 | 23 | # Instantiate the DAG 24 | dag = rxnorm_historical() 25 | -------------------------------------------------------------------------------- /airflow/dags/rxterms/dag.py: -------------------------------------------------------------------------------- 1 | from airflow_operator import create_dag 2 | from airflow.utils.helpers import chain 3 | 4 | from common_dag_tasks import extract, transform, get_ordered_sql_tasks, get_ds_folder 5 | from sagerx import read_sql_file 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator 7 | 8 | 9 | dag_id = "rxterms" 10 | 11 | dag = create_dag( 12 | dag_id=dag_id, 13 | schedule= "45 0 15 1 *", # runs once monthly on the 15th day at 00:45 14 | max_active_runs=1, 15 | concurrency=2, 16 | ) 17 | 18 | with dag: 19 | mnth = "{{ macros.ds_format(ds, '%Y-%m-%d', '%Y%m' ) }}" 20 | url = f"https://data.lhncbc.nlm.nih.gov/public/rxterms/release/RxTerms{mnth}.zip" 21 | ds_folder = get_ds_folder(dag_id) 22 | 23 | extract_task = extract(dag_id,url) 24 | transform_task = transform(dag_id) 25 | 26 | sql_tasks = [] 27 | for sql in get_ordered_sql_tasks(dag_id): 28 | sql_path = ds_folder / sql 29 | task_id = sql[:-4] #remove .sql 30 | 31 | sql_task = PostgresOperator( 32 | task_id=task_id, 33 | postgres_conn_id="postgres_default", 34 | sql=read_sql_file(sql_path).format(data_path=extract_task, mnth=mnth), 35 | dag=dag 36 | ) 37 | sql_tasks.append(sql_task) 38 | 39 | extract_task >> sql_tasks >> transform_task 40 | -------------------------------------------------------------------------------- /airflow/dags/rxterms/load_ingredients.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxterms_ingredients */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxterms_ingredients; 3 | 4 | CREATE TABLE sagerx_lake.rxterms_ingredients ( 5 | rxcui TEXT, 6 | ingredient TEXT, 7 | ing_rxcui TEXT 8 | ); 9 | 10 | COPY sagerx_lake.rxterms_ingredients 11 | FROM '{data_path}/RxTermsIngredients{mnth}.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER; 12 | -------------------------------------------------------------------------------- /airflow/dags/rxterms/load_rxterms.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_lake.rxterms */ 2 | DROP TABLE IF EXISTS sagerx_lake.rxterms; 3 | 4 | CREATE TABLE sagerx_lake.rxterms ( 5 | rxcui TEXT, 6 | generic_rxcui TEXT, 7 | tty TEXT, 8 | full_name TEXT, 9 | rxn_dose_form TEXT, 10 | full_generic_name TEXT, 11 | brand_name TEXT, 12 | display_name TEXT, 13 | route TEXT, 14 | new_dose_form TEXT, 15 | strength TEXT, 16 | suppress_for TEXT, 17 | display_name_synonym TEXT, 18 | is_retired TEXT, 19 | sxdg_rxcui TEXT, 20 | sxdg_tty TEXT, 21 | sxdg_name TEXT, 22 | psn TEXT 23 | ); 24 | 25 | COPY sagerx_lake.rxterms 26 | FROM '{data_path}/RxTerms{mnth}.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER; 27 | -------------------------------------------------------------------------------- /airflow/dags/umls/dag.py: -------------------------------------------------------------------------------- 1 | import pendulum 2 | from airflow.decorators import dag 3 | from umls.dag_tasks import extract, load 4 | from common_dag_tasks import transform 5 | 6 | dag_id = "umls" 7 | 8 | @dag( 9 | dag_id=dag_id, 10 | schedule_interval="0 3 15 * *", # Runs on the 15th of each month at 3 AM 11 | start_date=pendulum.today('UTC').add(days=-1), 12 | catchup=False 13 | ) 14 | def umls(): 15 | extract_task = extract(dag_id) 16 | load_task = load(extract_task) 17 | transform_task = transform(dag_id, models_subdir=['staging', 'intermediate']) 18 | 19 | extract_task >> load_task >> transform_task 20 | 21 | dag = umls() 22 | -------------------------------------------------------------------------------- /airflow/dags/user_macros.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, date, timedelta 2 | import calendar 3 | 4 | 5 | def ds_datetime(ds): 6 | return datetime.strptime(ds, "%Y-%m-%d") 7 | 8 | 9 | def get_date_of_prior_weekday( 10 | weekday, reference_date=date.today(), date_format="%m-%d-%Y" 11 | ): 12 | 13 | """Gets the date of the prior weekday. If today is the same weekday, returns today's date.""" 14 | weekday = weekday.lower() 15 | weekdays = [d.lower() for d in list(calendar.day_name)] 16 | weekday_number = weekdays.index(weekday) 17 | 18 | if weekday_number < 0: 19 | raise ValueError 20 | 21 | offset = (reference_date.weekday() - weekday_number) % 7 22 | prior_weekday = reference_date - timedelta(days=offset) 23 | prior_weekday = prior_weekday.strftime(date_format) 24 | 25 | return prior_weekday 26 | 27 | 28 | def get_quarter(reference_date: date) -> int: 29 | return (reference_date.month - 1) // 3 + 1 30 | 31 | 32 | def get_first_day_of_quarter(reference_date: date, date_format="%m-%d-%Y"): 33 | quarter = get_quarter(reference_date) 34 | required_date = datetime(reference_date.year, (3 * quarter) - 2, 1) 35 | return required_date.strftime(date_format) 36 | 37 | 38 | def list_to_bash_array(list: list): 39 | array_str = "" 40 | for x in list: 41 | array_str = f"{array_str} {x}" 42 | return array_str.strip() 43 | -------------------------------------------------------------------------------- /airflow/dags/vsac/dag.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pendulum 3 | 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel 5 | 6 | from airflow.decorators import dag, task 7 | 8 | from airflow.operators.python import get_current_context 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator 10 | from airflow.hooks.postgres_hook import PostgresHook 11 | 12 | from common_dag_tasks import run_subprocess_command, extract 13 | from vsac.dag_tasks import main_execution 14 | 15 | 16 | 17 | @dag( 18 | schedule="0 3 * * *", 19 | start_date=pendulum.yesterday(), 20 | catchup=False, 21 | ) 22 | def vsac(): 23 | dag_id = "vsac" 24 | base_url = "https://cts.nlm.nih.gov/fhir" 25 | ds_url = "" 26 | 27 | extract_load_task = main_execution() 28 | 29 | extract_load_task 30 | 31 | vsac() 32 | -------------------------------------------------------------------------------- /airflow/hidden_dags/meps/meps_medical_conditions_dag.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pendulum 3 | 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel 5 | 6 | from airflow.decorators import dag, task 7 | 8 | from airflow.operators.python import get_current_context 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator 10 | from airflow.hooks.postgres_hook import PostgresHook 11 | from airflow.hooks.subprocess import SubprocessHook 12 | 13 | 14 | @dag( 15 | schedule="0 4 * * *", 16 | start_date=pendulum.today(), 17 | catchup=False, 18 | ) 19 | def meps_medical_conditions(): 20 | col_names = ["duid","pid","dupersid","condn","condidx","panel","condrn","agediag","crnd1","crnd2","crnd3","crnd4","crnd5","injury","accdnwrk","icd10cdx","ccsr1x","ccsr2x","ccsr3x","hhnum","ipnum","opnum","obnum","ernum","rxnum","perwt18f","varstr","varpsu"] 21 | col_spaces = [(0,7),(7,10),(10,20),(20,23),(23,36),(36,38),(38,39),(39,42),(42,44),(44,46),(46,47),(47,49),(49,51),(51,52),(52,55),(55,58),(58,64),(64,70),(70,76),(76,78),(78,80),(80,83),(83,86),(86,88),(88,90),(90,102),(102,106),(106,107)] 22 | dag_id = "meps_medical_conditions" 23 | filename = "h207" 24 | ds_url = f"https://meps.ahrq.gov/mepsweb/data_files/pufs/{filename}/{filename}dat.zip" 25 | 26 | # Task to download data from web location 27 | @task 28 | def extract(): 29 | data_folder = Path("/opt/airflow/data") / dag_id 30 | data_path = get_dataset(ds_url, data_folder) 31 | return data_path 32 | 33 | @task 34 | def load(data_path): 35 | import pandas as pd 36 | import sqlalchemy 37 | 38 | pg_hook = PostgresHook(postgres_conn_id="postgres_default") 39 | engine = pg_hook.get_sqlalchemy_engine() 40 | 41 | # create empty table with columns in postgres 42 | # overwrite existing table, if exists 43 | df = pd.DataFrame(columns = col_names) 44 | df.to_sql( 45 | dag_id, 46 | con=engine, 47 | schema="datasource", 48 | if_exists="replace", 49 | index=False 50 | ) 51 | 52 | with pd.read_fwf( 53 | data_path + f'/{filename}.dat', 54 | header=None, 55 | names=col_names, 56 | converters={col: str for col in col_names}, 57 | colspecs=col_spaces, 58 | chunksize=1000 59 | ) as reader: 60 | reader 61 | for chunk in reader: 62 | chunk.to_sql( 63 | dag_id, 64 | con=engine, 65 | schema="datasource", 66 | if_exists="append", 67 | index=False 68 | ) 69 | 70 | load(extract()) 71 | 72 | meps_medical_conditions() 73 | -------------------------------------------------------------------------------- /airflow/requirements.txt: -------------------------------------------------------------------------------- 1 | # Any change made here should accompany an increment 2 | # to the image version on line 5 of docker-compose.yml 3 | 4 | dbt-core 5 | dbt-postgres 6 | apache-airflow[google] 7 | bs4 8 | -------------------------------------------------------------------------------- /dbt/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | 3 | ENV DBT_PROFILES_DIR=/dbt 4 | 5 | RUN apt-get -y update 6 | RUN apt-get -y install git 7 | 8 | RUN pip install dbt-core dbt-postgres 9 | 10 | WORKDIR /dbt 11 | 12 | COPY . . 13 | 14 | WORKDIR /dbt/sagerx -------------------------------------------------------------------------------- /dbt/profiles.yml: -------------------------------------------------------------------------------- 1 | sagerx: 2 | outputs: 3 | dev: 4 | type: postgres 5 | threads: 1 6 | host: postgres 7 | port: 5432 8 | user: sagerx 9 | pass: sagerx 10 | dbname: sagerx 11 | schema: sagerx_dev 12 | 13 | prod: 14 | type: postgres 15 | threads: 1 16 | host: postgres 17 | port: 5432 18 | user: sagerx 19 | pass: sagerx 20 | dbname: sagerx 21 | schema: sagerx 22 | 23 | target: dev 24 | -------------------------------------------------------------------------------- /dbt/sagerx/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /dbt/sagerx/README.md: -------------------------------------------------------------------------------- 1 | Welcome to your new dbt project! 2 | 3 | ### Using the starter project 4 | 5 | Try running the following commands: 6 | - dbt run 7 | - dbt test 8 | 9 | 10 | ### Resources: 11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 13 | - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support 14 | - Find [dbt events](https://events.getdbt.com) near you 15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 16 | -------------------------------------------------------------------------------- /dbt/sagerx/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/analyses/.gitkeep -------------------------------------------------------------------------------- /dbt/sagerx/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Name your project! Project names should contain only lowercase characters 2 | # and underscores. A good package name should reflect your organization's 3 | # name or the intended use of these models 4 | name: "sagerx" 5 | version: "1.0.0" 6 | config-version: 2 7 | 8 | # This setting configures which "profile" dbt uses for this project. 9 | profile: "sagerx" 10 | 11 | # These configurations specify where dbt should look for different types of files. 12 | # The `model-paths` config, for example, states that models in this project can be 13 | # found in the "models/" directory. You probably won't need to change these! 14 | model-paths: ["models"] 15 | analysis-paths: ["analyses"] 16 | test-paths: ["tests"] 17 | seed-paths: ["seeds"] 18 | macro-paths: ["macros"] 19 | snapshot-paths: ["snapshots"] 20 | 21 | target-path: "target" # directory which will store compiled SQL files 22 | clean-targets: # directories to be removed by `dbt clean` 23 | - "target" 24 | - "dbt_packages" 25 | 26 | # Configuring models 27 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 28 | 29 | # These settings can be overridden in the individual model 30 | # files using the `{{ config(...) }}` macro. 31 | models: 32 | sagerx: 33 | staging: 34 | +schema: sagerx 35 | +materialized: view 36 | intermediate: 37 | +schema: sagerx 38 | +materialized: table 39 | marts: 40 | +schema: sagerx 41 | +materialized: table 42 | +persist_docs: 43 | relation: true 44 | columns: true 45 | -------------------------------------------------------------------------------- /dbt/sagerx/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/macros/.gitkeep -------------------------------------------------------------------------------- /dbt/sagerx/macros/get_custom_schema.sql: -------------------------------------------------------------------------------- 1 | -- get_custom_schema.sql 2 | -- https://docs.getdbt.com/docs/build/custom-schemas 3 | 4 | {% macro generate_schema_name(custom_schema_name, node) -%} 5 | {{ generate_schema_name_for_env(custom_schema_name, node) }} 6 | {%- endmacro %} 7 | -------------------------------------------------------------------------------- /dbt/sagerx/macros/ndc_convert.sql: -------------------------------------------------------------------------------- 1 | {% macro ndc_convert (ndc, to_format) %} 2 | 3 | {% set ndc11 = ndc_to_11(ndc) %} 4 | {% set format_list = ['10 Digit','11 Digit','4-4-2','5-3-2','5-4-1','5-4-2','5-5','4-6'] %} 5 | {% if to_format not in format_list %} 6 | {{ "format must be of viable type" }} 7 | {% endif %} 8 | 9 | 10 | {%- set return_value %} 11 | CASE WHEN {{to_format}} = '10 Digit' THEN NULL 12 | WHEN {{to_format}} = '11 Digit' THEN {{ndc11}} 13 | WHEN {{to_format}} = '4-4-2' THEN 14 | CASE WHEN SUBSTRING({{ndc11}},1,1) = '0' THEN SUBSTRING({{ndc11}},2,4) ||'-'|| SUBSTRING({{ndc11}},6,4) ||'-'|| RIGHT({{ndc11}},2) ELSE NULL END 15 | WHEN {{to_format}} = '5-3-2' THEN 16 | CASE WHEN SUBSTRING({{ndc11}},6,1) = '0' THEN LEFT({{ndc11}},5) ||'-'|| SUBSTRING({{ndc11}},7,3) ||'-'|| RIGHT({{ndc11}},2) ELSE NULL END 17 | WHEN {{to_format}} = '5-4-1' THEN 18 | CASE WHEN SUBSTRING({{ndc11}},10,1) = '0' THEN LEFT({{ndc11}},5) ||'-'|| SUBSTRING({{ndc11}},6,4) ||'-'|| RIGHT({{ndc11}},1) ELSE NULL END 19 | WHEN {{to_format}} = '5-4-2' THEN LEFT({{ndc11}},5) ||'-'|| SUBSTRING({{ndc11}},6,4) ||'-'|| RIGHT({{ndc11}},2) 20 | WHEN {{to_format}} = '5-5' THEN NULL 21 | WHEN {{to_format}} = '4-6' THEN NULL 22 | ELSE NULL 23 | END 24 | {% endset %} 25 | {{return_value}} 26 | 27 | {% endmacro %} 28 | -------------------------------------------------------------------------------- /dbt/sagerx/macros/ndc_format.sql: -------------------------------------------------------------------------------- 1 | {% macro ndc_format(ndc) %} 2 | {% set ndc_format %} 3 | CASE WHEN {{ndc}} ~ '^\d{10}$' THEN '10 Digit' 4 | WHEN {{ndc}} ~ '^\d{11}$' THEN '11 Digit' 5 | WHEN {{ndc}} ~ '^\d{4}-\d{4}-\d{2}$' THEN '4-4-2' 6 | WHEN {{ndc}} ~ '^\d{5}-\d{3}-\d{2}$' THEN '5-3-2' 7 | WHEN {{ndc}} ~ '^\d{5}-\d{4}-\d{1}$' THEN '5-4-1' 8 | WHEN {{ndc}} ~ '^\d{5}-\d{4}-\d{2}$' THEN '5-4-2' 9 | WHEN {{ndc}} ~ '^\d{5}-\d{5}$' THEN '5-5' 10 | WHEN {{ndc}} ~ '^\d{4}-\d{6}$' THEN '4-6' 11 | ELSE 'Unknown' 12 | END 13 | {% endset %} 14 | {{ndc_format}} 15 | {% endmacro %} 16 | -------------------------------------------------------------------------------- /dbt/sagerx/macros/ndc_to_11.sql: -------------------------------------------------------------------------------- 1 | {%- macro ndc_to_11(ndc) %} 2 | {%- set return_value %} 3 | CASE WHEN {{ ndc_format (ndc) }} = '10 Digit' THEN NULL 4 | WHEN {{ ndc_format (ndc) }} = '11 Digit' THEN {{ndc}} 5 | WHEN {{ ndc_format (ndc) }} = '4-4-2' THEN '0' || LEFT({{ndc}},4) || REPLACE(RIGHT({{ndc}},7),'-','') 6 | WHEN {{ ndc_format (ndc) }} = '5-3-2' THEN LEFT({{ndc}},5) || '0' || REPLACE(RIGHT({{ndc}},6),'-','') 7 | WHEN {{ ndc_format (ndc) }} = '5-4-1' THEN REPLACE(LEFT({{ndc}},10),'-','') || '0' || RIGHT({{ndc}}, 1) 8 | WHEN {{ ndc_format (ndc) }} = '5-4-2' THEN REPLACE({{ndc}},'-','') 9 | WHEN {{ ndc_format (ndc) }} = '5-5' THEN NULL 10 | WHEN {{ ndc_format (ndc) }} = '4-6' THEN NULL 11 | ELSE NULL 12 | END 13 | {% endset %} 14 | {{return_value}} 15 | {% endmacro -%} 16 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/dailymed/int_dailymed_image_name_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- int_dailymed_image_name_ndcs 2 | 3 | with 4 | 5 | package_label_section_images as ( 6 | 7 | select * from {{ ref('stg_dailymed__package_label_section_images') }} 8 | 9 | ), 10 | 11 | regex_ndcs as ( 12 | 13 | select 14 | *, 15 | (regexp_matches(image, '(?:\d{4}|\d{5})-\d{3,6}(?:-\d{1,2})?|\d{11}|\d{10}', 'g'))[1] as regex_ndc 16 | from package_label_section_images 17 | 18 | /* 19 | \d{11} | # 11 digit 20 | \d{10} | # 10 digit 21 | \d{5}-\d{5} | # 5-5 22 | \d{5}-\d{4}-\d{2} | # 5-4-2 23 | \d{5}-\d{4}-\d{1} | # 5-4-1 24 | \d{5}-\d{3}-\d{2} | # 5-3-2 25 | \d{4}-\d{6} | # 4-6 26 | \d{4}-\d{4}-\d{2} # 4-4-2 27 | */ 28 | 29 | ), 30 | 31 | valid_spl_ndcs as ( 32 | 33 | select * from {{ ref('stg_dailymed__ndcs') }} 34 | 35 | ), 36 | 37 | validated_ndcs as ( 38 | 39 | select 40 | regex_ndcs.*, 41 | spl_ndc.ndc, 42 | spl_ndc.ndc11 43 | from regex_ndcs 44 | inner join valid_spl_ndcs spl_ndc 45 | on spl_ndc.set_id = regex_ndcs.set_id 46 | and {{ ndc_to_11('spl_ndc.ndc') }} = {{ ndc_to_11('regex_ndcs.regex_ndc') }} 47 | 48 | ) 49 | 50 | select * from validated_ndcs 51 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/dailymed/int_dailymed_image_xml_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- int_dailymed_image_xml_ndcs 2 | 3 | with 4 | 5 | ranked_package_label_images as 6 | ( 7 | 8 | select * from {{ ref('int_dailymed_ranked_package_label_images') }} 9 | 10 | ), 11 | 12 | ranked_package_label_ndcs as 13 | ( 14 | 15 | select * from {{ ref('int_dailymed_ranked_package_label_ndcs') }} 16 | 17 | ) 18 | 19 | select 20 | img.set_id, 21 | ndc.ndc, 22 | img.image 23 | from ranked_package_label_images img 24 | left join ranked_package_label_ndcs ndc 25 | on ndc.package_label_section_id = img.package_label_section_id 26 | and ndc.rn = img.rn 27 | where ndc.ndc is not null 28 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/dailymed/int_dailymed_organization_metrics.sql: -------------------------------------------------------------------------------- 1 | /* intermediate.int_dailymed_organization_metrics */ 2 | 3 | with dailymed_main as ( 4 | select * from {{ ref('stg_dailymed__main') }} 5 | ), 6 | 7 | dailymed_organizations as ( 8 | select * from {{ ref('stg_dailymed__organizations') }} 9 | ), 10 | 11 | dailymed_organization_texts as ( 12 | select * from {{ ref('stg_dailymed__organization_texts') }} 13 | ) 14 | 15 | select o.set_id 16 | , ma.market_status 17 | , sum(case when org_type = 'Functioner' then 1 else 0 end) as functioner_count 18 | , sum(case when org_type = 'Labeler' then 1 else 0 end) as labeler_count 19 | , sum(case when org_type = 'Repacker' then 1 else 0 end) as repacker_count 20 | , case when sum(case when ot.set_id is not null then 1 else 0 end) > 0 then 'Yes' else '' end as organization_text 21 | , case when sum(case when org_type = 'Labeler' then 1 else 0 end) = 1 22 | and sum(case when org_type = 'Functioner' then 1 else 0 end) = 0 23 | then 'yes' else '' end as labeler_only 24 | , count(*) 25 | from dailymed_main ma 26 | inner join dailymed_organizations o 27 | on o.set_id = ma.set_id 28 | left join dailymed_organization_texts ot 29 | on o.set_id = ot.set_id 30 | group by o.set_id, ma.market_status 31 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/dailymed/int_dailymed_ranked_package_label_images.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | package_label_images as ( 4 | 5 | select * from{{ ref('stg_dailymed__package_label_section_images') }} 6 | 7 | ), 8 | 9 | ranked_package_images as ( 10 | 11 | select 12 | *, 13 | row_number() over ( 14 | partition by package_label_section_id 15 | order by id 16 | ) as rn 17 | from package_label_images 18 | 19 | ) 20 | 21 | select * from ranked_package_images 22 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/dailymed/int_dailymed_ranked_package_label_ndcs.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | validated_package_ndcs as ( 4 | 5 | select * from {{ ref('int_dailymed_validated_package_label_ndcs') }} 6 | 7 | ), 8 | 9 | ranked_package_ndcs as ( 10 | 11 | select 12 | *, 13 | row_number() over ( 14 | partition by package_label_section_id 15 | order by id 16 | ) as rn 17 | from validated_package_ndcs 18 | 19 | ) 20 | 21 | select * from ranked_package_ndcs 22 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/dailymed/int_dailymed_validated_package_label_ndcs.sql: -------------------------------------------------------------------------------- 1 | --int_dailymed_validated_package_label_ndcs 2 | 3 | with 4 | 5 | valid_spl_ndcs as ( 6 | 7 | select * from{{ ref('stg_dailymed__ndcs') }} 8 | 9 | ), 10 | 11 | package_label_ndc_matches as ( 12 | 13 | select * from {{ ref('stg_dailymed__package_label_section_ndcs') }} 14 | 15 | ), 16 | 17 | validated_package_ndcs as ( 18 | 19 | select 20 | * 21 | from package_label_ndc_matches pkg_ndc 22 | where exists ( 23 | 24 | select 25 | ndc 26 | from valid_spl_ndcs 27 | where ndc = pkg_ndc.ndc 28 | 29 | ) 30 | 31 | ) 32 | 33 | select * from validated_package_ndcs 34 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/fda/int_fda_packaging_components.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='table') }} 2 | 3 | with 4 | 5 | all_fda as ( 6 | select ndc11, packagedescription 7 | from {{ ref('stg_fda_ndc__ndcs') }} 8 | 9 | union all 10 | 11 | select ndc11, packagedescription 12 | from {{ ref('stg_fda_excluded__ndcs') }} 13 | 14 | union all 15 | 16 | select ndc11, packagedescription 17 | from {{ ref('stg_fda_unfinished__ndcs') }} 18 | ), 19 | 20 | split_components as ( 21 | select 22 | z.ndc11 23 | , z.packagedescription 24 | , z.ordinality as component_line 25 | , trim(z.token) as component_text 26 | from ( 27 | select distinct 28 | all_fda.ndc11 29 | , all_fda.packagedescription 30 | , s.token 31 | , s.ordinality 32 | from 33 | all_fda 34 | , unnest( 35 | string_to_array( 36 | regexp_replace( 37 | all_fda.packagedescription 38 | , '(? 0 87 | group by ndc11 88 | 89 | ), 90 | 91 | final as ( 92 | 93 | select 94 | total_product, 95 | inner_outer_value_unit.* 96 | from inner_outer_value_unit 97 | left join total_product 98 | on total_product.ndc11 = inner_outer_value_unit.ndc11 99 | 100 | ) 101 | 102 | select 103 | * 104 | from final 105 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/fda/int_fda_packaging_parts.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='view') }} 2 | 3 | with 4 | 5 | packaging_components as ( 6 | select * from {{ ref('int_fda_packaging_components') }} 7 | where component_text like ('%*%') 8 | ) 9 | 10 | select 11 | z.ndc11 12 | , z.packagedescription 13 | , z.component_line 14 | , z.component_text 15 | , z.ordinality as part_line 16 | , trim(z.token) as part_text 17 | from ( 18 | select distinct 19 | components.* 20 | , s.token 21 | , s.ordinality 22 | from 23 | packaging_components components 24 | , unnest( 25 | string_to_array( 26 | component_text 27 | , '*') 28 | ) with ordinality as s(token, ordinality) 29 | ) z 30 | order by ndc11, component_line, part_line 31 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/fda/int_fda_packaging_subparts.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='view') }} 2 | 3 | with 4 | 5 | packaging_parts as ( 6 | select * from {{ ref('int_fda_packaging_parts') }} 7 | where part_text like ('%/%') 8 | ) 9 | 10 | select 11 | z.ndc11 12 | , z.packagedescription 13 | , z.component_line 14 | , z.component_text 15 | , z.part_line 16 | , z.part_text 17 | , z.ordinality as subpart_line 18 | , trim(z.token) as subpart_text 19 | from ( 20 | select distinct 21 | parts.* 22 | , s.token 23 | , s.ordinality 24 | from 25 | packaging_parts parts 26 | , unnest( 27 | string_to_array( 28 | part_text 29 | , '/') 30 | ) with ordinality as s(token, ordinality) 31 | ) z 32 | order by ndc11, component_line, part_line, subpart_line 33 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/nadac/int_nadac_pricing.sql: -------------------------------------------------------------------------------- 1 | -- int_nadac_pricing.sql 2 | 3 | with 4 | 5 | pricing as ( 6 | 7 | select 8 | * 9 | from {{ ref('int_nadac_historical_pricing') }} 10 | where is_last_price 11 | order by ndc_description 12 | 13 | ) 14 | 15 | select * from pricing 16 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/orange_book/_int_orange_book__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: int_fda_ndc_to_te 5 | description: | 6 | > DISCLAIMER: This model is under development and incomplete. 7 | 8 | This is an attempt at an NDC-level mapping of Orange Book TE codes. There are cases in Orange Book where two different strengths of a given ANDA have different TE codes. This means within an ANDA, two different NDCs could have different TE codes. 9 | 10 | The problem we're encountering is that there's not a reliable / programmatic way to get from the strength of a product in Orange Book and the strength of the product in the FDA NDC Directory. 11 | 12 | Because I can't figure out NDC-level mapping, I only include applications with a single OB TE code. 13 | 14 | Again - please only treat this as the beginning of a proof of concept and do not use for clinical or other purposes. 15 | columns: 16 | - name: ndc11 17 | description: The NDC11 of the product. 18 | - name: application_number 19 | description: The ANDA / NDA / etc number. 20 | - name: te_code 21 | description: The full therapeutic equvalency (TE) code as listed within Orange Book. 22 | - name: first_two_te_code 23 | description: Just the first two characters of the TE code - for ease of use downstream. 24 | - name: first_one_te_code 25 | description: Just the first character of the TE code - for ease of use downstream. 26 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/orange_book/int_fda_ndc_to_te.sql: -------------------------------------------------------------------------------- 1 | -- DISCLAIMER: This model is under development and incomplete. 2 | 3 | with cte as ( 4 | select 5 | fda.ndc11 6 | , obp.te_code 7 | , count(fda.ndc11) over( partition by fda.ndc11 ) as num_te_codes 8 | from {{ source('orange_book', 'orange_book_products') }} as obp 9 | inner join {{ ref('stg_fda_ndc__ndcs') }} as fda 10 | on concat(case when obp.appl_type = 'A' then 'ANDA' else 'NDA' end, obp.appl_no) = fda.applicationnumber 11 | group by fda.ndc11, obp.te_code 12 | ) 13 | select 14 | fda.ndc11 15 | , fda.applicationnumber as application_number 16 | , cte.te_code 17 | , left(cte.te_code, 2) as first_two_te_code 18 | , left(cte.te_code, 1) as first_one_te_code 19 | from {{ ref('stg_fda_ndc__ndcs') }} as fda 20 | inner join cte 21 | on fda.ndc11 = cte.ndc11 22 | and cte.num_te_codes = 1 23 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxclass/_int_rxclass__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: int_rxclass_clinical_products_to_atc_class 5 | description: "Clinical products to ATC class" 6 | columns: 7 | - name: clinical_product_ndc 8 | - name: clinical_product_rxcui 9 | - name: clinical_product_name 10 | - name: clinical_product_code 11 | - name: clinical_product_class_name 12 | 13 | - name: int_rxclass_clinical_products_to_cvx_code 14 | description: "Clinical products to CVX code" 15 | columns: 16 | - name: clinical_product_ndc 17 | - name: clinical_product_rxcui 18 | - name: clinical_product_name 19 | - name: clinical_product_code 20 | - name: clinical_product_class_name 21 | 22 | - name: int_rxclass_clinical_products_to_schedule 23 | description: "Clinical products to DEA schedule" 24 | columns: 25 | - name: clinical_product_ndc 26 | - name: clinical_product_rxcui 27 | - name: clinical_product_name 28 | - name: clinical_product_code 29 | - name: clinical_product_class_name 30 | 31 | - name: int_rxclass_clinical_products_to_va_class 32 | description: "Clinical products to VA class" 33 | columns: 34 | - name: clinical_product_ndc 35 | - name: clinical_product_rxcui 36 | - name: clinical_product_name 37 | - name: clinical_product_code 38 | - name: clinical_product_class_name -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_atc_class.sql: -------------------------------------------------------------------------------- 1 | select distinct 2 | rs.atv as clinical_product_ndc, 3 | rcl.rxcui as clinical_product_rxcui, 4 | rcl.name as clinical_product_name, 5 | rcl.class_id as clinical_product_code, 6 | rcl.class_name as clinical_product_class_name 7 | from sagerx_lake.rxnorm_rxnsat rs 8 | join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui 9 | join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui 10 | where 11 | rs.atn = 'NDC' 12 | and rc.sab = 'RXNORM' 13 | and rcl.rela_source = 'ATCPROD' -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_cvx_code.sql: -------------------------------------------------------------------------------- 1 | select distinct 2 | rs.atv as clinical_product_ndc, 3 | rcl.rxcui as clinical_product_rxcui, 4 | rcl.name as clinical_product_name, 5 | rcl.class_id as clinical_product_code, 6 | rcl.class_name as clinical_product_class_name 7 | from sagerx_lake.rxnorm_rxnsat rs 8 | join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui 9 | join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui 10 | where 11 | rs.atn = 'NDC' 12 | and rc.sab = 'RXNORM' 13 | and rcl.rela_source = 'CDC' -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_schedule.sql: -------------------------------------------------------------------------------- 1 | select distinct 2 | rs.atv as clinical_product_ndc, 3 | rcl.rxcui as clinical_product_rxcui, 4 | rcl.name as clinical_product_name, 5 | rcl.class_id as clinical_product_code, 6 | rcl.class_name as clinical_product_class_name 7 | from sagerx_lake.rxnorm_rxnsat rs 8 | join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui 9 | join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui 10 | where 11 | rs.atn = 'NDC' 12 | and rc.sab = 'RXNORM' 13 | and rcl.rela_source = 'RXNORM' -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_va_class.sql: -------------------------------------------------------------------------------- 1 | select distinct 2 | rs.atv as clinical_product_ndc, 3 | rcl.rxcui as clinical_product_rxcui, 4 | rcl.name as clinical_product_name, 5 | rcl.class_id as clinical_product_code, 6 | rcl.class_name as clinical_product_class_name 7 | from sagerx_lake.rxnorm_rxnsat rs 8 | join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui 9 | join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui 10 | where 11 | rs.atn = 'NDC' 12 | and rc.sab = 'RXNORM' 13 | and rcl.rela_source = 'VA' -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_mthspl_products_to_active_ingredients.sql: -------------------------------------------------------------------------------- 1 | -- int_mthspl_products_to_active_ingredients.sql 2 | 3 | with 4 | 5 | substance as ( 6 | 7 | select * from {{ ref('stg_rxnorm__mthspl_substances') }} 8 | 9 | ), 10 | 11 | product as ( 12 | 13 | select * from {{ ref('stg_rxnorm__mthspl_products') }} 14 | 15 | ) 16 | 17 | select distinct 18 | concat(lpad(split_part(product.ndc,'-', 1), 5, '0'), lpad(split_part(product.ndc,'-', 2), 4, '0')) as ndc9 19 | , product.ndc as ndc 20 | , product.rxcui as product_rxcui 21 | , product.name as product_name 22 | , product.tty as product_tty 23 | , substance.unii as active_ingredient_unii 24 | , substance.rxcui as active_ingredient_rxcui 25 | , substance.name as active_ingredient_name 26 | , substance.tty as active_ingredient_tty 27 | , product.active as active 28 | , product.prescribable as prescribable 29 | from sagerx_lake.rxnorm_rxnrel rxnrel 30 | inner join substance 31 | on rxnrel.rxaui1 = substance.rxaui 32 | inner join product 33 | on rxnrel.rxaui2 = product.rxaui 34 | where rela = 'has_active_ingredient' 35 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_mthspl_products_to_active_moieties.sql: -------------------------------------------------------------------------------- 1 | -- int_mthspl_products_to_active_moieties.sql 2 | 3 | with 4 | 5 | substance as ( 6 | 7 | select * from {{ ref('stg_rxnorm__mthspl_substances') }} 8 | 9 | ), 10 | 11 | product as ( 12 | 13 | select * from {{ ref('stg_rxnorm__mthspl_products') }} 14 | 15 | ) 16 | 17 | select distinct 18 | concat(lpad(split_part(product.ndc,'-', 1), 5, '0'), lpad(split_part(product.ndc,'-', 2), 4, '0')) as ndc9 19 | , product.ndc as ndc 20 | , product.rxcui as product_rxcui 21 | , product.name as product_name 22 | , product.tty as product_tty 23 | , substance.unii as active_moiety_unii 24 | , substance.rxcui as active_moiety_rxcui 25 | , substance.name as active_moiety_name 26 | , substance.tty as active_moiety_tty 27 | , product.active as active 28 | , product.prescribable as prescribable 29 | from sagerx_lake.rxnorm_rxnrel rxnrel 30 | inner join substance 31 | on rxnrel.rxaui1 = substance.rxaui 32 | inner join product 33 | on rxnrel.rxaui2 = product.rxaui 34 | where rela = 'has_active_moiety' 35 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_mthspl_products_to_inactive_ingredients.sql: -------------------------------------------------------------------------------- 1 | -- int_mthspl_products_to_inactive_ingredients.sql 2 | 3 | with 4 | 5 | substance as ( 6 | select * from {{ ref('stg_rxnorm__mthspl_substances') }} 7 | ) 8 | 9 | , product as ( 10 | select * from {{ ref('stg_rxnorm__mthspl_products') }} 11 | ) 12 | 13 | select distinct 14 | concat(lpad(split_part(product.ndc,'-', 1), 5, '0'), lpad(split_part(product.ndc,'-', 2), 4, '0')) as ndc9 15 | , product.ndc as ndc 16 | , product.rxcui as product_rxcui 17 | , product.name as product_name 18 | , product.tty as product_tty 19 | , substance.unii as inactive_ingredient_unii 20 | , substance.rxcui as inactive_ingredient_rxcui 21 | , substance.name as inactive_ingredient_name 22 | , substance.tty as inactive_ingredient_tty 23 | , product.active as active 24 | , product.prescribable as prescribable 25 | from product 26 | inner join sagerx_lake.rxnorm_rxnrel rxnrel 27 | on rxnrel.rxaui2 = product.rxaui 28 | inner join substance 29 | on substance.rxaui = rxnrel.rxaui1 30 | where rela = 'has_inactive_ingredient' 31 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_all_ndcs_to_product_rxcuis.sql: -------------------------------------------------------------------------------- 1 | with all_ndcs as 2 | ( 3 | select * from {{ ref('stg_rxnorm__all_ndcs') }} 4 | ), 5 | 6 | product_rxcuis as 7 | ( 8 | select * from sagerx_lake.rxnorm_rxnconso 9 | where sab = 'RXNORM' 10 | and tty in ('SCD', 'SBD', 'GPCK', 'BPCK') 11 | ) 12 | 13 | select distinct 14 | all_ndcs.ndc11 15 | , product_rxcuis.rxcui 16 | from all_ndcs 17 | inner join product_rxcuis 18 | on all_ndcs.rxcui = product_rxcuis.rxcui 19 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_clinical_product_components.sql: -------------------------------------------------------------------------------- 1 | -- int_rxnorm_clinical_products_to_clinical_product_components.sql 2 | 3 | with 4 | 5 | ndc as ( 6 | 7 | select * from {{ ref('stg_rxnorm__ndcs') }} 8 | 9 | ), 10 | 11 | rcp as ( 12 | 13 | select * from {{ ref('stg_rxnorm__clinical_products') }} 14 | 15 | ), 16 | 17 | rcpcl as ( 18 | 19 | select * from {{ ref('stg_rxnorm__clinical_product_component_links') }} 20 | 21 | ), 22 | 23 | rcpc as ( 24 | 25 | select * from {{ ref('stg_rxnorm__clinical_product_components') }} 26 | 27 | ) 28 | 29 | select 30 | rcp.rxcui as clinical_product_rxcui 31 | , rcp.name as clinical_product_name 32 | , rcp.tty as clinical_product_tty 33 | , rcpc.rxcui as clinical_product_component_rxcui 34 | , rcpc.name as clinical_product_compnent_name 35 | , rcpc.tty as clinical_product_component_tty 36 | , rcp.active 37 | , rcp.prescribable 38 | from rcp 39 | left join rcpcl 40 | on rcp.rxcui = rcpcl.clinical_product_rxcui 41 | left join rcpc 42 | on rcpcl.clinical_product_component_rxcui = rcpc.rxcui 43 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_dose_forms.sql: -------------------------------------------------------------------------------- 1 | -- int_rxnorm_clinical_products_to_dose_forms.sql 2 | 3 | with 4 | 5 | rcp as ( 6 | 7 | select * from {{ ref('stg_rxnorm__clinical_products') }} 8 | 9 | ), 10 | 11 | rcpcl as ( 12 | 13 | select * from {{ ref('stg_rxnorm__clinical_product_component_links') }} 14 | 15 | ), 16 | 17 | rcpc as ( 18 | 19 | select * from {{ ref('stg_rxnorm__clinical_product_components') }} 20 | 21 | ), 22 | 23 | rdf as ( 24 | 25 | select * from {{ ref('stg_rxnorm__dose_forms') }} 26 | 27 | ) 28 | 29 | select 30 | rcp.rxcui as clinical_product_rxcui 31 | , rcp.name as clinical_product_name 32 | , rcp.tty as clinical_product_tty 33 | , rcpc.rxcui as clinical_product_component_rxcui 34 | , rcpc.name as clinical_product_compnent_name 35 | , rcpc.tty as clinical_product_component_tty 36 | , rdf.rxcui as dose_form_rxcui 37 | , rdf.name as dose_form_name 38 | , rdf.tty as dose_form_tty 39 | , rcp.active 40 | , rcp.prescribable 41 | from rcp 42 | left join rcpcl 43 | on rcp.rxcui = rcpcl.clinical_product_rxcui 44 | left join rcpc 45 | on rcpcl.clinical_product_component_rxcui = rcpc.rxcui 46 | left join rdf 47 | on rcpc.dose_form_rxcui = rdf.rxcui 48 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_ingredient_components.sql: -------------------------------------------------------------------------------- 1 | -- int_rxnorm_clinical_products_to_ingredient_components.sql 2 | 3 | with 4 | 5 | rcp as ( 6 | 7 | select * from {{ ref('stg_rxnorm__clinical_products') }} 8 | 9 | ), 10 | 11 | rcpcl as ( 12 | 13 | select * from {{ ref('stg_rxnorm__clinical_product_component_links') }} 14 | 15 | ), 16 | 17 | rcpc as ( 18 | 19 | select * from {{ ref('stg_rxnorm__clinical_product_components') }} 20 | 21 | ), 22 | 23 | rdf as ( 24 | 25 | select * from {{ ref('stg_rxnorm__dose_forms') }} 26 | 27 | ), 28 | 29 | ri as ( 30 | 31 | select * from {{ ref('stg_rxnorm__ingredients') }} 32 | 33 | ), 34 | 35 | ricl as ( 36 | 37 | select * from {{ ref('stg_rxnorm__ingredient_component_links') }} 38 | 39 | ), 40 | 41 | ric as ( 42 | 43 | select * from {{ ref('stg_rxnorm__ingredient_components') }} 44 | 45 | ) 46 | 47 | select 48 | rcp.rxcui as clinical_product_rxcui 49 | , rcp.name as clinical_product_name 50 | , rcp.tty as clinical_product_tty 51 | , rcpc.rxcui as clinical_product_component_rxcui 52 | , rcpc.name as clinical_product_component_name 53 | , rcpc.tty as clinical_product_component_tty 54 | , rdf.rxcui as dose_form_rxcui 55 | , rdf.name as dose_form_name 56 | , rdf.tty as dose_form_tty 57 | , ri.rxcui as ingredient_rxcui 58 | , ri.name as ingredient_name 59 | , ri.tty as ingredient_tty 60 | , ric.rxcui as ingredient_component_rxcui 61 | , ric.name as ingredient_component_name 62 | , ric.tty as ingredient_component_tty 63 | , rcp.active 64 | , rcp.prescribable 65 | from rcp 66 | left join rcpcl 67 | on rcp.rxcui = rcpcl.clinical_product_rxcui 68 | left join rcpc 69 | on rcpcl.clinical_product_component_rxcui = rcpc.rxcui 70 | left join rdf 71 | on rcpc.dose_form_rxcui = rdf.rxcui 72 | left join ri 73 | on rcpc.ingredient_rxcui = ri.rxcui 74 | left join ricl 75 | on ri.rxcui = ricl.ingredient_rxcui 76 | left join ric 77 | on ricl.ingredient_component_rxcui = ric.rxcui 78 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_ingredients.sql: -------------------------------------------------------------------------------- 1 | -- int_rxnorm_clinical_products_to_ingredients.sql 2 | 3 | with 4 | 5 | rcp as ( 6 | 7 | select * from {{ ref('stg_rxnorm__clinical_products') }} 8 | 9 | ), 10 | 11 | rcpcl as ( 12 | 13 | select * from {{ ref('stg_rxnorm__clinical_product_component_links') }} 14 | 15 | ), 16 | 17 | rcpc as ( 18 | 19 | select * from {{ ref('stg_rxnorm__clinical_product_components') }} 20 | 21 | ), 22 | 23 | rdf as ( 24 | 25 | select * from {{ ref('stg_rxnorm__dose_forms') }} 26 | 27 | ), 28 | 29 | ri as ( 30 | 31 | select * from {{ ref('stg_rxnorm__ingredients') }} 32 | 33 | ) 34 | 35 | select 36 | rcp.rxcui as clinical_product_rxcui 37 | , rcp.name as clinical_product_name 38 | , rcp.tty as clinical_product_tty 39 | , string_agg(rcpc.rxcui, ' | ') as clinical_product_component_rxcui 40 | , string_agg(rcpc.name, ' | ') as clinical_product_compnent_name 41 | , string_agg(rcpc.tty, ' | ') as clinical_product_component_tty 42 | , string_agg(rdf.rxcui, ' | ') as dose_form_rxcui 43 | , string_agg(rdf.name, ' | ') as dose_form_name 44 | , string_agg(rdf.tty, ' | ') as dose_form_tty 45 | , string_agg(ri.rxcui, ' | ') as ingredient_rxcui 46 | , string_agg(ri.name, ' | ') as ingredient_name 47 | , string_agg(ri.tty, ' | ') as ingredient_tty 48 | , rcp.active 49 | , rcp.prescribable 50 | from rcp 51 | left join rcpcl 52 | on rcp.rxcui = rcpcl.clinical_product_rxcui 53 | left join rcpc 54 | on rcpcl.clinical_product_component_rxcui = rcpc.rxcui 55 | left join rdf 56 | on rcpc.dose_form_rxcui = rdf.rxcui 57 | left join ri 58 | on rcpc.ingredient_rxcui = ri.rxcui 59 | group by 60 | rcp.rxcui 61 | , rcp.name 62 | , rcp.tty 63 | , rcp.active 64 | , rcp.prescribable 65 | 66 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- int_rxnorm_clinical_products_to_ndcs.sql 2 | 3 | with 4 | 5 | rcp as ( 6 | 7 | select * from {{ ref('stg_rxnorm__clinical_products') }} 8 | 9 | ), 10 | 11 | rcpcl as ( 12 | 13 | select * from {{ ref('stg_rxnorm__clinical_product_component_links') }} 14 | 15 | ), 16 | 17 | rcpc as ( 18 | 19 | select * from {{ ref('stg_rxnorm__clinical_product_components') }} 20 | 21 | ), 22 | 23 | rdf as ( 24 | 25 | select * from {{ ref('stg_rxnorm__dose_forms') }} 26 | 27 | ), 28 | 29 | ri as ( 30 | 31 | select * from {{ ref('stg_rxnorm__ingredients') }} 32 | 33 | ), 34 | 35 | rn as ( 36 | 37 | select * from {{ ref('stg_rxnorm__ndcs') }} 38 | 39 | ) 40 | 41 | select 42 | rcp.rxcui as clinical_product_rxcui 43 | , rcp.name as clinical_product_name 44 | , rcp.tty as clinical_product_tty 45 | , rcpc.rxcui as clinical_product_component_rxcui 46 | , rcpc.name as clinical_product_compnent_name 47 | , rcpc.tty as clinical_product_component_tty 48 | , rdf.rxcui as dose_form_rxcui 49 | , rdf.name as dose_form_name 50 | , rdf.tty as dose_form_tty 51 | , ri.rxcui as ingredient_rxcui 52 | , ri.name as ingredient_name 53 | , ri.tty as ingredient_tty 54 | , rn.brand_product_rxcui 55 | , rn.ndc 56 | , rcp.active 57 | , rcp.prescribable 58 | from rcp 59 | left join rcpcl 60 | on rcp.rxcui = rcpcl.clinical_product_rxcui 61 | left join rcpc 62 | on rcpcl.clinical_product_component_rxcui = rcpc.rxcui 63 | left join rdf 64 | on rcpc.dose_form_rxcui = rdf.rxcui 65 | left join ri 66 | on rcpc.ingredient_rxcui = ri.rxcui 67 | left join rn 68 | on rcp.rxcui = rn.clinical_product_rxcui 69 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_ndcs_to_products.sql: -------------------------------------------------------------------------------- 1 | -- int_rxnorm_ndcs_to_products.sql 2 | 3 | with 4 | 5 | ndc as ( 6 | 7 | select * from {{ ref('stg_rxnorm__ndcs') }} 8 | 9 | ), 10 | 11 | rcp as ( 12 | 13 | select * from {{ ref('stg_rxnorm__clinical_products') }} 14 | 15 | ), 16 | 17 | rbp as ( 18 | 19 | select * from {{ ref('stg_rxnorm__brand_products') }} 20 | 21 | ) 22 | 23 | select distinct 24 | ndc 25 | , coalesce(rbp.rxcui, rcp.rxcui, null) as product_rxcui 26 | , coalesce(rbp.name, rcp.name, null) as product_name 27 | , coalesce(rbp.tty, rcp.tty, null) as product_tty 28 | , rcp.rxcui as clinical_product_rxcui 29 | , rcp.name as clinical_product_name 30 | , rcp.tty as clinical_product_tty 31 | from ndc 32 | left join rcp 33 | on ndc.clinical_product_rxcui = rcp.rxcui 34 | left join rbp 35 | on ndc.brand_product_rxcui = rbp.rxcui 36 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/umls/_int_umls__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: int_umls_clinical_products_to_crosswalk_codes 5 | description: Crosswalk of clinical products to UMLS codes. 6 | 7 | - name: int_umls_ingredient_components_to_crosswalk_codes 8 | description: Crosswalk of ingredient components (TTY = IN) to UMLS codes. 9 | 10 | - name: int_umls_precise_ingredients_to_crosswalk_codes 11 | description: Crosswalk of precise ingredients (TTY = PIN) to UMLS codes. 12 | 13 | - name: int_umls_multiple_ingredients_to_crosswalk_codes 14 | description: Crosswalk of multiple ingredients (TTY = MIN) to UMLS codes. 15 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/umls/int_umls_ingredient_components_to_crosswalk_codes.sql: -------------------------------------------------------------------------------- 1 | -- int_umls_ingredient_components_to_crosswalk_codes.sql 2 | 3 | with 4 | 5 | ingredient_components as ( 6 | 7 | select 8 | * 9 | from {{ ref('stg_rxclass__rxclass') }} 10 | where tty = 'IN' 11 | 12 | ), 13 | 14 | crosswalk_codes as ( 15 | 16 | select 17 | * 18 | from {{ ref('stg_umls__crosswalk_codes') }} 19 | ) 20 | 21 | select 22 | ingredient_components.rxcui as ingredient_component_rxcui, 23 | ingredient_components.name as ingredient_component_name, 24 | ingredient_components.tty as ingredient_component_tty, 25 | ingredient_components.rela, 26 | ingredient_components.class_id, 27 | ingredient_components.class_name, 28 | ingredient_components.class_type, 29 | ingredient_components.rela_source, 30 | crosswalk_codes.* 31 | from ingredient_components 32 | inner join crosswalk_codes 33 | on crosswalk_codes.from_code = ingredient_components.class_id 34 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/umls/int_umls_multiple_ingredients_to_crosswalk_codes.sql: -------------------------------------------------------------------------------- 1 | -- int_umls_multiple_ingredients_to_crosswalk_codes.sql 2 | 3 | with 4 | 5 | multiple_ingredients as ( 6 | 7 | select 8 | * 9 | from {{ ref('stg_rxclass__rxclass') }} 10 | where tty = 'MIN' 11 | 12 | ), 13 | 14 | crosswalk_codes as ( 15 | 16 | select 17 | * 18 | from {{ ref('stg_umls__crosswalk_codes') }} 19 | ) 20 | 21 | select 22 | multiple_ingredients.rxcui as multiple_ingredient_rxcui, 23 | multiple_ingredients.name as multiple_ingredient_name, 24 | multiple_ingredients.tty as multiple_ingredient_tty, 25 | multiple_ingredients.rela, 26 | multiple_ingredients.class_id, 27 | multiple_ingredients.class_name, 28 | multiple_ingredients.class_type, 29 | multiple_ingredients.rela_source, 30 | crosswalk_codes.* 31 | from multiple_ingredients 32 | inner join crosswalk_codes 33 | on crosswalk_codes.from_code = multiple_ingredients.class_id 34 | -------------------------------------------------------------------------------- /dbt/sagerx/models/intermediate/umls/int_umls_precise_ingredients_to_crosswalk_codes.sql: -------------------------------------------------------------------------------- 1 | -- int_umls_precise_ingredients_to_crosswalk_codes.sql 2 | 3 | with 4 | 5 | precise_ingredients as ( 6 | 7 | select 8 | * 9 | from {{ ref('stg_rxclass__rxclass') }} 10 | where tty = 'PIN' 11 | 12 | ), 13 | 14 | crosswalk_codes as ( 15 | 16 | select 17 | * 18 | from {{ ref('stg_umls__crosswalk_codes') }} 19 | ) 20 | 21 | select 22 | precise_ingredients.rxcui as precise_ingredient_rxcui, 23 | precise_ingredients.name as precise_ingredient_name, 24 | precise_ingredients.tty as precise_ingredient_tty, 25 | precise_ingredients.rela, 26 | precise_ingredients.class_id, 27 | precise_ingredients.class_name, 28 | precise_ingredients.class_type, 29 | precise_ingredients.rela_source, 30 | crosswalk_codes.* 31 | from precise_ingredients 32 | inner join crosswalk_codes 33 | on crosswalk_codes.from_code = precise_ingredients.class_id 34 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/classification/_classification__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: atc_codes_to_rxnorm_products 5 | description: | 6 | ATC 1-4 codes to product-level RXCUIs and descriptions. 7 | columns: 8 | - name: rxcui 9 | description: > 10 | Product-level RxNorm RXCUI. 11 | tests: 12 | - unique 13 | - not_null 14 | - name: rxnorm_description 15 | description: The RxNorm normalized description (STR). 16 | - name: atc_1_name 17 | description: The name of the ATC1 level associated with this product. 18 | - name: atc_2_name 19 | description: The name of the ATC2 level associated with this product. 20 | - name: atc_3_name 21 | description: The name of the ATC3 level associated with this product. 22 | - name: atc_4_name 23 | description: The name of the ATC4 level associated with this product. 24 | 25 | - name: clinical_products_to_diseases 26 | description: | 27 | RxNorm clinical product RXCUIs (SCD / GPCK) to MeSH codes, 28 | ICD-9 codes, ICD-10 codes, and SNOMED-CT codes. 29 | 30 | This table contains multiple different types of relations 31 | between these concepts, including `may_treat`, `may_prevent`, 32 | and `ci_with`. 33 | 34 | MeSH codes are more general and the other codes are more specific, 35 | allowing for a hierarchy if desired. 36 | 37 | RxClass is used for the relations between RxNorm RXCUIs and 38 | MeSH codes. 39 | 40 | UMLS is used for the relations between MeSH and the other types 41 | of codes. 42 | columns: 43 | - name: clinical_product_rxcui 44 | - name: clinical_product_name 45 | - name: clinical_product_tty 46 | - name: via_ingredient_rxcui 47 | - name: via_ingredient_name 48 | - name: via_ingredient_tty 49 | - name: rela 50 | - name: rela_source 51 | - name: class_id 52 | - name: class_name 53 | - name: class_type 54 | - name: disease_id 55 | - name: disease_source 56 | - name: to_name as disease_name 57 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/classification/atc_codes_to_rxnorm_products.sql: -------------------------------------------------------------------------------- 1 | -- atc_codes_to_rxnorm_products 2 | 3 | with rxclass_atcprod as ( 4 | 5 | select * from {{ ref('stg_rxclass__rxclass') }} 6 | where rela_source = 'ATCPROD' 7 | 8 | ) 9 | 10 | , atc_codes as ( 11 | 12 | select * from {{ ref('stg_rxnorm__atc_codes') }} 13 | 14 | ) 15 | 16 | , rxnorm_product_rxcuis as ( 17 | 18 | select * from {{ ref('stg_rxnorm__product_rxcuis') }} 19 | 20 | ) 21 | 22 | select distinct 23 | rxclass_atcprod.rxcui 24 | , rxnorm_product_rxcuis.str as rxnorm_description 25 | , atc_codes.atc_1_code 26 | , atc_codes.atc_2_code 27 | , atc_codes.atc_3_code 28 | , atc_codes.atc_4_code 29 | , atc_codes.atc_1_name 30 | , atc_codes.atc_2_name 31 | , atc_codes.atc_3_name 32 | , atc_codes.atc_4_name 33 | from rxclass_atcprod 34 | left join atc_codes 35 | on atc_codes.atc_4_code = rxclass_atcprod.class_id 36 | left join rxnorm_product_rxcuis 37 | on rxnorm_product_rxcuis.rxcui = rxclass_atcprod.rxcui 38 | order by rxcui 39 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/classification/clinical_products_to_diseases.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | clinical_products_crosswalk as ( 4 | 5 | select 6 | clinical_product_rxcui, 7 | clinical_product_name, 8 | clinical_product_tty, 9 | via_ingredient_rxcui, 10 | via_ingredient_name, 11 | via_ingredient_tty, 12 | rela, 13 | rela_source, 14 | class_id, 15 | class_name, 16 | class_type, 17 | to_code as disease_id, 18 | to_source as disease_source, 19 | to_name as disease_name 20 | from {{ ref('int_umls_clinical_products_to_crosswalk_codes') }} 21 | 22 | ) 23 | 24 | select * from clinical_products_crosswalk 25 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/fda_excluded/fda_excluded.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | {{ndc_to_11('pack.ndcpackagecode')}} AS ndc11 3 | , pack.productid 4 | , pack.productndc 5 | , producttypename 6 | , proprietaryname 7 | , proprietarynamesuffix 8 | , nonproprietaryname 9 | , dosageformname 10 | , routename 11 | , pack.startmarketingdate 12 | , pack.endmarketingdate 13 | , marketingcategoryname 14 | , applicationnumber 15 | , labelername 16 | , substancename 17 | , active_numerator_strength 18 | , active_ingred_unit 19 | , pharm_classes 20 | , deaschedule 21 | , pack.ndc_exclude_flag 22 | , listing_record_certified_through 23 | , ndcpackagecode 24 | , packagedescription 25 | , sample_package 26 | FROM {{source('fda_excluded','fda_excluded_package')}} AS pack 27 | LEFT JOIN {{source('fda_excluded','fda_excluded_product')}} AS prod 28 | ON pack.productid = prod.productid -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/ndc/_ndc__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: all_ndc_descriptions 5 | description: | 6 | NDC to RxNorm description (with RXCUI) and FDA description. 7 | 8 | NOTE: If RxNorm and RxNorm Historical have same NDCs, we prefer RxNorm. 9 | If any of the 3 FDA sources have same NDCs, we prefer FDA NDC, FDA Excluded, FDA Unfinished in that order. 10 | columns: 11 | - name: ndc 12 | description: The NDC in NDC11 format. This column contains only unique values. 13 | tests: 14 | - unique 15 | - not_null 16 | - name: rxcui 17 | description: > 18 | If NDC was found in an RxNorm sources, we have an associated RXCUI. 19 | If NULL, the source is likely FDA. 20 | - name: rxnorm_description 21 | description: If RxNorm source, will have the RxNorm normalized description (STR). 22 | - name: fda_description 23 | description: If FDA source, will have a description cobbled together from FDA columns for lack of a normalized FDA description. 24 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/ndc/gtins.sql: -------------------------------------------------------------------------------- 1 | -- gtins.sql 2 | 3 | with 4 | 5 | ndcs as ( 6 | 7 | select 8 | ndc11, 9 | replace(ndcpackagecode, '-', '') as ndc10, 10 | ndcpackagecode as ndc, 11 | concat( 12 | '003', 13 | replace(ndcpackagecode,'-', '') 14 | ) as gtin13, 15 | concat( 16 | '3', 17 | replace(ndcpackagecode,'-', '') 18 | ) as gtin11, 19 | concat( 20 | '03', 21 | split_part(ndcpackagecode, '-', 1) 22 | ) as gs1_company_prefix 23 | 24 | from {{ ref('stg_fda_ndc__ndcs') }} 25 | 26 | ), 27 | 28 | digits as ( 29 | 30 | -- split the 13-digit number into individual digits 31 | select 32 | ndc, 33 | position, 34 | substring(gtin13 from position for 1)::int as digit 35 | from ndcs, 36 | generate_series(1, 13) as position 37 | 38 | ), 39 | 40 | products as ( 41 | 42 | -- apply the alternating multiplication rule 43 | select 44 | *, 45 | case 46 | when position % 2 = 1 47 | then digit * 3 48 | else digit * 1 49 | end as product 50 | from digits 51 | 52 | ), 53 | 54 | sums as ( 55 | 56 | -- sum of the products of each digit 57 | select 58 | ndc, 59 | sum(product) as sum 60 | from products 61 | group by ndc 62 | 63 | ), 64 | 65 | check_digits as ( 66 | 67 | -- round the sum to the nearest 10 and subtract the sum 68 | select 69 | ndc, 70 | ceil(sum / 10.0) * 10 - sum as check_digit 71 | from sums 72 | 73 | ), 74 | 75 | gtin14s as ( 76 | 77 | -- concatenate the gtin13 and check_digit 78 | select 79 | ndc11, 80 | ndc10, 81 | ndcs.ndc, 82 | concat( 83 | gtin13, 84 | check_digit 85 | ) as gtin14, 86 | concat( 87 | gtin11, 88 | check_digit 89 | ) as gtin12, 90 | gs1_company_prefix 91 | from ndcs 92 | left join check_digits 93 | on check_digits.ndc = ndcs.ndc 94 | 95 | ) 96 | 97 | select * from gtin14s 98 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/ndc/ndc_associations.sql: -------------------------------------------------------------------------------- 1 | -- ndc_associations 2 | 3 | select * from {{ ref('stg_fda_ndc__ndc_associations') }} 4 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/ndc/ndcs_to_label_images.sql: -------------------------------------------------------------------------------- 1 | -- ndcs_to_label_images 2 | 3 | with 4 | 5 | image_xml_ndcs as ( 6 | 7 | select * from {{ ref('int_dailymed_image_xml_ndcs') }} 8 | 9 | 10 | ), 11 | 12 | image_name_ndcs as ( 13 | 14 | select * from {{ ref('int_dailymed_image_name_ndcs') }} 15 | 16 | ), 17 | 18 | all_image_ndcs as ( 19 | 20 | select 21 | set_id, 22 | ndc, 23 | image 24 | from image_xml_ndcs 25 | 26 | union 27 | 28 | select 29 | set_id, 30 | ndc, 31 | image 32 | from image_name_ndcs 33 | 34 | ), 35 | 36 | all_image_ndcs_ndc11 as ( 37 | 38 | select 39 | set_id, 40 | ndc, 41 | {{ ndc_to_11('ndc') }} as ndc11, 42 | concat('https://dailymed.nlm.nih.gov/dailymed/image.cfm?name=', image, '&setid=', set_id) as image_url, 43 | image as image_file, 44 | concat('https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid=', set_id) as dailymed_spl_url 45 | from all_image_ndcs 46 | 47 | ) 48 | 49 | select * from all_image_ndcs_ndc11 50 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/ndc/pack_size.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | packaging_components as ( 4 | 5 | select 6 | * 7 | from {{ ref('int_fda_packaging_components') }} 8 | 9 | ), 10 | 11 | innermost_unit as ( 12 | 13 | select ndc11, inner_unit 14 | from ( 15 | select ndc11, inner_unit, 16 | row_number() over (partition by ndc11 order by component_line desc) as row_num 17 | from packaging_components 18 | ) as ranked 19 | where row_num = 1 20 | 21 | ), 22 | 23 | outermost_unit as ( 24 | 25 | select ndc11, outer_unit 26 | from packaging_components 27 | where component_line = 1 28 | 29 | ) 30 | 31 | select distinct 32 | packaging_components.ndc11, 33 | outermost_unit.outer_unit as outermost_unit, 34 | total_product, 35 | case 36 | when innermost_unit.inner_unit like('%KIT %') 37 | then 'KIT' 38 | else innermost_unit.inner_unit 39 | end as innermost_unit, 40 | packagedescription 41 | from packaging_components 42 | left join innermost_unit 43 | on innermost_unit.ndc11 = packaging_components.ndc11 44 | left join outermost_unit 45 | on outermost_unit.ndc11 = packaging_components.ndc11 46 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/pricing/pricing.sql: -------------------------------------------------------------------------------- 1 | -- pricing.sql 2 | 3 | with 4 | 5 | nadac as ( 6 | 7 | select 8 | * 9 | from {{ ref('int_nadac_pricing') }} 10 | 11 | ), 12 | 13 | mccpd as ( 14 | 15 | select 16 | * 17 | -- TODO: make a stanging table and int table instead of hitting source in a mart 18 | from {{ source('mccpd', 'mccpd') }} 19 | ), 20 | 21 | all_ndcs as ( 22 | 23 | select ndc from nadac 24 | 25 | union 26 | 27 | select ndc from mccpd 28 | 29 | ), 30 | 31 | pricing as ( 32 | 33 | select 34 | all_ndcs.*, 35 | nadac.ndc_description as nadac_description, 36 | nadac.nadac_per_unit, 37 | mccpd.medication_name as mccpd_description, 38 | mccpd.unit_billing_price, 39 | mccpd.unit_price 40 | from all_ndcs 41 | left join nadac 42 | on nadac.ndc = all_ndcs.ndc 43 | left join mccpd 44 | on mccpd.ndc = all_ndcs.ndc 45 | 46 | ) 47 | 48 | select * from pricing 49 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/pricing/pricing_historical.sql: -------------------------------------------------------------------------------- 1 | -- pricing_historical.sql 2 | 3 | with 4 | 5 | nadac_historical as ( 6 | 7 | select 8 | * 9 | from {{ ref('int_nadac_historical_pricing') }} 10 | 11 | ), 12 | 13 | mccpd as ( 14 | 15 | select 16 | * 17 | -- TODO: make a stanging table and int table instead of hitting source in a mart 18 | from {{ source('mccpd', 'mccpd') }} 19 | ), 20 | 21 | all_ndcs as ( 22 | 23 | select ndc from nadac_historical 24 | 25 | union 26 | 27 | select ndc from mccpd 28 | 29 | ), 30 | 31 | pricing as ( 32 | 33 | select 34 | all_ndcs.*, 35 | nadac_historical.ndc_description as nadac_description, 36 | nadac_historical.nadac_per_unit, 37 | mccpd.medication_name as mccpd_description, 38 | mccpd.unit_billing_price, 39 | mccpd.unit_price 40 | from all_ndcs 41 | left join nadac_historical 42 | on nadac_historical.ndc = all_ndcs.ndc 43 | left join mccpd 44 | on mccpd.ndc = all_ndcs.ndc 45 | 46 | ) 47 | 48 | select * from pricing 49 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/products/brand_products_with_related_ndcs.sql: -------------------------------------------------------------------------------- 1 | with brand_products as ( 2 | select * from {{ ref('stg_rxnorm__brand_products') }} 3 | ) 4 | 5 | , fda_ndcs as ( 6 | select * from {{ ref('stg_fda_ndc__ndcs') }} 7 | ) 8 | 9 | , rxnorm_ndcs_to_products as ( 10 | select * from {{ ref('int_rxnorm_ndcs_to_products') }} 11 | ) 12 | 13 | , map as ( 14 | select 15 | prod.tty as product_tty 16 | , prod.rxcui as product_rxcui 17 | , prod.name as product_name 18 | , ndc.product_tty as ndc_product_tty 19 | , ndc.product_rxcui as ndc_product_rxcui 20 | , ndc.product_name as ndc_product_name 21 | , ndc.ndc 22 | , fda.product_startmarketingdate 23 | , fda.package_startmarketingdate 24 | from brand_products prod 25 | left join rxnorm_ndcs_to_products ndc 26 | on ndc.clinical_product_rxcui = prod.clinical_product_rxcui 27 | left join fda_ndcs fda 28 | on fda.ndc11 = ndc.ndc 29 | order by prod.rxcui 30 | ) 31 | 32 | select 33 | * 34 | from map 35 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/products/product_synonyms.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | rxnorm_synonyms as ( 4 | 5 | select 6 | str as synonym, 7 | rxcui as product_rxcui, 8 | 'RXNORM' as source 9 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} 10 | where sab = 'RXNORM' 11 | and tty in('PSN', 'SY', 'TMSY', 'ET') 12 | 13 | ), 14 | 15 | nadac_synonyms as ( 16 | 17 | select distinct 18 | ndc_description as synonym, 19 | product_rxcui, 20 | 'NADAC' as source 21 | from {{ source('nadac', 'nadac') }} n 22 | left join {{ ref('int_rxnorm_ndcs_to_products') }} r 23 | on r.ndc = n.ndc 24 | where r.product_rxcui is not null 25 | 26 | ), 27 | 28 | fda_synonyms as ( 29 | 30 | select distinct 31 | trim(concat( 32 | nonproprietaryname 33 | , ' ' 34 | , active_numerator_strength 35 | , ' ' 36 | , active_ingred_unit 37 | , ' ' 38 | , lower(dosageformname) 39 | , case when proprietaryname is not null then concat( 40 | ' [' 41 | , proprietaryname 42 | , case when proprietarynamesuffix is not null then concat( 43 | ' ' 44 | , proprietarynamesuffix 45 | ) else '' end 46 | , ']' 47 | ) else '' end 48 | )) as synonym, 49 | product_rxcui, 50 | 'FDA' as source 51 | from sagerx_dev.stg_fda_ndc__ndcs f 52 | left join sagerx_dev.int_rxnorm_ndcs_to_products r 53 | on r.ndc = f.ndc11 54 | where r.product_rxcui is not null 55 | 56 | ), 57 | 58 | all_synonyms as ( 59 | 60 | select * from rxnorm_synonyms 61 | 62 | union 63 | 64 | select * from nadac_synonyms 65 | 66 | union 67 | 68 | select * from fda_synonyms 69 | 70 | ), 71 | 72 | rxnorm_products as ( 73 | 74 | select * from {{ ref('stg_rxnorm__products') }} 75 | 76 | ), 77 | 78 | prescribable_product_synonyms as ( 79 | 80 | select 81 | all_synonyms.* 82 | from all_synonyms 83 | inner join rxnorm_products 84 | on rxnorm_products.rxcui = all_synonyms.product_rxcui 85 | where rxnorm_products.prescribable = true 86 | 87 | ) 88 | 89 | select * from prescribable_product_synonyms 90 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/products/products.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | rxnorm_products as ( 4 | 5 | select * from {{ ref('stg_rxnorm__products') }} 6 | 7 | ), 8 | 9 | rxnorm_psn as ( 10 | 11 | select 12 | rxcui, 13 | str 14 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} 15 | where sab = 'RXNORM' 16 | and tty = 'PSN' 17 | 18 | ), 19 | 20 | rxnorm_clinical_products_to_ingredients as ( 21 | 22 | select * from {{ ref('int_rxnorm_clinical_products_to_ingredients') }} 23 | 24 | ) 25 | 26 | select 27 | prod.rxcui as product_rxcui 28 | , prod.name as product_name 29 | , prod.tty as product_tty 30 | , psn.str as prescribable_name 31 | , case 32 | when prod.tty in ('SBD', 'BPCK') then 'brand' 33 | when prod.tty in ('SCD', 'GPCK') then 'generic' 34 | end as brand_vs_generic 35 | , substring(prod.name from '\[(.*)\]') as brand_name 36 | , cping.clinical_product_rxcui 37 | , cping.clinical_product_name 38 | , cping.clinical_product_tty 39 | , cping.ingredient_name 40 | -- strength - couldn't easily get strength at this grain - can if needed 41 | , cping.dose_form_name 42 | , prod.active 43 | , prod.prescribable 44 | from rxnorm_products prod 45 | left join rxnorm_clinical_products_to_ingredients cping 46 | on cping.clinical_product_rxcui = prod.clinical_product_rxcui 47 | left join rxnorm_psn psn 48 | on psn.rxcui = prod.rxcui 49 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/products/products_to_inactive_ingredients.sql: -------------------------------------------------------------------------------- 1 | -- products_to_inactive_ingredients.sql 2 | 3 | with products_to_inactive_ingredients as ( 4 | select * from {{ ref('int_mthspl_products_to_inactive_ingredients') }} 5 | ) 6 | 7 | , unii_codes as ( 8 | select * from {{ ref('stg_fda_unii__unii_codes') }} 9 | ) 10 | 11 | , usp_preservatives as ( 12 | select * from {{ ref('usp_preservatives') }} 13 | ) 14 | 15 | select 16 | ndc9 17 | , ndc 18 | , unii_codes.unii as fda_unii_code 19 | , unii_codes.display_name as fda_unii_display_name 20 | , unii_codes.pubchem as pubchem_id 21 | , max(case 22 | when preservative.cas_rn is not null 23 | then 1 24 | end) as preservative 25 | , product_rxcui 26 | , string_agg(product_name, ' | ') as product_name 27 | , product_tty 28 | , inactive_ingredient_unii 29 | , inactive_ingredient_rxcui 30 | , string_agg(inactive_ingredient_name, ' | ') as inactive_ingredient_name 31 | , inactive_ingredient_tty 32 | , active 33 | , prescribable 34 | from products_to_inactive_ingredients 35 | /* 36 | need to join unii_codes twice - once 37 | to pull in the actual UNII -> displa 38 | y name 39 | mapping, and another initial one to try 40 | to map substance RXCUIs to FDA UNII RXCUIs. 41 | */ 42 | left join unii_codes rxcui_to_unii 43 | on rxcui_to_unii.rxcui = inactive_ingredient_rxcui 44 | /* 45 | if MTHSPL (DailyMed) has a substance UNII, 46 | use that. if it does not, try to map the 47 | substance RXCUI to the FDA UNII RXCUI and 48 | then use the resulting matched UNII to pull 49 | in the UNII display name. 50 | */ 51 | left join unii_codes 52 | on unii_codes.unii = case 53 | when ( 54 | inactive_ingredient_unii is not null 55 | and 56 | inactive_ingredient_unii != 'NOCODE' 57 | ) then inactive_ingredient_unii 58 | else rxcui_to_unii.unii 59 | end 60 | left join usp_preservatives preservative 61 | on preservative.cas_rn = unii_codes.rn 62 | group by 63 | ndc9 64 | , ndc 65 | , unii_codes.unii 66 | , unii_codes.display_name 67 | , unii_codes.pubchem 68 | , product_rxcui 69 | , product_tty 70 | , inactive_ingredient_unii 71 | , inactive_ingredient_rxcui 72 | , inactive_ingredient_tty 73 | , active 74 | , prescribable 75 | -------------------------------------------------------------------------------- /dbt/sagerx/models/marts/purdue/scorecard_data.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | ing_to_report as ( 4 | 5 | select * from {{ ref('int_inactive_ingredients_to_fda_enforcement_reports') }} 6 | 7 | ) 8 | 9 | select * from ing_to_report 10 | where active_ingredient_name in ( 11 | 'risperidone' 12 | , 'adalimumab' 13 | , 'lidocaine' 14 | , 'carbamazepine' 15 | , 'phenytoin' 16 | , 'midazolam' 17 | , 'valproate' 18 | , 'tacrolimus' 19 | , 'amoxicillin' 20 | , 'hydrocortisone' 21 | , 'cetirizine' 22 | , 'pertuzumab' 23 | , 'methylphenidate' 24 | , 'erythromycin' 25 | , 'gabapentin' 26 | , 'lopinavir / ritonavir' 27 | , 'levothyroxine' 28 | , 'albuterol' 29 | ) 30 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/ashp/_ashp__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_ashp__current_drug_shortages 5 | description: Current ASHP drug shortages 6 | columns: 7 | - name: id 8 | description: The ID of the shortage as defined by the detail page URL ID 9 | data_tests: 10 | - unique 11 | - not_null 12 | - name: name 13 | description: The name of the shortage as described by ASHP 14 | - name: url 15 | description: The full URL link to the shortage detail page 16 | - name: shortage_reasons 17 | description: A list of reasons for the shortage 18 | - name: resupply_dates 19 | description: A list of resupply dates 20 | - name: alternatives_and_management 21 | description: Alternatives and management information 22 | - name: care_implications 23 | description: Implications on patient care 24 | - name: safety_notices 25 | description: Safety notices related to the shortage 26 | - name: created_date 27 | description: The date the shortage record was created by ASHP 28 | - name: updated_date 29 | description: The date the shortage record was last updated by ASHP 30 | 31 | - name: stg_ashp__current_drug_shortages_ndcs 32 | description: Affected and available NDCs for each ASHP drug shortage. 33 | columns: 34 | - name: id 35 | description: The ID of the shortage as defined by the detail page URL ID 36 | - name: product 37 | description: The NDC product description 38 | - name: manufacturer 39 | description: The NDC manufacturer 40 | - name: description 41 | description: The NDC description relevant to the shortage 42 | - name: ndc_11 43 | description: The NDC package code in NDC-11 format 44 | - name: ndc_type 45 | description: | 46 | NDC package status as it relates to the shortage 47 | (either 'affected' or 'available') -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/ashp/_ashp__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: ashp 5 | description: Current drug shortage list from ASHP 6 | schema: sagerx_lake 7 | tables: 8 | - name: ashp_shortage_list 9 | desciption: Current drug shortage list from ASHP 10 | columns: 11 | - name: name 12 | description: The name of the shortage as described by ASHP 13 | - name: detail_url 14 | description: | 15 | The partial URL for the detail about the shortage. 16 | Requires a prefix to be a complete URL. That prefix is 17 | `https://www.ashp.org/drug-shortages/current-shortages/`. 18 | - name: shortage_reasons 19 | description: A list of reasons for the shortage 20 | - name: resupply_dates 21 | description: A list of resupply dates 22 | - name: alternatives_and_management 23 | description: Alternatives and management information 24 | - name: care_implications 25 | description: Implications on patient care 26 | - name: safety_notices 27 | description: Safety notices related to the shortage 28 | - name: created_date 29 | description: The date the shortage record was created by ASHP 30 | - name: updated_date 31 | description: The date the shortage record was last updated by ASHP 32 | 33 | - name: ashp_shortage_list_ndcs 34 | desciption: Affected and available NDCs for each ASHP drug shortage. 35 | columns: 36 | - name: detail_url 37 | description: | 38 | The partial URL for the shortage detail page, 39 | containing an id parameter which can be used as 40 | an index 41 | - name: ndc_description 42 | description: The NDC description statement associated with the shortage 43 | - name: ndc_type 44 | description: | 45 | NDC package status as it relates to the shortage 46 | (either 'affected' or 'available') 47 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/ashp/stg_ashp__current_drug_shortages.sql: -------------------------------------------------------------------------------- 1 | -- stg_ashp__current_drug_shortages.sql 2 | 3 | with 4 | 5 | ashp_shortage_list as ( 6 | 7 | select * from {{ source('ashp', 'ashp_shortage_list') }} 8 | 9 | ), 10 | 11 | current_drug_shortages as ( 12 | 13 | select 14 | split_part(detail_url, '=', 2)::int as id, 15 | name, 16 | concat( 17 | 'https://www.ashp.org/drug-shortages/current-shortages/', 18 | lower(detail_url)) as url, 19 | shortage_reasons::jsonb, 20 | resupply_dates::jsonb, 21 | alternatives_and_management::jsonb, 22 | care_implications::jsonb, 23 | safety_notices::jsonb, 24 | created_date::date, 25 | updated_date::date 26 | from ashp_shortage_list 27 | 28 | ) 29 | 30 | select 31 | * 32 | from current_drug_shortages 33 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/ashp/stg_ashp__current_drug_shortages_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_ashp__current_drug_shortages_ndcs.sql 2 | 3 | with 4 | 5 | ashp_shortage_list as ( 6 | 7 | select 8 | detail_url, 9 | -- Prepare description by removing any commas inside of parentheses 10 | regexp_replace(ndc_description, '\(([^)]*),([^)]*)\)', '(\1\2)', 'g') as ndc_description, 11 | ndc_type 12 | from {{ source('ashp', 'ashp_shortage_list_ndcs') }} 13 | 14 | ), 15 | 16 | current_drug_shortages_ndcs as ( 17 | 18 | select 19 | split_part(detail_url, '=', 2)::int as id, 20 | split_part(ndc_description, ',', 1) as product, 21 | split_part(ndc_description, ',', 2) as manufacturer, 22 | -- Split NDC description by commas and keep array items 3 through n-1 23 | array_to_string((string_to_array(ndc_description, ','))[3:array_upper(string_to_array(ndc_description, ','), 1)-1], ',') as description, 24 | -- Get NDC using regular expression 25 | replace((regexp_match(ndc_description, '\d{5}\-\d{4}\-\d{2}'))[1], '-', '') as ndc_11, 26 | ndc_type 27 | from ashp_shortage_list 28 | 29 | ) 30 | 31 | select 32 | * 33 | from current_drug_shortages_ndcs 34 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__interactions.sql: -------------------------------------------------------------------------------- 1 | /* staging.stg_dailymed__interactions */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ) 8 | 9 | select zip_file, y.* 10 | from xml_table x, 11 | xmltable('dailymed/InteractionText' 12 | passing xml_column 13 | columns 14 | document_id text path '../documentId', 15 | set_id text path '../SetId', 16 | version_number text path '../VersionNumber', 17 | interaction_text text path '.' 18 | ) y 19 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__main.sql: -------------------------------------------------------------------------------- 1 | /* staging.stg_dailymed__main */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ) 8 | 9 | select zip_file, y.*, 'https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid=' || y.set_id 10 | from xml_table x, 11 | xmltable('dailymed' 12 | passing xml_column 13 | columns 14 | document_id TEXT PATH './documentId', 15 | set_id TEXT PATH './SetId', 16 | version_number TEXT PATH './VersionNumber', 17 | effective_date TEXT PATH './EffectiveDate', 18 | market_status TEXT PATH './MarketStatus', 19 | application_number TEXT PATH './ApplicationNumber' 20 | ) y 21 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__ndcs.sql: -------------------------------------------------------------------------------- 1 | /* sagerx_dev.stg_dailymed__ndcs */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ), 8 | 9 | sql_table as ( 10 | 11 | select zip_file, y.* 12 | from xml_table x, 13 | xmltable('dailymed/NDCList/NDC' 14 | passing xml_column 15 | columns 16 | document_id text path '../../documentId', 17 | set_id text path '../../SetId', 18 | version_number text path '../../VersionNumber', 19 | ndc text path '.' 20 | ) y 21 | 22 | ), 23 | 24 | cte as ( 25 | 26 | select 27 | *, 28 | {{ ndc_to_11('ndc') }} as ndc11 29 | 30 | from sql_table 31 | 32 | ) 33 | 34 | select * from cte 35 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__organization_activities.sql: -------------------------------------------------------------------------------- 1 | /* staging.stg_dailymed__organization_activities */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ) 8 | 9 | select zip_file, y.* 10 | from xml_table x, 11 | xmltable('/dailymed/Organizations/establishment/function' 12 | passing xml_column 13 | columns 14 | document_id text path '../../../documentId', 15 | set_id text path '../../../SetId', 16 | version_number text path '../VersionNumber', 17 | dun text path '../DUN', 18 | activity text path './name' 19 | ) y 20 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__organization_items.sql: -------------------------------------------------------------------------------- 1 | /* staging.dailymed_organization_item */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ) 8 | 9 | select zip_file, y.* 10 | from xml_table x, 11 | xmltable('/dailymed/Organizations/establishment/function/item_list/item' 12 | passing xml_column 13 | columns 14 | document_id text path '../../../../../documentId', 15 | set_id text path '../../../../../SetId', 16 | version_number text path '../../../../../VersionNumber', 17 | dun text path '../../../DUN', 18 | activity text path '../../name', 19 | item text path '.' 20 | ) y 21 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__organization_texts.sql: -------------------------------------------------------------------------------- 1 | /* staging.stg_dailymed__organization_texts */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ) 8 | 9 | select zip_file 10 | , document_id 11 | , set_id 12 | , version_number 13 | , organization_text 14 | , row_num 15 | from (select zip_file 16 | , y.document_id 17 | , y.set_id 18 | , y.version_number 19 | , y.organization_text 20 | --,regexp_matches(organization_text, '(manufactured|distributed) (by|for):([\s\S]*)(?=manufactured|distributed|made)', 'ig') as mfdg_by_match 21 | ,row_number() over (partition by zip_file order by length(organization_text) desc) as row_num 22 | from xml_table x, 23 | xmltable('/dailymed/Organizations/OrganizationsText' 24 | passing xml_column 25 | columns 26 | document_id text path '../../documentId', 27 | set_id text path '../../SetId', 28 | version_number text path '../../VersionNumber', 29 | organization_text text path '.' 30 | ) y 31 | ) z 32 | where row_num = 1 33 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__organizations.sql: -------------------------------------------------------------------------------- 1 | /* staging.stg_dailymed__organizations */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ) 8 | 9 | select zip_file, y.* 10 | from xml_table x, 11 | xmltable('/dailymed/Organizations/establishment' 12 | passing xml_column 13 | columns 14 | document_id text path '../../documentId', 15 | set_id text path '../../SetId', 16 | version_number text path '../../VersionNumber', 17 | dun text path './DUN', 18 | org_name text path './name', 19 | org_type text path './type' 20 | ) y 21 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__package_label_section_images.sql: -------------------------------------------------------------------------------- 1 | /* stg_dailymed__package_label_section_images */ 2 | 3 | with 4 | 5 | package_label_sections as 6 | ( 7 | select * from {{ ref('stg_dailymed__package_label_sections') }} 8 | ), 9 | 10 | images as ( 11 | 12 | select 13 | p.set_id, 14 | p.id as package_label_section_id, 15 | y.* 16 | from package_label_sections p, 17 | xmltable( 18 | '//MediaList/Media' passing media_list 19 | columns 20 | image text path 'Image', 21 | image_id text path 'ID' 22 | ) y 23 | 24 | ), 25 | 26 | id_images as ( 27 | 28 | select 29 | row_number() over() as id, 30 | * 31 | from images 32 | 33 | ) 34 | 35 | select * from id_images 36 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__package_label_section_ndcs.sql: -------------------------------------------------------------------------------- 1 | /* stg_dailymed__package_label_section_ndcs */ 2 | 3 | with 4 | 5 | package_label_sections as 6 | ( 7 | select * from {{ ref('stg_dailymed__package_label_sections') }} 8 | ), 9 | 10 | ndcs as ( 11 | 12 | select 13 | p.set_id, 14 | p.id as package_label_section_id, 15 | -- TODO: account for NDCs with spaces instead of dashes 16 | -- example ndc 55292 140 01 17 | -- example set_id a0aad470-3f38-af97-e053-2995a90a383a 18 | regexp_replace(regexp_replace((regexp_matches(p.text, '(?:\d{4}|\d{5})\s*(?:-|–)\s*\d{3,6}\s*(?:-|–)\s*\d{1,2}|\d{11}|\d{10}', 'g'))[1], '\s', '', 'g'), '–', '-') as ndc 19 | from package_label_sections p 20 | 21 | ), 22 | 23 | id_ndcs as ( 24 | 25 | select 26 | row_number() over() as id, 27 | * 28 | from ndcs 29 | 30 | ) 31 | 32 | select * from id_ndcs 33 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/dailymed/stg_dailymed__package_label_sections.sql: -------------------------------------------------------------------------------- 1 | /* staging.stg_dailymed__package_label_sections */ 2 | 3 | with xml_table as 4 | ( 5 | select zip_file, xml_content::xml as xml_column 6 | from sagerx_lake.dailymed 7 | ) 8 | 9 | select 10 | zip_file 11 | , y.* 12 | from xml_table x, 13 | xmltable( 14 | '//PackageLabel' passing xml_column 15 | columns 16 | document_id text path '../../documentId', 17 | set_id text path '../../SetId', 18 | version_number text path '../../VersionNumber', 19 | id text path 'ID', 20 | text text path 'Text', 21 | media_list xml path 'MediaList' 22 | ) y 23 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_enforcement/_fda_enforcement__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_fda_enforcement__reports 5 | description: "FDA enforcement reports" 6 | columns: 7 | - name: status 8 | - name: city 9 | - name: state 10 | - name: country 11 | - name: classification 12 | - name: openfda 13 | - name: product_type 14 | - name: event_id 15 | - name: recalling_firm 16 | - name: address_1 17 | - name: address_2 18 | - name: postal_code 19 | - name: voluntary_mandated 20 | - name: initial_firm_notification 21 | - name: distribution_pattern 22 | - name: recall_number 23 | description: "The recall number." 24 | tests: 25 | - unique 26 | - not_null 27 | - name: product_description 28 | - name: product_quantity 29 | - name: reason_for_recall 30 | - name: recall_initiation_date 31 | - name: center_classification_date 32 | - name: report_date 33 | - name: code_info 34 | 35 | models: 36 | - name: stg_fda_enforcement__regex_ndcs 37 | description: "FDA enforcement NDCs from RegEx." 38 | columns: 39 | # primary key would be recall_number + ndc11 40 | - name: recall_number 41 | - name: ndc11 42 | - name: ndc9 43 | 44 | models: 45 | - name: stg_fda_enforcement__json_ndcs 46 | description: "FDA enforcement NDCs from JSON." 47 | columns: 48 | # primary key would be recall_number + ndc11 49 | - name: recall_number 50 | - name: ndc11 51 | - name: ndc9 52 | - name: app_num 53 | 54 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_enforcement/_fda_enforcement__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: fda_enforcement 5 | schema: sagerx_lake 6 | tables: 7 | - name: fda_enforcement 8 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_enforcement/stg_fda_enforcement__json_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_enforcement__json_ndcs.sql 2 | 3 | WITH base AS ( 4 | select 5 | fdae.recall_number 6 | , ndc.id_value #>> '{}' as ndc 7 | , app_num.id_value #>> '{}' as app_num 8 | from sagerx_lake.fda_enforcement fdae 9 | , json_array_elements(openfda->'package_ndc') with ordinality ndc(id_value, line) 10 | , json_array_elements(openfda->'application_number') with ordinality app_num(id_value, line) 11 | ) 12 | 13 | select 14 | fdae.recall_number 15 | , {{ndc_to_11 ('ndc')}} as ndc11 16 | , left({{ ndc_to_11 ('ndc')}},9) as ndc9 17 | , app_num 18 | from sagerx_lake.fda_enforcement fdae 19 | , json_array_elements(openfda->'package_ndc') with ordinality ndc(id_value, line) 20 | , json_array_elements(openfda->'application_number') with ordinality app_num(id_value, line) 21 | where {{ndc_to_11('ndc')}} is not null -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_enforcement/stg_fda_enforcement__regex_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_enforcement__regex_ndcs.sql 2 | 3 | 4 | with 5 | 6 | z_base as ( 7 | select 8 | recall_number 9 | , (regexp_matches(product_description, '(\m\d{1,5}-\d{1,4}-\d{1,2}\M|\m\d{11}\M)', 'g'))[1] as ndc 10 | from sagerx_lake.fda_enforcement 11 | ), 12 | z as ( 13 | select 14 | recall_number 15 | , {{ndc_to_11 ('ndc')}} as ndc11 16 | , left( {{ndc_to_11 ('ndc')}}, 9) as ndc9 17 | from z_base 18 | ) 19 | 20 | select 21 | * 22 | from z 23 | where ndc11 is not null 24 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_enforcement/stg_fda_enforcement__reports.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_enforcement__reports.sql 2 | 3 | select 4 | status 5 | , city 6 | , state 7 | , country 8 | , classification 9 | , openfda 10 | , product_type 11 | , event_id 12 | , recalling_firm 13 | , address_1 14 | , address_2 15 | , postal_code 16 | , voluntary_mandated 17 | , initial_firm_notification 18 | , distribution_pattern 19 | , recall_number 20 | , product_description 21 | , product_quantity 22 | , reason_for_recall 23 | , recall_initiation_date 24 | , center_classification_date 25 | , report_date 26 | , code_info 27 | from sagerx_lake.fda_enforcement 28 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_excluded/stg_fda_excluded__classes.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_excluded__classes.sql 2 | with 3 | 4 | product as ( 5 | select * from {{ source('fda_excluded', 'fda_excluded_product') }} 6 | ) 7 | 8 | , pharm_classes_array as ( 9 | select 10 | product.productid 11 | , token 12 | , row_number() over (partition by product.productid order by token desc) as class_line 13 | from product, unnest(string_to_array(product.pharm_classes, ',')) as token 14 | ) 15 | 16 | select 17 | classes.productid 18 | , classes.class_line 19 | , trim(left(classes.token, position('[' in classes.token) -1 )) as class_name 20 | , substring(classes.token, '\[(.+)\]') as class_type 21 | from pharm_classes_array classes 22 | order by 23 | productid 24 | , class_line 25 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_excluded/stg_fda_excluded__ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_excluded__ndcs.sql 2 | 3 | with 4 | 5 | product as ( 6 | 7 | select * from {{ source('fda_excluded', 'fda_excluded_product') }} 8 | 9 | ), 10 | 11 | package as ( 12 | 13 | select * from {{ source('fda_excluded', 'fda_excluded_package') }} 14 | 15 | ) 16 | 17 | select 18 | {{ndc_to_11 ('ndcpackagecode')}} as ndc11 19 | , package.productid 20 | , package.productndc 21 | , producttypename 22 | , proprietaryname 23 | , proprietarynamesuffix 24 | , nonproprietaryname 25 | , dosageformname 26 | , routename 27 | , product.startmarketingdate as product_startmarketingdate 28 | , product.endmarketingdate as product_endmarketingdate 29 | , marketingcategoryname 30 | , applicationnumber 31 | , labelername 32 | , substancename 33 | , active_numerator_strength 34 | , active_ingred_unit 35 | , pharm_classes 36 | , deaschedule 37 | , product.ndc_exclude_flag as product_ndc_exclude_flag 38 | , listing_record_certified_through 39 | , ndcpackagecode 40 | , packagedescription 41 | , package.startmarketingdate as package_startmarketingdate 42 | , package.endmarketingdate as package_endmarketingdate 43 | , package.ndc_exclude_flag as package_ndc_exclude_flag 44 | , sample_package 45 | from package 46 | left join product 47 | on package.productid = product.productid 48 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_excluded/stg_fda_excluded__substances.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_excluded__substances.sql 2 | 3 | with 4 | 5 | product as ( 6 | select * from {{ source('fda_excluded', 'fda_excluded_product') }} 7 | ) 8 | 9 | , substancename_array as ( 10 | select 11 | productid 12 | , substance 13 | , row_number() over(partition by productid) as substance_line 14 | from product, unnest(string_to_array(substancename, '; ')) as substance 15 | ) 16 | 17 | , strength_array as ( 18 | select 19 | productid 20 | , strength 21 | , row_number() over(partition by productid) as strength_line 22 | from product, unnest(string_to_array(active_numerator_strength, '; ')) as strength 23 | ) 24 | 25 | , unit_array as ( 26 | select 27 | productid 28 | , unit 29 | , row_number() over(partition by productid) as unit_line 30 | from product, unnest(string_to_array(active_ingred_unit, '; ')) as unit 31 | ) 32 | 33 | select 34 | substance.productid 35 | , substance.substance_line 36 | , substance.substance as substancename 37 | , strength.strength as active_numerator_strength 38 | , unit.unit as active_ingred_unit 39 | from substancename_array substance 40 | inner join strength_array strength 41 | on strength.productid = substance.productid 42 | and strength.strength_line = substance.substance_line 43 | inner join unit_array unit 44 | on unit.productid = substance.productid 45 | and unit.unit_line = substance.substance_line 46 | order by 47 | productid 48 | , substance_line -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__classes.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_ndc__classes.sql 2 | 3 | with 4 | 5 | product as ( 6 | select * from {{ source('fda_ndc', 'fda_ndc_product') }} 7 | ) 8 | 9 | , pharm_classes_array as ( 10 | select 11 | product.productid 12 | , token 13 | , row_number() over (partition by product.productid order by token desc) as class_line 14 | from product, unnest(string_to_array(product.pharm_classes, ',')) as token 15 | ) 16 | 17 | select 18 | classes.productid 19 | , classes.class_line 20 | , trim(left(classes.token, position('[' in classes.token) -1 )) as class_name 21 | , substring(classes.token, '\[(.+)\]') as class_type 22 | from pharm_classes_array classes 23 | order by 24 | productid 25 | , class_line -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__ndc_associations.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_ndc__ndc_associations 2 | 3 | with package as ( 4 | 5 | select * 6 | from {{ source('fda_ndc', 'fda_ndc_package') }} 7 | 8 | ), 9 | 10 | extracted_ndc as ( 11 | 12 | select 13 | package.ndcpackagecode, 14 | regexp_matches(package.packagedescription, '\d+-\d+-\d+', 'g') as ndc_match, 15 | packagedescription 16 | from package 17 | 18 | ), 19 | 20 | ndc_array as ( 21 | 22 | select 23 | ndc.ndcpackagecode, 24 | unnest(ndc.ndc_match) as token, 25 | packagedescription 26 | from extracted_ndc ndc 27 | 28 | ), 29 | 30 | ranked_array as ( 31 | 32 | select 33 | ndcpackagecode, 34 | token, 35 | row_number() over() as rn, 36 | packagedescription 37 | from ndc_array 38 | 39 | ), 40 | 41 | final_array as ( 42 | 43 | select 44 | ndcpackagecode, 45 | token, 46 | row_number() over (partition by ndcpackagecode order by rn) as ndc_line, 47 | packagedescription 48 | from ranked_array 49 | 50 | ), 51 | 52 | ndc_associations as ( 53 | 54 | select 55 | ndcpackagecode as outer_ndc, 56 | {{ ndc_to_11('ndcpackagecode') }} as outer_ndc11, 57 | ndc_line, 58 | token as ndc, 59 | {{ ndc_to_11('token') }} as ndc11, 60 | packagedescription 61 | from final_array 62 | order by 63 | ndcpackagecode, 64 | ndc_line 65 | 66 | ) 67 | 68 | select * from ndc_associations 69 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_ndc__ndcs.sql 2 | 3 | with 4 | 5 | product as ( 6 | 7 | select * from {{ source('fda_ndc', 'fda_ndc_product') }} 8 | 9 | ), 10 | 11 | package as ( 12 | 13 | select * from {{ source('fda_ndc', 'fda_ndc_package') }} 14 | 15 | ) 16 | 17 | select 18 | {{ndc_to_11 ('ndcpackagecode') }} as ndc11 19 | , package.productid 20 | , package.productndc 21 | , producttypename 22 | , proprietaryname 23 | , proprietarynamesuffix 24 | , nonproprietaryname 25 | , dosageformname 26 | , routename 27 | , product.startmarketingdate as product_startmarketingdate 28 | , product.endmarketingdate as product_endmarketingdate 29 | , marketingcategoryname 30 | , applicationnumber 31 | , labelername 32 | , substancename 33 | , active_numerator_strength 34 | , active_ingred_unit 35 | , pharm_classes 36 | , deaschedule 37 | , product.ndc_exclude_flag as product_ndc_exclude_flag 38 | , listing_record_certified_through 39 | , ndcpackagecode 40 | , packagedescription 41 | , package.startmarketingdate as package_startmarketingdate 42 | , package.endmarketingdate as package_endmarketingdate 43 | , package.ndc_exclude_flag as package_ndc_exclude_flag 44 | , sample_package 45 | from package 46 | left join product 47 | on package.productid = product.productid 48 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__substances.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_ndc__substances.sql 2 | 3 | with 4 | 5 | product as ( 6 | select * from {{ source('fda_ndc', 'fda_ndc_product') }} 7 | ) 8 | 9 | , substancename_array as ( 10 | select 11 | productid 12 | , substance 13 | , row_number() over(partition by productid) as substance_line 14 | from product, unnest(string_to_array(substancename, '; ')) as substance 15 | ) 16 | 17 | , strength_array as ( 18 | select 19 | productid 20 | , strength 21 | , row_number() over(partition by productid) as strength_line 22 | from product, unnest(string_to_array(active_numerator_strength, '; ')) as strength 23 | ) 24 | 25 | , unit_array as ( 26 | select 27 | productid 28 | , unit 29 | , row_number() over(partition by productid) as unit_line 30 | from product, unnest(string_to_array(active_ingred_unit, '; ')) as unit 31 | ) 32 | 33 | select 34 | substance.productid 35 | , substance.substance_line 36 | , substance.substance as substancename 37 | , strength.strength as active_numerator_strength 38 | , unit.unit as active_ingred_unit 39 | from substancename_array substance 40 | inner join strength_array strength 41 | on strength.productid = substance.productid 42 | and strength.strength_line = substance.substance_line 43 | inner join unit_array unit 44 | on unit.productid = substance.productid 45 | and unit.unit_line = substance.substance_line 46 | order by 47 | productid 48 | , substance_line -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_unfinished/stg_fda_unfinished__ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_unfinished__ndcs.sql 2 | 3 | with 4 | 5 | product as ( 6 | 7 | select * from {{ source('fda_unfinished', 'fda_unfinished_product') }} 8 | 9 | ), 10 | 11 | package as ( 12 | 13 | select * from {{ source('fda_unfinished', 'fda_unfinished_package') }} 14 | 15 | ) 16 | 17 | select 18 | {{ndc_to_11 ('package.ndcpackagecode')}} as ndc11 19 | , package.productid 20 | , package.productndc 21 | , producttypename 22 | , nonproprietaryname 23 | , dosageformname 24 | , product.startmarketingdate as product_startmarketingdate 25 | , product.endmarketingdate as product_endmarketingdate 26 | , marketingcategoryname 27 | , labelername 28 | , substancename 29 | , active_numerator_strength 30 | , active_ingred_unit 31 | , deaschedule 32 | , listing_record_certified_through 33 | , ndcpackagecode 34 | , packagedescription 35 | , package.startmarketingdate as package_startmarketingdate 36 | , package.endmarketingdate as package_endmarketingdate 37 | from package 38 | left join product 39 | on package.productid = product.productid 40 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_unfinished/stg_fda_unfinished__substances.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_unfinished__substances.sql 2 | 3 | with 4 | 5 | product as ( 6 | select * from {{ source('fda_unfinished', 'fda_unfinished_product') }} 7 | ) 8 | 9 | , substancename_array as ( 10 | select 11 | productid 12 | , substance 13 | , row_number() over(partition by productid) as substance_line 14 | from product, unnest(string_to_array(substancename, '; ')) as substance 15 | ) 16 | 17 | , strength_array as ( 18 | select 19 | productid 20 | , strength 21 | , row_number() over(partition by productid) as strength_line 22 | from product, unnest(string_to_array(active_numerator_strength, '; ')) as strength 23 | ) 24 | 25 | , unit_array as ( 26 | select 27 | productid 28 | , unit 29 | , row_number() over(partition by productid) as unit_line 30 | from product, unnest(string_to_array(active_ingred_unit, '; ')) as unit 31 | ) 32 | 33 | select 34 | substance.productid 35 | , substance.substance_line 36 | , substance.substance as substancename 37 | , strength.strength as active_numerator_strength 38 | , unit.unit as active_ingred_unit 39 | from substancename_array substance 40 | inner join strength_array strength 41 | on strength.productid = substance.productid 42 | and strength.strength_line = substance.substance_line 43 | inner join unit_array unit 44 | on unit.productid = substance.productid 45 | and unit.unit_line = substance.substance_line 46 | order by 47 | productid 48 | , substance_line -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_unii/_fda_unii__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: fda_unii 5 | description: FDA UNII codes. 6 | schema: sagerx_lake 7 | tables: 8 | - name: fda_unii 9 | desciption: FDA UNII codes. 10 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/fda_unii/stg_fda_unii__unii_codes.sql: -------------------------------------------------------------------------------- 1 | -- stg_fda_unii__unii_codes.sql 2 | 3 | with 4 | 5 | fda_unii as ( 6 | select * from {{ source('fda_unii', 'fda_unii') }} 7 | ) 8 | 9 | select 10 | unii 11 | , display_name 12 | , rxcui 13 | , pubchem 14 | , rn 15 | , ncit 16 | , ncbi 17 | , dailymed 18 | from fda_unii 19 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/mccpd/_mccpd__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: mccpd 5 | schema: sagerx_lake 6 | tables: 7 | - name: mccpd 8 | description: > 9 | Mark Cuban Cost Plus Drugs pricing. 10 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/nadac/_nadac__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_nadac__nadac 5 | description: "Flags NADAC survey data to make it easy to find most recent price, first price, and dollar / percent changes between prices. Can also SUM change_type to see the number of price increases over time." 6 | columns: 7 | - name: ndc 8 | description: "The National Drug Code (NDC) is a numerical code maintained by the FDA that includes the labeler code, product code, and package code. The NDC is an 11-digit code." 9 | tests: 10 | - not_null 11 | - name: ndc_description 12 | description: "Identifies the drug name, strength, and dosage form of the drug product." 13 | tests: 14 | - not_null 15 | - name: nadac_per_unit 16 | description: "The National Average Drug Acquisition Cost per unit. Staging table converts this to a numeric type." 17 | - name: pricing_unit 18 | description: "Indicates the pricing unit for the associated NDC ('ML', 'GM' or 'EA')." 19 | - name: price_start_date 20 | description: "The effective date of the NADAC Per Unit cost. Staging table converts this to a date type." 21 | - name: most_recent_price 22 | description: "True if the price is the most recent available price." 23 | - name: first_price 24 | description: "True if the price is the first available price." 25 | - name: dollar_change 26 | description: "Change between this price and previous price in dollars." 27 | - name: percent_change 28 | description: "Change between this price and previous price in percentage." 29 | - name: change_type 30 | description: "1 if the price went up, 0 if the price went down." 31 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/nadac/stg_nadac__nadac.sql: -------------------------------------------------------------------------------- 1 | -- stg_nadac__nadac.sql 2 | 3 | with 4 | 5 | nadac as ( 6 | 7 | select distinct 8 | ndc_description, 9 | ndc, 10 | nadac_per_unit::numeric, 11 | effective_date::date, 12 | pricing_unit, 13 | pharmacy_type_indicator, 14 | otc, 15 | explanation_code, 16 | classification_for_rate_setting, 17 | corresponding_generic_drug_nadac_per_unit, 18 | corresponding_generic_drug_effective_date::date, 19 | as_of_date::date 20 | from {{ source('nadac', 'nadac') }} 21 | 22 | ) 23 | 24 | select * from nadac 25 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/orange_book/_orange_book__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/purple_book/_purple_book__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxclass/_rxclass__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_rxclass__rxclass 5 | description: All RxClass mappings to the lowest level RxClass code. 6 | columns: 7 | - name: rxcui 8 | - name: name 9 | - name: tty 10 | - name: rela 11 | - name: class_id 12 | - name: class_name 13 | - name: class_type 14 | - name: rela_source 15 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxclass/_rxclass__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: rxclass 5 | description: | 6 | This data source represents all of RxClass. 7 | 8 | Below are descriptions of a select few of the RxClass sources. 9 | 10 | ## ATCPROD 11 | 12 | > A product-level mapping of RxNorm to ATC1-4 classes. 13 | 14 | RxClass has added RxNorm product-level mapping for ATC. The product-level mapping is now the default source mapping for the ATC classes in the RxClass browser. 15 | 16 | When extended to RxNorm products, an ingredient-level mapping to ATC can lead to inapplicable mappings. For example, through its ingredient, timolol, the RxNorm product 1923428 (timolol 2.5 MG/ML Ophthalmic Solution) is associated with both ophthalmologicals and cardiovascular system medications, while only the former is accurate. In contrast, ATCPROD only associates this product with the ophthalmologicals class Beta blocking agents (S01ED). 17 | 18 | To use the RxNorm product-level mapping for ATC in the RxClass API, the relaSource parameter should be specified as “ATCPROD”. 19 | https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.xml?rxcui=1923428&relaSource=ATC 20 | https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.xml?rxcui=1923428&relaSource=ATCPROD 21 | The RxNorm product-level mapping for ATC was produced by the National Library of Medicine. While not all active RxNorm products are covered by the mapping, the mapping accounts for over 97% of the Medicare Part-D prescriptions from 2012-2020. 22 | 23 | The original ingredient-level mapping to ATC from the WHO Collaborating Centre for Drug Statistics Methodology remains available in RxClass (select ATC under Edit Drug Sources) and through the RxClass API (relaSource=ATC). 24 | schema: sagerx_lake 25 | tables: 26 | - name: rxclass 27 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxclass/stg_rxclass__rxclass.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxclass__rxclass.sql 2 | 3 | with rxclass as ( 4 | 5 | select 6 | * 7 | from {{ source('rxclass', 'rxclass') }} 8 | 9 | ) 10 | 11 | select distinct 12 | * 13 | from rxclass -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/_rxnorm__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: rxnorm 5 | schema: sagerx_lake 6 | tables: 7 | - name: rxnorm_rxnatomarchive 8 | - name: rxnorm_rxnconso 9 | - name: rxnorm_rxncui 10 | - name: rxnorm_rxncuichanges 11 | - name: rxnorm_rxndoc 12 | - name: rxnorm_rxnrel 13 | - name: rxnorm_rxnsab 14 | - name: rxnorm_rxnsat 15 | - name: rxnorm_rxnsty 16 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__all_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__all_ndcs.sql 2 | 3 | select 4 | {{ ndc_to_11 ('rxnsat.atv') }}as ndc11 5 | , rxnsat.atv as ndc 6 | , rxnsat.rxcui 7 | , rxnsat.sab 8 | , case when rxnsat.suppress = 'N' then true else false end as active 9 | , case when rxnsat.cvf = '4096' then true else false end as prescribable 10 | from sagerx_lake.rxnorm_rxnsat rxnsat 11 | where rxnsat.atn = 'NDC' 12 | and rxnsat.sab in ('ATC', 'CVX', 'DRUGBANK', 'MSH', 'MTHCMSFRF', 'MTHSPL', 'RXNORM', 'USP', 'VANDF') 13 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__atc_codes.sql: -------------------------------------------------------------------------------- 1 | with atc as ( 2 | select distinct a.rxcui 3 | ,a.code 4 | ,b.atn 5 | ,b.atv as atc_class_level 6 | ,a.str as description 7 | ,a.sab 8 | ,a.tty 9 | from ( 10 | select * 11 | from sagerx_lake.rxnorm_rxnconso 12 | where sab = 'ATC' 13 | and tty not like 'RXN%' 14 | order by code 15 | ) a 16 | left join sagerx_lake.rxnorm_rxnsat b 17 | on a.code= b.code 18 | where atn = 'ATC_LEVEL' 19 | order by code 20 | ) 21 | 22 | , atc_5 as ( 23 | select 24 | * 25 | from atc 26 | where atc_class_level = '5' 27 | ) 28 | 29 | , atc_4 as ( 30 | select 31 | * 32 | from atc 33 | where atc_class_level = '4' 34 | ) 35 | 36 | , atc_3 as ( 37 | select 38 | * 39 | from atc 40 | where atc_class_level = '3' 41 | ) 42 | 43 | , atc_2 as ( 44 | select 45 | * 46 | from atc 47 | where atc_class_level = '2' 48 | ) 49 | 50 | , atc_1 as ( 51 | select 52 | * 53 | from atc 54 | where atc_class_level = '1' 55 | ) 56 | 57 | , sagerx_atc as ( 58 | 59 | select 60 | atc_1.code as atc_1_code 61 | , atc_1.description as atc_1_name 62 | , atc_2.code as atc_2_code 63 | , atc_2.description as atc_2_name 64 | , atc_3.code as atc_3_code 65 | , atc_3.description as atc_3_name 66 | , atc_4.code as atc_4_code 67 | , atc_4.description as atc_4_name 68 | , atc_5.code as atc_5_code 69 | , atc_5.description as atc_5_name 70 | , atc_5.rxcui as ingredient_rxcui 71 | , atc_5.description as ingredient_name 72 | , atc_5.tty as ingredient_tty 73 | 74 | from atc_5 75 | left join atc_4 76 | on left(atc_5.code, 5) = atc_4.code 77 | left join atc_3 78 | on left(atc_4.code, 4) = atc_3.code 79 | left join atc_2 80 | on left(atc_3.code, 3) = atc_2.code 81 | left join atc_1 82 | on left(atc_2.code, 1) = atc_1.code 83 | ) 84 | 85 | select * 86 | from sagerx_atc 87 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brand_product_component_links.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__brand_product_component_links.sql 2 | 3 | select distinct 4 | product.rxcui as brand_product_rxcui 5 | , case when product_component.rxcui is null 6 | then product.rxcui 7 | else product_component.rxcui 8 | end as brand_product_component_rxcui 9 | from sagerx_lake.rxnorm_rxnconso product 10 | left join sagerx_lake.rxnorm_rxnrel rxnrel on rxnrel.rxcui2 = product.rxcui and rxnrel.rela = 'contains' 11 | left join sagerx_lake.rxnorm_rxnconso product_component 12 | on rxnrel.rxcui1 = product_component.rxcui 13 | and product_component.tty in ('SBD', 'SCD') -- NOTE: BPCKs can contain SBDs AND SCDs 14 | and product_component.sab = 'RXNORM' 15 | where product.tty in ('SBD', 'BPCK') 16 | and product.sab = 'RXNORM' 17 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brand_product_components.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__brand_product_components.sql 2 | 3 | select distinct 4 | case when product.tty = 'SBD' then product.rxcui else product_component.rxcui end rxcui 5 | , case when product.tty = 'SBD' then product.str else product_component.str end name 6 | , case when product.tty = 'SBD' then product.tty else product_component.tty end tty 7 | , case when product_component.tty = 'SCD' then product_component.rxcui else rxnrel_scd.rxcui1 end clinical_product_component_rxcui 8 | , rxnrel_bn.rxcui1 as brand_rxcui 9 | , case when 10 | case when product.tty = 'SBD' 11 | then product.suppress 12 | else product_component.suppress 13 | end = 'N' 14 | then true 15 | else false 16 | end as active 17 | , case when 18 | case when product.tty = 'SBD' 19 | then product.cvf 20 | else product_component.cvf 21 | end = '4096' 22 | then true 23 | else false 24 | end as prescribable 25 | from sagerx_lake.rxnorm_rxnconso product 26 | left join sagerx_lake.rxnorm_rxnrel rxnrel on rxnrel.rxcui2 = product.rxcui and rxnrel.rela = 'contains' 27 | left join sagerx_lake.rxnorm_rxnconso product_component 28 | on rxnrel.rxcui1 = product_component.rxcui 29 | and product_component.tty in ('SBD', 'SCD') -- NOTE: BPCKs can contain SBDs AND SCDs 30 | and product_component.sab = 'RXNORM' 31 | left join sagerx_lake.rxnorm_rxnrel rxnrel_scd 32 | on rxnrel_scd.rxcui2 = case when product_component.rxcui is null then product.rxcui else product_component.rxcui end 33 | and rxnrel_scd.rela = 'tradename_of' -- rxnrel_scd.rxcui1 = clinical_product_component_rxcui 34 | left join sagerx_lake.rxnorm_rxnrel rxnrel_bn 35 | on rxnrel_bn.rxcui2 = case when product_component.rxcui is null then product.rxcui else product_component.rxcui end 36 | and rxnrel_bn.rela = 'has_ingredient' -- rxnrel_bn.rxcui1 = brand_rxcui 37 | where product.tty in ('SBD', 'BPCK') 38 | and product.sab = 'RXNORM' 39 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brand_products.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__brand_products.sql 2 | 3 | select 4 | product.rxcui as rxcui 5 | , product.str as name 6 | , product.tty as tty 7 | , clinical_product.rxcui as clinical_product_rxcui 8 | , case when product.suppress = 'N' then true else false end as active 9 | , case when product.cvf = '4096' then true else false end as prescribable 10 | from sagerx_lake.rxnorm_rxnconso product 11 | left join sagerx_lake.rxnorm_rxnrel rxnrel on rxnrel.rxcui2 = product.rxcui and rxnrel.rela = 'tradename_of' 12 | left join sagerx_lake.rxnorm_rxnconso clinical_product 13 | on rxnrel.rxcui1 = clinical_product.rxcui 14 | and clinical_product.tty in ('SCD', 'GPCK') 15 | and clinical_product.sab = 'RXNORM' 16 | where product.tty in('SBD', 'BPCK') 17 | and product.sab = 'RXNORM' 18 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brands.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__brands.sql 2 | 3 | with cte as ( 4 | select 5 | sq.* 6 | , row_number() over(partition by rxcui order by ingredient_tty desc) as rn 7 | from ( 8 | 9 | select 10 | brand.rxcui as rxcui 11 | , brand.str as name 12 | , brand.tty as tty 13 | , ingredient.rxcui as ingredient_rxcui 14 | , ingredient.str as ingredient_name 15 | , ingredient.tty as ingredient_tty 16 | from sagerx_lake.rxnorm_rxnconso brand 17 | inner join sagerx_lake.rxnorm_rxnrel rxnrel on rxnrel.rxcui2 = brand.rxcui and rxnrel.rela = 'tradename_of' 18 | inner join sagerx_lake.rxnorm_rxnconso ingredient 19 | on rxnrel.rxcui1 = ingredient.rxcui 20 | and ingredient.tty = 'IN' 21 | and ingredient.sab = 'RXNORM' 22 | where brand.tty = 'BN' 23 | and brand.sab = 'RXNORM' 24 | 25 | union all 26 | 27 | select 28 | brand.rxcui as rxcui 29 | , brand.str as name 30 | , brand.tty as tty 31 | , ingredient.rxcui as ingredient_rxcui 32 | , ingredient.str as ingredient_name 33 | , ingredient.tty as ingredient_tty 34 | from sagerx_lake.rxnorm_rxnconso brand 35 | inner join sagerx_lake.rxnorm_rxnrel sbd_rxnrel on sbd_rxnrel.rxcui2 = brand.rxcui and sbd_rxnrel.rela = 'ingredient_of' 36 | inner join sagerx_lake.rxnorm_rxnrel scd_rxnrel on scd_rxnrel.rxcui2 = sbd_rxnrel.rxcui1 and scd_rxnrel.rela = 'tradename_of' 37 | inner join sagerx_lake.rxnorm_rxnrel ingredient_rxnrel on ingredient_rxnrel.rxcui2 = scd_rxnrel.rxcui1 and ingredient_rxnrel.rela = 'has_ingredients' 38 | left join sagerx_lake.rxnorm_rxnconso ingredient 39 | on ingredient_rxnrel.rxcui1 = ingredient.rxcui 40 | and ingredient.tty = 'MIN' 41 | and ingredient.sab = 'RXNORM' 42 | where brand.tty = 'BN' 43 | and brand.sab = 'RXNORM' 44 | ) sq 45 | ) 46 | 47 | select distinct 48 | brand.rxcui as rxcui 49 | , brand.str as name 50 | , brand.tty as tty 51 | , case when brand.suppress = 'N' then true else false end as active 52 | , case when brand.cvf = '4096' then true else false end as prescribable 53 | , cte.ingredient_rxcui as ingredient_rxcui 54 | from sagerx_lake.rxnorm_rxnconso product 55 | inner join sagerx_lake.rxnorm_rxnrel rxnrel on rxnrel.rxcui2 = product.rxcui and rxnrel.rela = 'has_ingredient' 56 | inner join sagerx_lake.rxnorm_rxnconso brand 57 | on rxnrel.rxcui1 = brand.rxcui 58 | and brand.tty = 'BN' 59 | and brand.sab = 'RXNORM' 60 | Left join cte on cte.rxcui = brand.rxcui and cte.rn < 2 61 | where product.tty = 'SBD' 62 | and product.sab = 'RXNORM' 63 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__clinical_product_component_links.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__clinical_product_component_links.sql 2 | 3 | select distinct 4 | product.rxcui as clinical_product_rxcui 5 | , case when product_component.rxcui is null 6 | then product.rxcui 7 | else product_component.rxcui 8 | end as clinical_product_component_rxcui 9 | from sagerx_lake.rxnorm_rxnconso product 10 | left join sagerx_lake.rxnorm_rxnrel rxnrel on rxnrel.rxcui2 = product.rxcui and rxnrel.rela = 'contains' 11 | left join sagerx_lake.rxnorm_rxnconso product_component 12 | on rxnrel.rxcui1 = product_component.rxcui 13 | and product_component.tty = 'SCD' 14 | and product_component.sab = 'RXNORM' 15 | where product.tty in('SCD', 'GPCK') 16 | and product.sab = 'RXNORM' 17 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__clinical_products.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__clinical_products.sql 2 | 3 | select 4 | product.rxcui as rxcui 5 | , product.str as name 6 | , product.tty as tty 7 | , case when product.suppress = 'N' then true else false end as active 8 | , case when product.cvf = '4096' then true else false end as prescribable 9 | from sagerx_lake.rxnorm_rxnconso product 10 | where product.tty in('SCD', 'GPCK') 11 | and product.sab = 'RXNORM' 12 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__dose_form_group_links.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__dose_form_group_links.sql 2 | 3 | select distinct 4 | dose_form.rxcui dose_form_rxcui 5 | , rxnrel.rxcui1 dose_form_group_rxcui 6 | from sagerx_lake.rxnorm_rxnconso dose_form 7 | inner join sagerx_lake.rxnorm_rxnrel rxnrel 8 | on rxnrel.rxcui2 = dose_form.rxcui 9 | and rxnrel.rela = 'isa' 10 | and rxnrel.sab = 'RXNORM' 11 | where dose_form.tty = 'DF' 12 | and dose_form.sab = 'RXNORM' 13 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__dose_form_groups.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__dose_form_groups.sql 2 | 3 | select 4 | dose_form_group.rxcui rxcui 5 | , dose_form_group.str name 6 | , dose_form_group.tty tty 7 | , case when dose_form_group.suppress = 'N' then true else false end as active 8 | , case when dose_form_group.cvf = '4096' then true else false end as prescribable 9 | from sagerx_lake.rxnorm_rxnconso dose_form_group 10 | where dose_form_group.tty = 'DFG' 11 | and dose_form_group.sab = 'RXNORM' 12 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__dose_forms.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__dose_forms.sql 2 | 3 | select 4 | dose_form.rxcui rxcui 5 | , dose_form.str name 6 | , dose_form.tty tty 7 | , case when dose_form.suppress = 'N' then true else false end as active 8 | , case when dose_form.cvf = '4096' then true else false end as prescribable 9 | from sagerx_lake.rxnorm_rxnconso dose_form 10 | where dose_form.tty = 'DF' 11 | and dose_form.sab = 'RXNORM' 12 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__hcpcs_codes.sql: -------------------------------------------------------------------------------- 1 | select distinct 2 | b.rxcui, 3 | a.atv as hcpcs_code, 4 | b.tty, 5 | b.str as drug_name 6 | from sagerx_lake.rxnorm_rxnsat a 7 | join sagerx_lake.rxnorm_rxnconso b on a.rxcui = b.rxcui 8 | where a.atn = 'DHJC' 9 | and a.atv like 'J%' 10 | and b.tty in ('GPCK', 'BPCK', 'SCD', 'SBD') 11 | order by a.atv 12 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_component_links.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__ingredient_component_links.sql 2 | 3 | with cte as ( 4 | select 5 | rxnrel.rxcui2 as ingredient_rxcui 6 | , ingredient_component.rxcui as ingredient_component_rxcui 7 | , ingredient_component.str as ingredient_component_name 8 | , ingredient_component.tty as ingredient_component_tty 9 | from 10 | sagerx_lake.rxnorm_rxnrel rxnrel 11 | inner join sagerx_lake.rxnorm_rxnconso ingredient_component 12 | on rxnrel.rxcui1 = ingredient_component.rxcui 13 | where rxnrel.rela = 'has_part' 14 | and ingredient_component.tty = 'IN' 15 | and ingredient_component.sab = 'RXNORM' 16 | ) 17 | 18 | select distinct 19 | ingredient.rxcui as ingredient_rxcui 20 | , case when cte.ingredient_component_rxcui is null 21 | then ingredient.rxcui 22 | else cte.ingredient_component_rxcui 23 | end as ingredient_component_rxcui 24 | from sagerx_lake.rxnorm_rxnconso ingredient 25 | left join cte on ingredient.rxcui = cte.ingredient_rxcui 26 | where ingredient.tty in('IN', 'MIN') 27 | and ingredient.sab = 'RXNORM' 28 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_components.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__ingredient_components.sql 2 | 3 | with cte as ( 4 | select 5 | rxnrel.rxcui2 as ingredient_rxcui 6 | , ingredient_component.rxcui as rxcui 7 | , ingredient_component.str as name 8 | , ingredient_component.tty as tty 9 | , ingredient_component.suppress 10 | , ingredient_component.cvf 11 | from 12 | sagerx_lake.rxnorm_rxnrel rxnrel 13 | inner join sagerx_lake.rxnorm_rxnconso ingredient_component 14 | on rxnrel.rxcui1 = ingredient_component.rxcui 15 | where rxnrel.rela = 'has_part' 16 | and ingredient_component.tty = 'IN' 17 | and ingredient_component.sab = 'RXNORM' 18 | ) 19 | 20 | select distinct 21 | case when cte.rxcui is null then ingredient.rxcui else cte.rxcui end rxcui 22 | , case when cte.name is null then ingredient.str else cte.name end name 23 | , case when cte.tty is null then ingredient.tty else cte.tty end tty 24 | , case when 25 | case when cte.rxcui is null then ingredient.suppress else cte.suppress end = 'N' then true else false end as active 26 | , case when 27 | case when cte.rxcui is null then ingredient.cvf else cte.cvf end = '4096' then true else false end as prescribable 28 | from sagerx_lake.rxnorm_rxnconso ingredient 29 | left join cte on ingredient.rxcui = cte.ingredient_rxcui 30 | where ingredient.tty in('IN', 'MIN') 31 | and ingredient.sab = 'RXNORM' 32 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_strength_links.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__ingredient_strength_links.sql 2 | 3 | select distinct 4 | product_component.rxcui as clinical_product_component_rxcui 5 | , ingredient.rxcui as ingredient_component_rxcui 6 | , ingredient_strength.rxcui as ingredient_strength_rxcui 7 | from sagerx_lake.rxnorm_rxnconso ingredient_strength 8 | inner join sagerx_lake.rxnorm_rxnrel has_ingredient 9 | on has_ingredient.rxcui2 = ingredient_strength.rxcui 10 | and has_ingredient.rela = 'has_ingredient' 11 | inner join sagerx_lake.rxnorm_rxnconso ingredient 12 | on ingredient.rxcui = has_ingredient.rxcui1 13 | and ingredient.tty = 'IN' 14 | and ingredient.sab = 'RXNORM' 15 | inner join sagerx_lake.rxnorm_rxnrel constitutes 16 | on constitutes.rxcui2 = ingredient_strength.rxcui 17 | and constitutes.rela = 'constitutes' 18 | inner join sagerx_lake.rxnorm_rxnconso product_component 19 | on product_component.rxcui = constitutes.rxcui1 20 | and product_component.tty = 'SCD' 21 | and product_component.sab = 'RXNORM' 22 | where ingredient_strength.tty = 'SCDC' 23 | and ingredient_strength.sab = 'RXNORM' 24 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_strengths.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__ingredient_strengths.sql 2 | 3 | select 4 | ingredient_strength.rxcui as rxcui 5 | , ingredient_strength.str as name 6 | , numerator_value.atv as numerator_value 7 | , numerator_unit.atv as numerator_unit 8 | , denominator_value.atv as denominator_value 9 | , denominator_unit.atv as denominator_unit 10 | , text.atv as text 11 | , case when ingredient_strength.suppress = 'N' 12 | then true 13 | else false 14 | end as active 15 | , case when ingredient_strength.cvf = '4096' 16 | then true 17 | else false 18 | end as prescribable 19 | from sagerx_lake.rxnorm_rxnconso ingredient_strength 20 | left join sagerx_lake.rxnorm_rxnsat numerator_value 21 | on numerator_value.rxcui = ingredient_strength.rxcui 22 | and numerator_value.atn = 'RXN_BOSS_STRENGTH_NUM_VALUE' 23 | left join sagerx_lake.rxnorm_rxnsat numerator_unit 24 | on numerator_unit.rxcui = ingredient_strength.rxcui 25 | and numerator_unit.atn = 'RXN_BOSS_STRENGTH_NUM_UNIT' 26 | left join sagerx_lake.rxnorm_rxnsat denominator_value 27 | on denominator_value.rxcui = ingredient_strength.rxcui 28 | and denominator_value.atn = 'RXN_BOSS_STRENGTH_DENOM_VALUE' 29 | left join sagerx_lake.rxnorm_rxnsat denominator_unit 30 | on denominator_unit.rxcui = ingredient_strength.rxcui 31 | and denominator_unit.atn = 'RXN_BOSS_STRENGTH_DENOM_UNIT' 32 | left join sagerx_lake.rxnorm_rxnsat text 33 | on text.rxcui = ingredient_strength.rxcui 34 | and text.atn = 'RXN_STRENGTH' 35 | where ingredient_strength.tty = 'SCDC' 36 | and ingredient_strength.sab = 'RXNORM' 37 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredients.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__ingredients.sql 2 | 3 | select 4 | ingredient.rxcui rxcui 5 | , ingredient.str name 6 | , ingredient.tty tty 7 | , case when ingredient.suppress = 'N' then true else false end as active 8 | , case when ingredient.cvf = '4096' then true else false end as prescribable 9 | from sagerx_lake.rxnorm_rxnconso ingredient 10 | where ingredient.tty in('IN', 'MIN') 11 | and ingredient.sab = 'RXNORM' 12 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__mthspl_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__mthspl_ndcs.sql 2 | 3 | select 4 | {{ ndc_to_11 ('rxnsat.atv')}} as ndc11 5 | , rxnsat.atv as ndc 6 | , rxnsat.rxcui 7 | , case when rxnsat.suppress = 'N' then true else false end as active 8 | , case when rxnsat.cvf = '4096' then true else false end as prescribable 9 | from sagerx_lake.rxnorm_rxnsat rxnsat 10 | where rxnsat.atn = 'NDC' 11 | and rxnsat.sab = 'MTHSPL' 12 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__mthspl_products.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__mthspl_products.sql 2 | 3 | select 4 | product.rxcui as rxcui 5 | , product.str as name 6 | , product.tty as tty 7 | , product.rxaui as rxaui 8 | , product.code as ndc 9 | , case when product.suppress = 'N' then true else false end as active 10 | , case when product.cvf = '4096' then true else false end as prescribable 11 | from sagerx_lake.rxnorm_rxnconso product 12 | where product.tty = 'DP' 13 | and product.sab = 'MTHSPL' 14 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__mthspl_substances.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__mthspl_substances.sql 2 | 3 | select 4 | substance.rxcui rxcui 5 | , substance.str name 6 | , substance.tty tty 7 | , substance.rxaui rxaui 8 | , substance.code unii 9 | , case when substance.suppress = 'N' then true else false end as active 10 | , case when substance.cvf = '4096' then true else false end as prescribable 11 | from sagerx_lake.rxnorm_rxnconso substance 12 | where substance.tty = 'SU' 13 | and substance.sab = 'MTHSPL' 14 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__ndcs.sql 2 | 3 | select rxnsat.atv as ndc 4 | ,case when product.tty in ('BPCK','SBD') then clinical_product.rxcui 5 | else rxnsat.rxcui end as clinical_product_rxcui 6 | ,case when product.tty in ('BPCK','SBD') then rxnsat.rxcui 7 | else null end as brand_product_rxcui 8 | , case when rxnsat.suppress = 'N' then true else false end as active 9 | , case when rxnsat.cvf = '4096' then true else false end as prescribable 10 | from sagerx_lake.rxnorm_rxnsat rxnsat 11 | inner join sagerx_lake.rxnorm_rxnconso product on rxnsat.rxaui = product.rxaui 12 | left join sagerx_lake.rxnorm_rxnrel rxnrel on rxnsat.rxcui = rxnrel.rxcui2 and rela = 'tradename_of' and product.tty in ('BPCK','SBD') 13 | left join sagerx_lake.rxnorm_rxnconso clinical_product 14 | on rxnrel.rxcui1 = clinical_product.rxcui 15 | and clinical_product.tty in ('SCD','GPCK') 16 | and clinical_product.sab = 'RXNORM' 17 | where rxnsat.atn = 'NDC' 18 | and rxnsat.sab in ('ATC', 'CVX', 'DRUGBANK', 'MSH', 'MTHCMSFRF', 'MTHSPL', 'RXNORM', 'USP', 'VANDF') 19 | and product.tty in ('SCD','SBD','GPCK','BPCK') 20 | and product.sab = 'RXNORM' 21 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__precise_ingredient_links.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__precise_ingredient_links.sql 2 | 3 | select distinct 4 | ingredient_strength.rxcui as ingredient_strength_rxcui 5 | , precise_ingredient.rxcui as precise_ingredient_rxcui 6 | from sagerx_lake.rxnorm_rxnconso precise_ingredient 7 | inner join sagerx_lake.rxnorm_rxnrel precise_ingredient_of 8 | on precise_ingredient_of.rxcui2 = precise_ingredient.rxcui 9 | and precise_ingredient_of.rela = 'precise_ingredient_of' 10 | inner join sagerx_lake.rxnorm_rxnconso ingredient_strength 11 | on ingredient_strength.rxcui = precise_ingredient_of.rxcui1 12 | and ingredient_strength.tty = 'SCDC' 13 | and ingredient_strength.sab = 'RXNORM' 14 | where precise_ingredient.tty = 'PIN' 15 | and precise_ingredient.sab = 'RXNORM' 16 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__precise_ingredients.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__precise_ingredients.sql 2 | 3 | select 4 | ingredient.rxcui rxcui 5 | , ingredient.str name 6 | , ingredient.tty tty 7 | , case when ingredient.suppress = 'N' then true else false end as active 8 | , case when ingredient.cvf = '4096' then true else false end as prescribable 9 | from sagerx_lake.rxnorm_rxnconso ingredient 10 | where ingredient.tty = 'PIN' 11 | and ingredient.sab = 'RXNORM' 12 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__product_rxcuis.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__product_rxcuis 2 | 3 | select * from {{ source('rxnorm', 'rxnorm_rxnconso') }} 4 | where sab = 'RXNORM' 5 | and tty in ('SCD', 'SBD', 'GPCK', 'BPCK') 6 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm/stg_rxnorm__products.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm__products.sql 2 | 3 | select 4 | product.rxcui as rxcui 5 | , product.str as name 6 | , product.tty as tty 7 | , case 8 | when brand_product.rxcui is not null then brand_product.clinical_product_rxcui 9 | else product.rxcui 10 | end as clinical_product_rxcui 11 | , case 12 | when product.suppress = 'N' then true 13 | else false 14 | end as active 15 | , case 16 | when product.cvf = '4096' then true 17 | else false 18 | end as prescribable 19 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} product 20 | left join {{ ref('stg_rxnorm__brand_products') }} brand_product 21 | on product.rxcui = brand_product.rxcui 22 | where product.tty in('SCD', 'GPCK', 'SBD', 'BPCK') 23 | and product.sab = 'RXNORM' 24 | 25 | /* 26 | with 27 | 28 | rcp as ( 29 | 30 | select * from {{ ref('stg_rxnorm__clinical_products') }} 31 | 32 | ), 33 | 34 | rbp as ( 35 | 36 | select * from {{ ref('stg_rxnorm__brand_products') }} 37 | 38 | ) 39 | 40 | select distinct 41 | coalesce(rbp.rxcui, rcp.rxcui, null) as product_rxcui 42 | , coalesce(rbp.name, rcp.name, null) as product_name 43 | , coalesce(rbp.tty, rcp.tty, null) as product_tty 44 | , rcp.rxcui as clinical_product_rxcui 45 | , rcp.name as clinical_product_name 46 | , rcp.tty as clinical_product_tty 47 | from rcp 48 | left join rbp 49 | on rbp.clinical_product_rxcui = rcp.rxcui 50 | */ 51 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm_historical/_rxnorm_historical__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: rxnorm_historical 5 | schema: sagerx_lake 6 | tables: 7 | - name: rxnorm_historical 8 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm_historical/stg_rxnorm_historical__most_recent_ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm_historical__most_recent_ndcs.sql 2 | 3 | with 4 | 5 | rxnorm_historical_ndcs as 6 | ( 7 | select * from {{ ref('stg_rxnorm_historical__ndcs') }} 8 | ) 9 | 10 | /* 11 | NOTE: we do this grouping and ranking to avoid NDCs that 12 | relate to RXCUIs that have been remapped to multiple RXCUIs 13 | - see issue #265 for more details 14 | */ 15 | , grouped_and_ranked_rxnorm_historical_ndcs as 16 | ( 17 | 18 | select 19 | ndc 20 | , end_date 21 | , row_number() over (partition by ndc order by end_date desc) as end_date_line 22 | , count(rxcui) as rxcui_count 23 | from rxnorm_historical_ndcs 24 | group by ndc, end_date 25 | order by count(rxcui) desc 26 | 27 | ) 28 | 29 | select 30 | rxnorm_historical_ndcs.* 31 | from grouped_and_ranked_rxnorm_historical_ndcs 32 | inner join rxnorm_historical_ndcs 33 | on rxnorm_historical_ndcs.ndc = grouped_and_ranked_rxnorm_historical_ndcs.ndc 34 | and rxnorm_historical_ndcs.end_date = grouped_and_ranked_rxnorm_historical_ndcs.end_date 35 | where rxcui_count = 1 -- only NDCs that are associated with one RXCUI per end_date 36 | and end_date_line = 1 -- only NDCs that are most recently associated with an RXCUI 37 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxnorm_historical/stg_rxnorm_historical__ndcs.sql: -------------------------------------------------------------------------------- 1 | -- stg_rxnorm_historical__ndcs.sql 2 | 3 | select 4 | * 5 | from {{ source('rxnorm_historical', 'rxnorm_historical') }} 6 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxterms/_rxterms__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_rxterms__names 5 | description: Table of drug names and commonly used synonyms or abbreviations for the drugs. Only un-suppressed and not retired terms. 6 | columns: 7 | - name: name 8 | description: Drug name (either generic or brand name) and intended route. 9 | - name: synonyms 10 | description: Commonly used synonyms or abbreviations for the drug. 11 | 12 | - name: stg_rxterms__strengths 13 | description: Tables of drug strengths and their corresponding rxcuis. Only un-suppressed and not retired terms. 14 | columns: 15 | - name: rxcui 16 | description: The RxNorm concept unique identifier for the drug. 17 | - name: name 18 | description: Drug name (either generic or brand name) and intended route. 19 | - name: strength 20 | description: Strength information parsed from the RxNorm full name. Concatenated strength and dose form values. 21 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxterms/stg_rxterms__names.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | rxterms as ( 4 | 5 | select * from {{ source('rxterms', 'rxterms') }} 6 | 7 | ) 8 | 9 | select distinct 10 | display_name as name 11 | , display_name_synonym as synonyms 12 | from rxterms 13 | where suppress_for is null 14 | and is_retired is null 15 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/rxterms/stg_rxterms__strengths.sql: -------------------------------------------------------------------------------- 1 | with 2 | 3 | rxterms as ( 4 | 5 | select * from {{ source('rxterms', 'rxterms') }} 6 | 7 | ) 8 | 9 | select distinct 10 | rxcui 11 | , display_name as name 12 | , concat(strength, ' ', new_dose_form) as strength 13 | from rxterms 14 | where suppress_for is null 15 | and is_retired is null 16 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/umls/_stg_umls__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_umls__crosswalk_codes 5 | description: | 6 | UMLS crosswalk from MeSH to ICD9, ICD10, and SNOMED. 7 | 8 | TODO: Make this more generic - not just MeSH but other 9 | potential "from_sources". 10 | columns: 11 | - name: from_source 12 | description: The source of the original code. 13 | - name: from_code 14 | description: The original code to which we want to map other codes. 15 | - name: to_source 16 | description: The destination source we are mapping to. 17 | - name: to_code 18 | description: The synonymous code we are mapping to. 19 | - name: to_name 20 | description: The name of the concept we are mapping to. 21 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/umls/_stg_umls__sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: umls 5 | schema: sagerx_lake 6 | tables: 7 | - name: umls_crosswalk 8 | description: | 9 | Crosswalk between select vocabularies in UMLS. 10 | 11 | https://documentation.uts.nlm.nih.gov/rest/source-asserted-identifiers/crosswalk/ 12 | 13 | Currently available: 14 | - MSH -> ICD9CM 15 | - MSH -> ICD10CM 16 | - MSH -> SNOMEDCT_US 17 | 18 | A common use case of the CUIs in the UMLS is as a sort of 19 | ‘bridge of synonymy’ between code sets. For a given 20 | source-asserted code, the crosswalk endpoint will return 21 | codes from other sources that have UMLS-asserted synonymy. 22 | It is important to note that the synonymy asserted by the 23 | UMLS in the MRCONSO.RRF files (and the APIs derived from 24 | them) has not been rigorously tested and maintained in 25 | actual clinical care. 26 | 27 | With that disclaimer, users often have questions such as 28 | ‘I have a code from vocabulary x, what is the equivalent 29 | code from vocabulary y according to UMLS synonymy?’. 30 | Although UMLS CUIs can be used as a starting point, 31 | results should be carefully reviewed for relevancy 32 | in your use case. 33 | -------------------------------------------------------------------------------- /dbt/sagerx/models/staging/umls/stg_umls__crosswalk_codes.sql: -------------------------------------------------------------------------------- 1 | -- stg_umls__crosswalk_codes.sql 2 | 3 | select 4 | -- TODO: make DAG store the source name (MSH) 5 | -- so this is more general than just MeSH 6 | 'MSH' as from_source, 7 | mesh_code as from_code, 8 | root_source as to_source, 9 | ui as to_code, 10 | name as to_name 11 | from {{ source('umls', 'umls_crosswalk') }} 12 | where obsolete = false 13 | -------------------------------------------------------------------------------- /dbt/sagerx/seeds/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/seeds/.gitkeep -------------------------------------------------------------------------------- /dbt/sagerx/seeds/_seeds__models.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | seeds: 4 | - name: usp_preservatives 5 | description: | 6 | A list of CAS RN identifiers and USP product names obtained manually from 7 | searching the [USP catalog](https://store.usp.org/preservatives/category/USP-1213) 8 | for products in the "Preservatives" category. 9 | -------------------------------------------------------------------------------- /dbt/sagerx/seeds/usp_preservatives.csv: -------------------------------------------------------------------------------- 1 | cas_rn,usp_product_name 2 | 17927-65-0,Aluminum Sulfate (2 g) 3 | 60-00-4,Edetic Acid (200 mg) 4 | 79-09-4,Propionic Acid (1.5 mL/ampule; 3 ampules) 5 | 6001-64-5,Chlorobutanol (200 mg) 6 | 59-51-8,Racemethionine (200 mg) 7 | 128-37-0,Butylated Hydroxytoluene (500 mg) 8 | 5793-89-5,Calcium Saccharate (200 mg) 9 | 121-00-6,3-tert-Butyl-4-hydroxyanisole (200 mg) 10 | 137-40-6,Sodium Propionate (200 mg) 11 | 89-65-6,Erythorbic Acid (50 mg) 12 | 122-99-6,Phenoxyethanol (500 mg) (2-Phenoxyethanol) 13 | 94-13-3,Propylparaben (200 mg) 14 | 8001-54-5,Benzalkonium Chloride (5 mL of approx. 4% aqueous solution) 15 | 7681-57-4,Sodium Metabisulfite (2 X 500 mg) 16 | 110-44-1,Sorbic Acid (1 g) 17 | 100-51-6,Benzyl Alcohol (500 mg/ampule) 18 | 99-76-3,Methylparaben (125 mg) 19 | 590-00-1,Potassium Sorbate (1 g) 20 | 24634-61-5,Potassium Sorbate (1 g) 21 | 532-32-1,Sodium Benzoate (1 g) 22 | 88-32-4,2-tert-Butyl-4-hydroxyanisole (200 mg) 23 | 120-47-8,Ethylparaben (200 mg) 24 | 90-64-2,Mandelic Acid (500 mg) 25 | 121-79-9,Propyl Gallate (200 mg) 26 | 4075-81-4,Calcium Propionate (100 mg) 27 | 94-26-8,Butylparaben (200 mg) 28 | 39236-46-9,Imidurea (200 mg) 29 | 520-45-6,Dehydroacetic Acid (200 mg) 30 | 57-09-0,Cetrimonium Bromide (1 g) 31 | -------------------------------------------------------------------------------- /dbt/sagerx/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/snapshots/.gitkeep -------------------------------------------------------------------------------- /dbt/sagerx/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/tests/.gitkeep -------------------------------------------------------------------------------- /docs/images/sagerx_airflow_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/docs/images/sagerx_airflow_example.png -------------------------------------------------------------------------------- /docs/images/sagerx_postgres_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/docs/images/sagerx_postgres_example.png -------------------------------------------------------------------------------- /docs/style_guide.md: -------------------------------------------------------------------------------- 1 | # SageRx Style Guide 2 | 3 | ## Purpose 4 | 5 | This guide will help you understand how we structure this project, such as table and schema names. 6 | 7 | ## Table Names 8 | 9 | These also correspond to the underlying file names that create the tables. File names must be unique and correspond to the name of the model when selected and created in the warehouse. 10 | 11 | We recommend putting as much clear information into the file name as possible, including a prefix for the layer the model exists in, important grouping information, and specific information about the entity or transformation in the model. 12 | 13 | **Marts**: 14 | 15 | - Name format: [concept]s.sql 16 | - Concept correctly captures the content of the table, since these are user facing this is important 17 | - Name should be plural 18 | 19 | **Intermediates**: 20 | 21 | - Name format: int\_[entity]s\_[verb]s 22 | - Verbs should capture the business logic or transformations conducted 23 | - Name should be plural 24 | - Staging table references should be in a CTE 25 | 26 | **Staging**: 27 | 28 | - Name format: stg\_[source]\_\_[entity]s 29 | - Entity captures the data values expected 30 | - Name should be plural 31 | - Staging models are the only place we’ll reference source tables, and our staging models should have a 1-to-1 relationship to our source tables 32 | - Source table references should be in a CTE 33 | 34 | **Sources**: 35 | 36 | - Name format: [source]\_[content] 37 | - Content captures the raw data that is imported 38 | - Name should be singular 39 | 40 | ## Schema Names 41 | 42 | **sagerx_lake** 43 | 44 | - Contains raw data from data sources and seed tables, users can also access these tables to manipulate the data for their use cases. 45 | 46 | **sagerx_dev** 47 | 48 | - Contains tables in development live and in-progress data can be stored. 49 | 50 | **sagerx** 51 | 52 | - Contains user-facing tables live, these are expected to be "production" ready data. 53 | 54 | ## DAG Philosophy 55 | 56 | Our use case of airflow DAGs is to download the data and upload it to our database, this commonality has allowed us to create layers of abstraction. 57 | 58 | **airflow operator** 59 | 60 | - Creates a DAG with standard parameters. 61 | 62 | **sagerx** 63 | 64 | - Project specific functions on how to interact with the project and its data. 65 | - Add here common ways to interact with the project or the data. 66 | 67 | **common_dag_tasks** 68 | 69 | - Common operations performed by Airflow DAGs. 70 | - Useful in defining how we have standardized the way that DAGs run. 71 | - Add tasks here that abstract away common airflow operations. 72 | 73 | **user_macros** 74 | 75 | - Common functions used to manipulate data. 76 | - Add here common ways to process data. 77 | -------------------------------------------------------------------------------- /pgadmin/servers.json: -------------------------------------------------------------------------------- 1 | { 2 | "Servers": { 3 | "1": { 4 | "Name": "sagerx", 5 | "Group": "Servers", 6 | "Host": "postgres", 7 | "Port": 5432, 8 | "MaintenanceDB": "postgres", 9 | "Username": "sagerx", 10 | "SSLMode": "prefer", 11 | "Comment": "Server with airflow and sagerx databases on it" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /postgres/0_pg_stat_statement.sh: -------------------------------------------------------------------------------- 1 | sed -i "s/#shared_preload_libraries = ''/shared_preload_libraries = 'pg_stat_statements'\npg_stat_statements.max=10000\npg_stat_statements.track=all/g" /var/lib/postgresql/data/postgresql.conf 2 | 3 | echo "Enabled pg_stat_statements" -------------------------------------------------------------------------------- /postgres/1_airflow.sql: -------------------------------------------------------------------------------- 1 | --Build airflow database and user 2 | CREATE USER airflow WITH ENCRYPTED PASSWORD 'airflow'; 3 | CREATE DATABASE airflow; 4 | GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow; 5 | 6 | --Make foreign data wrapper to allow sagerx read access to airflow tables 7 | CREATE EXTENSION IF NOT EXISTS postgres_fdw; 8 | CREATE SERVER airflow_fdw FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'postgres', port '5432', dbname 'airflow'); 9 | CREATE USER MAPPING FOR sagerx SERVER airflow_fdw OPTIONS (user 'airflow', password 'airflow'); 10 | GRANT USAGE ON FOREIGN SERVER airflow_fdw TO sagerx; -------------------------------------------------------------------------------- /postgres/2_sagerx_setup.sql: -------------------------------------------------------------------------------- 1 | --Make schemas for sagerx 2 | CREATE SCHEMA sagerx_dev; 3 | CREATE SCHEMA sagerx_lake; 4 | CREATE SCHEMA sagerx; 5 | 6 | --Add pg_stat_statements extension for query monitoring 7 | CREATE EXTENSION IF NOT EXISTS pg_stat_statements; 8 | 9 | CREATE TABLE sagerx.data_availability ( 10 | schema_name text, 11 | table_name text, 12 | has_data boolean, 13 | materialized text 14 | ); --------------------------------------------------------------------------------