├── .github
    ├── FUNDING.yml
    ├── ISSUE_TEMPLATE
    │   ├── issue_template.md
    │   └── proposal_template.md
    └── pull_request_template.md
├── .gitignore
├── LICENSE.md
├── README.md
├── airflow
    ├── Dockerfile
    ├── config
    │   └── airflow.cfg
    ├── dags
    │   ├── .gitkeep
    │   ├── airflow_operator.py
    │   ├── ashp
    │   │   └── dag.py
    │   ├── build_marts
    │   │   └── dag.py
    │   ├── cms_part_d
    │   │   ├── dag.py
    │   │   ├── load_basic_drugs_formulary_file.sql
    │   │   ├── load_beneficiary_cost_file.sql
    │   │   ├── load_excluded_drugs_formulary_file.sql
    │   │   ├── load_geographic_locator_file.sql
    │   │   ├── load_indication_based_coverage_formulary_file.sql
    │   │   ├── load_insulin_beneficiary_cost_file.sql
    │   │   ├── load_pharmacy_networks_file.sql
    │   │   ├── load_plan_information_file.sql
    │   │   └── load_pricing_file.sql
    │   ├── common_dag_tasks.py
    │   ├── dailymed
    │   │   ├── dag.py
    │   │   └── template.xsl
    │   ├── dailymed_pharm_class
    │   │   ├── dag.py
    │   │   └── load-dailymed_pharm_class.sql
    │   ├── dailymed_rxnorm
    │   │   ├── dag.py
    │   │   └── load-dailymed_rxnorm.sql
    │   ├── dailymed_zip_file_metadata
    │   │   ├── dag.py
    │   │   └── load-dailymed_zip_file_metadata.sql
    │   ├── dbt_gcp
    │   │   └── dag.py
    │   ├── export_marts
    │   │   └── dag.py
    │   ├── fda_enforcement
    │   │   ├── dag.py
    │   │   └── dag_tasks.py
    │   ├── fda_excluded
    │   │   ├── dag.py
    │   │   ├── load_package.sql
    │   │   └── load_product.sql
    │   ├── fda_ndc
    │   │   ├── dag.py
    │   │   ├── load_package.sql
    │   │   └── load_product.sql
    │   ├── fda_unfinished
    │   │   ├── dag.py
    │   │   ├── load_package.sql
    │   │   └── load_product.sql
    │   ├── fda_unii
    │   │   ├── dag.py
    │   │   └── load_unii.sql
    │   ├── mccpd
    │   │   ├── dag.py
    │   │   └── dag_tasks.py
    │   ├── nadac
    │   │   ├── dag.py
    │   │   └── load_nadac.sql
    │   ├── orange_book
    │   │   ├── dag.py
    │   │   ├── load_exclusivity.sql
    │   │   ├── load_patent.sql
    │   │   └── load_products.sql
    │   ├── purple_book
    │   │   ├── dag.py
    │   │   ├── dag_tasks.py
    │   │   └── load_purple_book.sql
    │   ├── rxclass
    │   │   ├── dag.py
    │   │   └── dag_tasks.py
    │   ├── rxnorm
    │   │   ├── dag.py
    │   │   ├── load_rxnconso.sql
    │   │   ├── load_rxncui.sql
    │   │   ├── load_rxncuichanges.sql
    │   │   ├── load_rxndoc.sql
    │   │   ├── load_rxnrel.sql
    │   │   ├── load_rxnrxnatomarchive.sql
    │   │   ├── load_rxnsab.sql
    │   │   ├── load_rxnsat.sql
    │   │   ├── load_rxnsty.sql
    │   │   └── terms-of-service.md
    │   ├── rxnorm_historical
    │   │   ├── dag.py
    │   │   └── dag_tasks.py
    │   ├── rxterms
    │   │   ├── dag.py
    │   │   ├── load_ingredients.sql
    │   │   └── load_rxterms.sql
    │   ├── sagerx.py
    │   ├── umls
    │   │   ├── dag.py
    │   │   └── dag_tasks.py
    │   ├── user_macros.py
    │   └── vsac
    │   │   ├── dag.py
    │   │   └── dag_tasks.py
    ├── hidden_dags
    │   └── meps
    │   │   ├── meps_medical_conditions_dag.py
    │   │   ├── meps_population_characteristics_dag.py
    │   │   └── meps_prescribed_medications_dag.py
    └── requirements.txt
├── dbt
    ├── Dockerfile
    ├── profiles.yml
    └── sagerx
    │   ├── .gitignore
    │   ├── README.md
    │   ├── analyses
    │       └── .gitkeep
    │   ├── dbt_project.yml
    │   ├── macros
    │       ├── .gitkeep
    │       ├── check_data_availability.sql
    │       ├── get_custom_schema.sql
    │       ├── ndc_convert.sql
    │       ├── ndc_format.sql
    │       └── ndc_to_11.sql
    │   ├── models
    │       ├── intermediate
    │       │   ├── dailymed
    │       │   │   ├── int_dailymed_image_name_ndcs.sql
    │       │   │   ├── int_dailymed_image_xml_ndcs.sql
    │       │   │   ├── int_dailymed_organization_metrics.sql
    │       │   │   ├── int_dailymed_ranked_package_label_images.sql
    │       │   │   ├── int_dailymed_ranked_package_label_ndcs.sql
    │       │   │   └── int_dailymed_validated_package_label_ndcs.sql
    │       │   ├── fda
    │       │   │   ├── int_fda_packaging_components.sql
    │       │   │   ├── int_fda_packaging_parts.sql
    │       │   │   └── int_fda_packaging_subparts.sql
    │       │   ├── fda_enforcement
    │       │   │   └── int_inactive_ingredients_to_fda_enforcement_reports.sql
    │       │   ├── nadac
    │       │   │   ├── _int_nadac__models.yml
    │       │   │   ├── int_nadac_historical_pricing.sql
    │       │   │   └── int_nadac_pricing.sql
    │       │   ├── orange_book
    │       │   │   ├── _int_orange_book__models.yml
    │       │   │   └── int_fda_ndc_to_te.sql
    │       │   ├── rxclass
    │       │   │   ├── _int_rxclass__models.yml
    │       │   │   ├── int_rxclass_clinical_products_to_atc_class.sql
    │       │   │   ├── int_rxclass_clinical_products_to_cvx_code.sql
    │       │   │   ├── int_rxclass_clinical_products_to_schedule.sql
    │       │   │   └── int_rxclass_clinical_products_to_va_class.sql
    │       │   ├── rxnorm
    │       │   │   ├── _int_rxnorm__models.yml
    │       │   │   ├── int_mthspl_products_to_active_ingredients.sql
    │       │   │   ├── int_mthspl_products_to_active_moieties.sql
    │       │   │   ├── int_mthspl_products_to_inactive_ingredients.sql
    │       │   │   ├── int_rxnorm_all_ndcs_to_product_rxcuis.sql
    │       │   │   ├── int_rxnorm_clinical_products_to_clinical_product_components.sql
    │       │   │   ├── int_rxnorm_clinical_products_to_dose_forms.sql
    │       │   │   ├── int_rxnorm_clinical_products_to_ingredient_components.sql
    │       │   │   ├── int_rxnorm_clinical_products_to_ingredient_strengths.sql
    │       │   │   ├── int_rxnorm_clinical_products_to_ingredients.sql
    │       │   │   ├── int_rxnorm_clinical_products_to_ndcs.sql
    │       │   │   └── int_rxnorm_ndcs_to_products.sql
    │       │   └── umls
    │       │   │   ├── _int_umls__models.yml
    │       │   │   ├── int_umls_clinical_products_to_crosswalk_codes.sql
    │       │   │   ├── int_umls_ingredient_components_to_crosswalk_codes.sql
    │       │   │   ├── int_umls_multiple_ingredients_to_crosswalk_codes.sql
    │       │   │   └── int_umls_precise_ingredients_to_crosswalk_codes.sql
    │       ├── marts
    │       │   ├── classification
    │       │   │   ├── _classification__models.yml
    │       │   │   ├── atc_codes_to_rxnorm_products.sql
    │       │   │   └── clinical_products_to_diseases.sql
    │       │   ├── fda_excluded
    │       │   │   └── fda_excluded.sql
    │       │   ├── ndc
    │       │   │   ├── _ndc__models.yml
    │       │   │   ├── all_ndc_descriptions.sql
    │       │   │   ├── all_ndcs_to_sources.sql
    │       │   │   ├── gtins.sql
    │       │   │   ├── ndc_associations.sql
    │       │   │   ├── ndcs_to_label_images.sql
    │       │   │   └── pack_size.sql
    │       │   ├── pricing
    │       │   │   ├── pricing.sql
    │       │   │   └── pricing_historical.sql
    │       │   ├── products
    │       │   │   ├── _products__models.yml
    │       │   │   ├── brand_products_with_related_ndcs.sql
    │       │   │   ├── product_synonyms.sql
    │       │   │   ├── products.sql
    │       │   │   └── products_to_inactive_ingredients.sql
    │       │   └── purdue
    │       │   │   └── scorecard_data.sql
    │       └── staging
    │       │   ├── ashp
    │       │       ├── _ashp__models.yml
    │       │       ├── _ashp__sources.yml
    │       │       ├── stg_ashp__current_drug_shortages.sql
    │       │       └── stg_ashp__current_drug_shortages_ndcs.sql
    │       │   ├── dailymed
    │       │       ├── stg_dailymed__interactions.sql
    │       │       ├── stg_dailymed__main.sql
    │       │       ├── stg_dailymed__ndcs.sql
    │       │       ├── stg_dailymed__organization_activities.sql
    │       │       ├── stg_dailymed__organization_items.sql
    │       │       ├── stg_dailymed__organization_texts.sql
    │       │       ├── stg_dailymed__organizations.sql
    │       │       ├── stg_dailymed__package_label_section_images.sql
    │       │       ├── stg_dailymed__package_label_section_ndcs.sql
    │       │       └── stg_dailymed__package_label_sections.sql
    │       │   ├── fda_enforcement
    │       │       ├── _fda_enforcement__models.yml
    │       │       ├── _fda_enforcement__sources.yml
    │       │       ├── stg_fda_enforcement__json_ndcs.sql
    │       │       ├── stg_fda_enforcement__regex_ndcs.sql
    │       │       └── stg_fda_enforcement__reports.sql
    │       │   ├── fda_excluded
    │       │       ├── _fda_excluded__models.yml
    │       │       ├── _fda_excluded__sources.yml
    │       │       ├── stg_fda_excluded__classes.sql
    │       │       ├── stg_fda_excluded__ndcs.sql
    │       │       └── stg_fda_excluded__substances.sql
    │       │   ├── fda_ndc
    │       │       ├── _fda_ndc__models.yml
    │       │       ├── _fda_ndc__sources.yml
    │       │       ├── stg_fda_ndc__classes.sql
    │       │       ├── stg_fda_ndc__ndc_associations.sql
    │       │       ├── stg_fda_ndc__ndcs.sql
    │       │       └── stg_fda_ndc__substances.sql
    │       │   ├── fda_unfinished
    │       │       ├── _fda_unfinished__models.yml
    │       │       ├── _fda_unfinished__sources.yml
    │       │       ├── stg_fda_unfinished__ndcs.sql
    │       │       └── stg_fda_unfinished__substances.sql
    │       │   ├── fda_unii
    │       │       ├── _fda_unii__sources.yml
    │       │       └── stg_fda_unii__unii_codes.sql
    │       │   ├── mccpd
    │       │       └── _mccpd__sources.yml
    │       │   ├── nadac
    │       │       ├── _nadac__models.yml
    │       │       ├── _nadac__sources.yml
    │       │       └── stg_nadac__nadac.sql
    │       │   ├── orange_book
    │       │       ├── _orange_book__models.yml
    │       │       └── _orange_book__sources.yml
    │       │   ├── purple_book
    │       │       ├── _purple_book__models.yml
    │       │       └── _purple_book__sources.yml
    │       │   ├── rxclass
    │       │       ├── _rxclass__models.yml
    │       │       ├── _rxclass__sources.yml
    │       │       └── stg_rxclass__rxclass.sql
    │       │   ├── rxnorm
    │       │       ├── _rxnorm__models.yml
    │       │       ├── _rxnorm__sources.yml
    │       │       ├── stg_rxnorm__all_ndcs.sql
    │       │       ├── stg_rxnorm__atc_codes.sql
    │       │       ├── stg_rxnorm__brand_product_component_links.sql
    │       │       ├── stg_rxnorm__brand_product_components.sql
    │       │       ├── stg_rxnorm__brand_products.sql
    │       │       ├── stg_rxnorm__brands.sql
    │       │       ├── stg_rxnorm__clinical_product_component_links.sql
    │       │       ├── stg_rxnorm__clinical_product_components.sql
    │       │       ├── stg_rxnorm__clinical_products.sql
    │       │       ├── stg_rxnorm__dose_form_group_links.sql
    │       │       ├── stg_rxnorm__dose_form_groups.sql
    │       │       ├── stg_rxnorm__dose_forms.sql
    │       │       ├── stg_rxnorm__hcpcs_codes.sql
    │       │       ├── stg_rxnorm__ingredient_component_links.sql
    │       │       ├── stg_rxnorm__ingredient_components.sql
    │       │       ├── stg_rxnorm__ingredient_strength_links.sql
    │       │       ├── stg_rxnorm__ingredient_strengths.sql
    │       │       ├── stg_rxnorm__ingredients.sql
    │       │       ├── stg_rxnorm__mthspl_ndcs.sql
    │       │       ├── stg_rxnorm__mthspl_products.sql
    │       │       ├── stg_rxnorm__mthspl_substances.sql
    │       │       ├── stg_rxnorm__ndcs.sql
    │       │       ├── stg_rxnorm__precise_ingredient_links.sql
    │       │       ├── stg_rxnorm__precise_ingredients.sql
    │       │       ├── stg_rxnorm__product_rxcuis.sql
    │       │       └── stg_rxnorm__products.sql
    │       │   ├── rxnorm_historical
    │       │       ├── _rxnorm_historical__sources.yml
    │       │       ├── stg_rxnorm_historical__most_recent_ndcs.sql
    │       │       └── stg_rxnorm_historical__ndcs.sql
    │       │   ├── rxterms
    │       │       ├── _rxterms__models.yml
    │       │       ├── _rxterms__sources.yml
    │       │       ├── stg_rxterms__names.sql
    │       │       └── stg_rxterms__strengths.sql
    │       │   └── umls
    │       │       ├── _stg_umls__models.yml
    │       │       ├── _stg_umls__sources.yml
    │       │       └── stg_umls__crosswalk_codes.sql
    │   ├── seeds
    │       ├── .gitkeep
    │       ├── _seeds__models.yml
    │       └── usp_preservatives.csv
    │   ├── snapshots
    │       └── .gitkeep
    │   └── tests
    │       └── .gitkeep
├── docker-compose.yml
├── docs
    ├── catalog.json
    ├── images
    │   ├── sagerx_airflow_example.png
    │   └── sagerx_postgres_example.png
    ├── index.html
    ├── manifest.json
    ├── run_results.json
    └── style_guide.md
├── pgadmin
    └── servers.json
└── postgres
    ├── 0_pg_stat_statement.sh
    ├── 1_airflow.sql
    └── 2_sagerx_setup.sql


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: coderxio # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/issue_template.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Issue
 3 | about: Create a new issue
 4 | ---
 5 | # Problem Statement
 6 | [What needs to be done and why]
 7 | 
 8 | # Criteria for Success
 9 | [Measurable outcome if possible]
10 | 
11 | # Additional Information
12 | [ways one might accomplish this task, links, documentation, alternatives, etc.]
13 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/proposal_template.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Proposal
 3 | about: Propose a new feature or some other changes not related to a direct issue
 4 | ---
 5 | 
 6 | # Proposal
 7 | [What is the idea]
 8 | 
 9 | # Rationale
10 | [Why should this be implemented]
11 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | Resolves #ISSUE NUMBER
 2 | 
 3 | ## Explanation
 4 | [What did you change?]
 5 | 
 6 | ## Rationale
 7 | [Why did you make the changes mentioned above? What alternatives did you consider?]
 8 | 
 9 | ## Tests
10 | 1. What testing did you do?
11 | 1. Attach testing logs inside a summary block:
12 | 
13 | <details>
14 | <summary>testing logs</summary>
15 | 
16 | ```
17 | 
18 | ```
19 | </details>
20 | 
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .env
 2 | notebooks
 3 | 
 4 | # Python files
 5 | __pycache__
 6 | venv
 7 | 
 8 | # Airflow Volumes
 9 | data
10 | logs
11 | extracts
12 | plugins
13 | 
14 | # dbt
15 | .user.yml
16 | 
17 | # Desktop Services Store
18 | .DS_Store
19 | 
20 | # GCP
21 | gcp.json
22 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright 2024 CodeRx, LLC
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/airflow/Dockerfile:
--------------------------------------------------------------------------------
# Build the Airflow image used by this project on top of the official 2.5.1 release.
FROM apache/airflow:2.5.1

# Copy the Python dependency list into the image's current working directory.
COPY requirements.txt .

# Install the extra Python packages; --no-cache-dir keeps pip's download cache
# out of the resulting image layer.
RUN pip install --no-cache-dir -r requirements.txt
6 | 


--------------------------------------------------------------------------------
/airflow/dags/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/airflow/dags/.gitkeep


--------------------------------------------------------------------------------
/airflow/dags/airflow_operator.py:
--------------------------------------------------------------------------------
 1 | from airflow import DAG
 2 | from airflow.models.param import Param
 3 | 
 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel
 5 |     
 6 | def create_dag(dag_id,**kwargs) -> DAG:
 7 |     from airflow.utils.dates import days_ago
 8 |     from datetime import timedelta
 9 | 
10 |     dag_args ={
11 |         "dag_id":dag_id,
12 |         "start_date": days_ago(0),
13 |         "schedule": "0 5 * * *",  # run at 5am every day
14 |         "description": f"Processes {dag_id} source",
15 |     }
16 | 
17 |     default_args = {
18 |         "owner": "airflow",
19 |         "depends_on_past": False,
20 |         "email": ["admin@sagerx.io"],
21 |         "email_on_failure": False,
22 |         "email_on_retry": False,
23 |         "retries": 1,
24 |         "retry_delay": timedelta(minutes=5),
25 |         "retrieve_dataset_function": get_dataset,
26 |         "on_failure_callback": alert_slack_channel,
27 |         "dagrun_timeout":60
28 |     }
29 | 
30 |     dag_args.update(kwargs)
31 |     default_args.update(kwargs)
32 | 
33 |     dag = DAG(**dag_args,default_args=default_args)
34 | 
35 |     return dag


--------------------------------------------------------------------------------
/airflow/dags/build_marts/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | 
 3 | from airflow_operator import create_dag
 4 | from common_dag_tasks import get_most_recent_dag_run
 5 | from airflow.decorators import dag,task
 6 | from airflow.operators.trigger_dagrun import TriggerDagRunOperator
 7 | 
 8 | from common_dag_tasks import run_subprocess_command
 9 | 
def run_dag_condition(dag_id):
    """Report whether ``dag_id`` is stale enough to be triggered again.

    Args:
        dag_id: Id of the DAG whose most recent run is inspected.

    Returns:
        True when ``get_most_recent_dag_run`` yields ``None`` or when that
        run's ``execution_date`` lies more than 5 whole days before now;
        False otherwise. A message describing the decision is printed.
    """
    latest_run = get_most_recent_dag_run(dag_id)

    # No run found at all: always trigger.
    if latest_run is None:
        print(f'{dag_id} has never been run.')
        return True

    # Whole days elapsed since the latest run's execution date.
    days_since_run = (pendulum.now() - latest_run.execution_date).days
    if days_since_run > 5:
        print(f'{dag_id} was last run {latest_run.execution_date}.')
        return True

    print(f"{dag_id} was last run {latest_run.execution_date} and will now skipped.")
    return False
22 | 
def get_dag_list():
    """Return the ids of the dependency DAGs that ``run_dag_condition`` flags.

    Returns:
        A list of DAG ids, in the fixed order below, for which
        ``run_dag_condition`` returned True.
    """
    # DAGs this build depends on, checked in this order.
    upstream_dag_ids = ("fda_ndc","fda_unfinished","fda_excluded","rxnorm","rxclass","rxnorm_historical")
    dags_to_run = [
        upstream_id
        for upstream_id in upstream_dag_ids
        if run_dag_condition(upstream_id)
    ]
    print(f'list of dags to run{dags_to_run}')
    return dags_to_run
31 | 
dag = create_dag(
    dag_id="build_marts",
    schedule="0 5 * * 2",  # every Tuesday at 5:00am
    catchup=False,
    concurrency=2,
)

with dag:

    # PLEASE NOTE: the first task executes each selected DAG in turn.
    # When all of them run consecutively, the process takes in excess of 60 minutes.

    @task
    def execute_external_dag_list(**kwargs):
        # Trigger every DAG returned by get_dag_list(), one after another,
        # waiting for each triggered run to complete before starting the next.
        for stale_dag_id in get_dag_list():
            print(f'triggering {stale_dag_id}')
            trigger = TriggerDagRunOperator(
                task_id=f"{stale_dag_id}_task",
                trigger_dag_id=stale_dag_id,
                conf={"source_dag_id": "build_marts"},
                wait_for_completion=True,
            )
            # The operator is executed directly with this task's own context.
            trigger.execute(context=kwargs)

    # Once DBT freshness metrics are implemented, this task can be updated
    @task
    def transform_tasks():
        # Every command goes through `docker exec` against the `dbt` container.
        dbt_prefix = ['docker', 'exec', 'dbt', 'dbt']
        dbt_cwd = '/dbt/sagerx'

        # Load seeds first, then build each mart together with its upstream models.
        run_subprocess_command(dbt_prefix + ['seed'], cwd=dbt_cwd)
        mart_selectors = (
            '+models/marts/ndc',
            '+models/marts/classification',
            '+models/marts/products',
        )
        for selector in mart_selectors:
            run_subprocess_command(dbt_prefix + ['run', '--select', selector], cwd=dbt_cwd)

    execute_external_dag_list() >> transform_tasks()
64 | 


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_basic_drugs_formulary_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_basic_drugs_formulary */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_basic_drugs_formulary CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_basic_drugs_formulary (
 5 | formulary_id           VARCHAR(8) NOT NULL,
 6 | formulary_version         VARCHAR(5) NOT NULL,
 7 | contract_year      VARCHAR(4),
 8 | rxcui  VARCHAR(8),
 9 | ndc  VARCHAR(11),
10 | tier_level_value    TEXT,
11 | quantity_limit_yn    VARCHAR(1),
12 | quantity_limit_amount      VARCHAR(7),
13 | quantity_limit_days      VARCHAR(3),
14 | prior_authorization_yn      VARCHAR(1),
15 | step_therapy_yn      VARCHAR(1)
16 | );
17 | 
18 | COPY sagerx_lake.cms_basic_drugs_formulary
19 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/basic drugs formulary file  PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;;
20 | 


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_beneficiary_cost_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_beneficiary_cost */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_beneficiary_cost CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_beneficiary_cost (
 5 | contract_id           VARCHAR(5) NOT NULL,
 6 | plan_id           VARCHAR(3) NOT NULL,
 7 | segment_id         VARCHAR(3) NOT NULL,
 8 | coverage_level      SMALLINT,
 9 | tier  SMALLINT,
10 | days_supply  SMALLINT,
11 | 
12 | cost_type_pref  SMALLINT,
13 | cost_amt_pref  NUMERIC(14,2),
14 | cost_min_amt_pref  VARCHAR(12),
15 | cost_max_amt_pref  NUMERIC(14,2),
16 | 
17 | cost_type_nonpref   SMALLINT,
18 | cost_amt_nonpref  NUMERIC(14,2),
19 | cost_min_amt_nonpref  VARCHAR(12),
20 | cost_max_amt_nonpref  NUMERIC(14,2),
21 | 
22 | cost_type_mail_pref  SMALLINT,
23 | cost_amt_mail_pref  NUMERIC(14,2),
24 | cost_min_amt_mail_pref  VARCHAR(12),
25 | cost_max_amt_mail_pref  NUMERIC(14,2),
26 | 
27 | cost_type_mail_nonpref   VARCHAR(1),
28 | cost_amt_mail_nonpref  NUMERIC(14,2),
29 | cost_min_amt_mail_nonpref  VARCHAR(12),
30 | cost_max_amt_mail_nonpref  NUMERIC(14,2),
31 | 
32 | tier_specialty_yn   VARCHAR(1),
33 | ded_applies_yn   VARCHAR(1)
34 | );
35 | 
36 | COPY sagerx_lake.cms_beneficiary_cost
37 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/beneficiary cost file  PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;;
38 | 


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_excluded_drugs_formulary_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_excluded_drugs_formulary */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_excluded_drugs_formulary CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_excluded_drugs_formulary (
 5 | contract_id           VARCHAR(5) NOT NULL,
 6 | plan_id         VARCHAR(3) NOT NULL,
 7 | rxcui      VARCHAR(8),
 8 | tier  TEXT,
 9 | quantity_limit_yn  VARCHAR(5),
10 | quantity_limit_amount    VARCHAR(8),
11 | quantity_limit_days    VARCHAR(3),
12 | prior_auth_yn      VARCHAR(1),
13 | step_therapy_yn      VARCHAR(1),
14 | capped_benefit_yn      VARCHAR(1)
15 | );
16 | 
17 | COPY sagerx_lake.cms_excluded_drugs_formulary
18 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/excluded drugs formulary file  PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;;


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_geographic_locator_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_geographic_locator */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_geographic_locator CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_geographic_locator (
 5 | county_code           VARCHAR(5) NOT NULL,
 6 | statename         VARCHAR(30) NOT NULL,
 7 | county      VARCHAR(50),
 8 | ma_region_code  VARCHAR(2),
 9 | ma_region  VARCHAR(150),
10 | pdp_region_code  VARCHAR(2),
11 | pdp_region    VARCHAR(150)
12 | );
13 | 
14 | COPY sagerx_lake.cms_geographic_locator
15 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/geographic locator file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;;
16 | 


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_indication_based_coverage_formulary_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_indication_based_coverage_formulary */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_indication_based_coverage_formulary CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_indication_based_coverage_formulary (
 5 | contract_id           VARCHAR(5) NOT NULL,
 6 | plan_id         VARCHAR(3) NOT NULL,
 7 | rxcui      VARCHAR(8),
 8 | disease  VARCHAR(100)
 9 | );
10 | 
11 | COPY sagerx_lake.cms_indication_based_coverage_formulary
12 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/Indication Based Coverage Formulary File  PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;;
13 | 


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_insulin_beneficiary_cost_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_insulin_beneficiary_cost */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_insulin_beneficiary_cost CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_insulin_beneficiary_cost (
 5 | contract_id           TEXT NOT NULL,
 6 | plan_id         TEXT NOT NULL,
 7 | segment_id      TEXT,
 8 | tier            TEXT,
 9 | days_supply     TEXT,
10 | copay_amt_pref_insln    TEXT,
11 | copay_amt_nonpref_insln TEXT,
12 | copay_amt_mail_pref_insln   TEXT,
13 | copay_amt_mail_nonpref_insln    TEXT
14 | );
15 | 
16 | COPY sagerx_lake.cms_insulin_beneficiary_cost
17 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/insulin beneficiary cost file  PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;;


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_pharmacy_networks_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_pharmacy_networks */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_pharmacy_networks CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_pharmacy_networks (
 5 | contract_id           VARCHAR(5) NOT NULL,
 6 | plan_id         VARCHAR(3) NOT NULL,
 7 | segment_id      VARCHAR(3),
 8 | pharmacy_number  VARCHAR(12),
 9 | pharmacy_zipcode  VARCHAR(5),
10 | preferred_status_retail    VARCHAR(1),
11 | preferred_status_mail    VARCHAR(1),
12 | pharmacy_retail      VARCHAR(1),
13 | pharmacy_mail      VARCHAR(1),
14 | in_area_flag      TEXT,
15 | floor_price      TEXT,
16 | brand_dispensing_fee_30       TEXT,
17 | brand_dispensing_fee_60       TEXT,
18 | brand_dispensing_fee_90       TEXT,
19 | generic_dispensing_fee_30       TEXT,
20 | generic_dispensing_fee_60       TEXT,
21 | generic_dispensing_fee_90       TEXT
22 | );
23 | 
24 | COPY sagerx_lake.cms_pharmacy_networks
25 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file  PPUF_{{params.year}}Q{{params.quarter}} part 1.txt' DELIMITER '|' CSV HEADER;;
26 | 
27 | COPY sagerx_lake.cms_pharmacy_networks
28 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file  PPUF_{{params.year}}Q{{params.quarter}} part 2.txt' DELIMITER '|' CSV HEADER;;
29 | 
30 | COPY sagerx_lake.cms_pharmacy_networks
31 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file  PPUF_{{params.year}}Q{{params.quarter}} part 3.txt' DELIMITER '|' CSV HEADER;;
32 | 
33 | COPY sagerx_lake.cms_pharmacy_networks
34 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file  PPUF_{{params.year}}Q{{params.quarter}} part 4.txt' DELIMITER '|' CSV HEADER;;
35 | 
36 | COPY sagerx_lake.cms_pharmacy_networks
37 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file  PPUF_{{params.year}}Q{{params.quarter}} part 5.txt' DELIMITER '|' CSV HEADER;;
38 | 
39 | COPY sagerx_lake.cms_pharmacy_networks
40 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pharmacy networks file  PPUF_{{params.year}}Q{{params.quarter}} part 6.txt' DELIMITER '|' CSV HEADER;;
41 | 


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_plan_information_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_plan_information */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_plan_information CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_plan_information (
 5 | contract_id           VARCHAR(5) NOT NULL,
 6 | plan_id         VARCHAR(3) NOT NULL,
 7 | segment_id      VARCHAR(3),
 8 | contract_name  VARCHAR(100),
 9 | plan_name  VARCHAR(80),
10 | formulary_id    VARCHAR(8),
11 | premium    TEXT,
12 | deductible      TEXT,
13 | ma_region_code      VARCHAR(2),
14 | pdp_region_code      VARCHAR(2),
15 | state       VARCHAR(2),
16 | county_code       VARCHAR(5),
17 | snp       VARCHAR(1),
18 | plan_suppressed_yn       VARCHAR(1)
19 | );
20 | 
21 | COPY sagerx_lake.cms_plan_information
22 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/plan information  PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER ENCODING 'WIN1252';;


--------------------------------------------------------------------------------
/airflow/dags/cms_part_d/load_pricing_file.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.cms_pricing: dropped, recreated and reloaded on every run */
 2 | DROP TABLE IF EXISTS sagerx_lake.cms_pricing CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.cms_pricing (
 5 | contract_id           VARCHAR(5) NOT NULL,
 6 | plan_id         VARCHAR(3) NOT NULL,
 7 | segment_id      VARCHAR(3),
 8 | ndc  VARCHAR(11),
 9 | days_supply  TEXT,
10 | unit_cost    TEXT
11 | );
12 | -- Pipe-delimited file with a header row, read from the folder returned by the extract task.
13 | COPY sagerx_lake.cms_pricing
14 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/pricing file PPUF_{{params.year}}Q{{params.quarter}}.txt' DELIMITER '|' CSV HEADER;
15 | 


--------------------------------------------------------------------------------
/airflow/dags/dailymed_pharm_class/dag.py:
--------------------------------------------------------------------------------
 1 | from airflow_operator import create_dag
 2 | from airflow.utils.helpers import chain
 3 | 
 4 | from common_dag_tasks import  extract, get_ordered_sql_tasks, get_ds_folder
 5 | from sagerx import read_sql_file
 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 7 | 
 8 | 
 9 | dag_id = "dailymed_pharm_class"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule= "0 5 * * *",  # run at 5am every day
14 |     max_active_runs=1,
15 |     concurrency=2,
16 | )
17 | 
18 | with dag:
19 |     url = "https://dailymed-data.nlm.nih.gov/public-release-files/pharmacologic_class_mappings.zip"
20 |     ds_folder = get_ds_folder(dag_id)
21 | 
22 |     extract_task = extract(dag_id,url)
23 | 
24 |     task_list = [extract_task]
25 |     for sql in get_ordered_sql_tasks(dag_id):
26 |         sql_path = ds_folder / sql
27 |         task_id = sql[:-4] #remove .sql
28 | 
29 |         sql_task = PostgresOperator(
30 |             task_id=task_id,
31 |             postgres_conn_id="postgres_default",
32 |             sql=read_sql_file(sql_path).format(data_path=extract_task),
33 |             dag=dag
34 |         )
35 |         task_list.append(sql_task)
36 |     
37 |     chain(*task_list) 
38 |    


--------------------------------------------------------------------------------
/airflow/dags/dailymed_pharm_class/load-dailymed_pharm_class.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.dailymed_pharm_class: dropped, recreated and loaded from pharmacologic_class_mappings.txt */
 2 | DROP TABLE IF EXISTS sagerx_lake.dailymed_pharm_class;
 3 | 
 4 | CREATE TABLE sagerx_lake.dailymed_pharm_class (
 5 | spl_setid           TEXT,
 6 | spl_version         TEXT,
 7 | pharma_setid        TEXT,
 8 | pharma_version      TEXT
 9 | );
10 | -- Pipe-delimited with a header row; QUOTE is the backspace character (presumably so quote marks in the data stay literal - confirm).
11 | COPY sagerx_lake.dailymed_pharm_class
12 | FROM '{data_path}/pharmacologic_class_mappings.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER;
13 | 


--------------------------------------------------------------------------------
/airflow/dags/dailymed_rxnorm/dag.py:
--------------------------------------------------------------------------------
 1 | from airflow_operator import create_dag
 2 | from airflow.utils.helpers import chain
 3 | 
 4 | from common_dag_tasks import  extract, get_ordered_sql_tasks, get_ds_folder
 5 | from sagerx import read_sql_file
 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 7 | 
 8 | 
 9 | dag_id = "dailymed_rxnorm"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule= "0 5 * * *",  # run at 5am every day
14 |     max_active_runs=1,
15 |     concurrency=2,
16 | )
17 | 
18 | with dag:
19 |     url = "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip"
20 |     ds_folder = get_ds_folder(dag_id)
21 | 
22 |     extract_task = extract(dag_id,url)
23 | 
24 |     task_list = [extract_task]
25 |     for sql in get_ordered_sql_tasks(dag_id):
26 |         sql_path = ds_folder / sql
27 |         task_id = sql[:-4] #remove .sql
28 | 
29 |         sql_task = PostgresOperator(
30 |             task_id=task_id,
31 |             postgres_conn_id="postgres_default",
32 |             sql=read_sql_file(sql_path).format(data_path=extract_task),
33 |             dag=dag
34 |         )
35 |         task_list.append(sql_task)
36 |     
37 |     chain(*task_list) 
38 |    


--------------------------------------------------------------------------------
/airflow/dags/dailymed_rxnorm/load-dailymed_rxnorm.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.dailymed_rxnorm: dropped, recreated and loaded from rxnorm_mappings.txt */
 2 | DROP TABLE IF EXISTS sagerx_lake.dailymed_rxnorm CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.dailymed_rxnorm (
 5 | setid           TEXT,
 6 | spl_version     TEXT,
 7 | rxcui           TEXT,
 8 | rxstr           TEXT,
 9 | rxtty           TEXT
10 | );
11 | -- Pipe-delimited with a header row; QUOTE is the backspace character (presumably so quote marks in the data stay literal - confirm).
12 | COPY sagerx_lake.dailymed_rxnorm
13 | FROM '{data_path}/rxnorm_mappings.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER;
14 | 


--------------------------------------------------------------------------------
/airflow/dags/dailymed_zip_file_metadata/dag.py:
--------------------------------------------------------------------------------
 1 | from airflow_operator import create_dag
 2 | from airflow.utils.helpers import chain
 3 | 
 4 | from common_dag_tasks import  extract, get_ordered_sql_tasks, get_ds_folder
 5 | from sagerx import read_sql_file
 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 7 | 
 8 | 
 9 | dag_id = "dailymed_zip_file_metadata"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule= "0 5 * * *",  # run at 5am every day
14 |     max_active_runs=1,
15 |     concurrency=2,
16 | )
17 | 
18 | with dag:
19 |     url = "https://dailymed-data.nlm.nih.gov/public-release-files/dm_spl_zip_files_meta_data.zip"
20 |     ds_folder = get_ds_folder(dag_id)
21 | 
22 |     extract_task = extract(dag_id,url)
23 | 
24 |     task_list = [extract_task]
25 |     for sql in get_ordered_sql_tasks(dag_id):
26 |         sql_path = ds_folder / sql
27 |         task_id = sql[:-4] #remove .sql
28 | 
29 |         sql_task = PostgresOperator(
30 |             task_id=task_id,
31 |             postgres_conn_id="postgres_default",
32 |             sql=read_sql_file(sql_path).format(data_path=extract_task),
33 |             dag=dag
34 |         )
35 |         task_list.append(sql_task)
36 |     
37 |     chain(*task_list) 
38 |    


--------------------------------------------------------------------------------
/airflow/dags/dailymed_zip_file_metadata/load-dailymed_zip_file_metadata.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.dailymed_zip_file_metadata: dropped, recreated and loaded from dm_spl_zip_files_meta_data.txt */
 2 | DROP TABLE IF EXISTS sagerx_lake.dailymed_zip_file_metadata;
 3 | 
 4 | CREATE TABLE sagerx_lake.dailymed_zip_file_metadata (
 5 | setid           TEXT,
 6 | zip_file_name   TEXT,
 7 | upload_date     TEXT,
 8 | spl_version     TEXT,
 9 | title           TEXT
10 | );
11 | -- Pipe-delimited with a header row; QUOTE is the backspace character (presumably so quote marks in the data stay literal - confirm).
12 | COPY sagerx_lake.dailymed_zip_file_metadata
13 | FROM '{data_path}/dm_spl_zip_files_meta_data.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER;
14 | 


--------------------------------------------------------------------------------
/airflow/dags/export_marts/dag.py:
--------------------------------------------------------------------------------
 1 | import sqlalchemy
 2 | import pandas as pd
 3 | import boto3
 4 | 
 5 | from io import StringIO
 6 | from os import environ
 7 | from airflow_operator import create_dag
 8 | from airflow.decorators import dag,task
 9 | from airflow.operators.trigger_dagrun import TriggerDagRunOperator
10 | from airflow.hooks.subprocess import SubprocessHook
11 | from airflow.hooks.postgres_hook import PostgresHook
12 | from airflow.models import Variable
13 | 
14 | 
15 | 
16 | dag = create_dag(
17 |     dag_id="export_marts",
18 |     schedule = "0 7 * * 2", #every tuesday at 7:00am
19 |     catchup=False,
20 |     concurrency=2
21 | )
22 | 
23 | with dag:
24 | 
25 |     @task
26 |     def export_marts():
27 |         pg_hook = PostgresHook(postgres_conn_id="postgres_default")
28 |         engine = pg_hook.get_sqlalchemy_engine()
29 |         marts_list = ["all_ndc_descriptions","atc_codes_to_rxnorm_products","all_ndcs_to_sources","products_to_inactive_ingredients","products","brand_products_with_related_ndcs"]
30 |         mart_dfs={}
31 |         with engine.connect() as connection:
32 |             for mart in marts_list:
33 |                 if sqlalchemy.inspect(engine).has_table(mart, schema='sagerx_dev'):  
34 |                     print(f'{mart} exists and will be exported')
35 |                     df = pd.read_sql(f"SELECT * FROM sagerx_dev.{mart};", con=connection)
36 |                     mart_dfs[mart] = df
37 | 
38 |         access_key = environ.get("AWS_ACCESS_KEY_ID")
39 |         secret_key = environ.get("AWS_SECRET_ACCESS_KEY")
40 |         dest_bucket = environ.get("AWS_DEST_BUCKET")
41 | 
42 |         s3_resource = boto3.resource(
43 |             's3',
44 |             aws_access_key_id= access_key,
45 |             aws_secret_access_key= secret_key
46 |         )
47 | 
48 |         for k in list(mart_dfs.keys()):
49 |             print(f'putting {k}')
50 |             csv_buffer = StringIO()
51 |             mart_dfs[k].to_csv(csv_buffer, index=False)
52 | 
53 |             s3_resource.Object(dest_bucket, f'{k}.csv').put(Body=csv_buffer.getvalue())
54 | 
55 |     export_marts()
56 | 


--------------------------------------------------------------------------------
/airflow/dags/fda_enforcement/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | from airflow_operator import create_dag
 3 | from common_dag_tasks import  extract,transform, get_ds_folder
 4 | from fda_enforcement.dag_tasks import  load_json
 5 | # from airflow.operators.python import ShortCircuitOperator
 6 | # from airflow.providers.postgres.operators.postgres import PostgresOperator
 7 | # from sagerx import read_sql_file
 8 | 
 9 | dag_id = "fda_enforcement"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule="0 4 * * 3",
14 |     start_date=pendulum.yesterday(),
15 |     max_active_runs=1,
16 |     concurrency=2,
17 | )
18 | 
19 | 
20 | with dag:
21 |     url = "https://download.open.fda.gov/drug/enforcement/drug-enforcement-0001-of-0001.json.zip"
22 |     ds_folder = get_ds_folder(dag_id)
23 |     file_name = "/drug-enforcement-0001-of-0001.json"
24 | 
25 |     extract_task = extract(dag_id,url)
26 |     # The JSON path handed to load_json is str(extract_task) with file_name appended.
27 |     load_task = load_json(str(extract_task)+file_name)
28 |     # The shared transform task is instantiated twice under distinct task ids.
29 |     transform_staging_task = transform.override(task_id='transform-staging')(dag_id)
30 |     transform_intermediate_task = transform.override(task_id='transform-intermediate')(dag_id,'intermediate')
31 | 
32 |     extract_task >> load_task >> transform_staging_task  >> transform_intermediate_task


--------------------------------------------------------------------------------
/airflow/dags/fda_enforcement/dag_tasks.py:
--------------------------------------------------------------------------------
 1 | from airflow.decorators import task
 2 | from common_dag_tasks import url_request
 3 | from sagerx import read_json_file, load_df_to_pg
 4 | 
 5 | # Task to download data from web location
 6 | @task(task_id='extract')
 7 | def fda_enf_extract(data_interval_start=None, data_interval_end=None):
 8 |     import pandas as pd
 9 |     import logging
10 |     # Request openFDA drug enforcement records whose report_date lies within the given data interval.
11 |     start_date = data_interval_start.format("YYYYMMDD")
12 |     end_date = data_interval_end.format("YYYYMMDD")
13 |     print(f"Start date: {start_date}, End date: {end_date}")
14 |     # Only one request is made (limit=1000); no paging is done in this function.
15 |     url = f"https://api.fda.gov/drug/enforcement.json?search=report_date:[{start_date}+TO+{end_date}]&limit=1000"
16 |     logging.info(url)
17 | 
18 |     response = url_request(url)
19 | 
20 |     json_object = response.json()["results"]
21 | 
22 |     df = pd.DataFrame(json_object)
23 |     df.set_index("recall_number")  # NOTE(review): return value is discarded, so df keeps its original index
24 | 
25 |     return df
26 | 
27 | @task
28 | def load_json(data_path):  # reads a JSON file and passes its "results" entries to load_df_to_pg
29 |     import pandas as pd
30 |     print(f"JSON path: {data_path}")
31 |     json_object = read_json_file(data_path)
32 |     df = pd.DataFrame(json_object["results"])  # frame is built from the value under the "results" key
33 |     df.set_index("recall_number")  # NOTE(review): result is discarded; df still has its default index when loaded
34 |     print(f"Dataframe loaded. Number of rows: {len(df)}")
35 |     load_df_to_pg(df,"sagerx_lake","fda_enforcement","replace",dtype_name="openfda")
36 | 


--------------------------------------------------------------------------------
/airflow/dags/fda_excluded/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | 
 3 | from airflow_operator import create_dag
 4 | from airflow.utils.helpers import chain
 5 | 
 6 | from common_dag_tasks import  extract, transform, get_ordered_sql_tasks, get_ds_folder
 7 | from sagerx import read_sql_file
 8 | from airflow.decorators import dag,task
 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator
10 | 
11 | 
12 | dag_id = "fda_excluded"
13 | 
14 | dag = create_dag(
15 |     dag_id=dag_id,
16 |     schedule= "30 4 * * *",  # run at 4:30am every day
17 |     start_date=pendulum.yesterday(),
18 |     catchup=False,
19 |     max_active_runs=1,
20 |     concurrency=2,
21 | )
22 | 
23 | with dag:
24 |     url = "https://www.accessdata.fda.gov/cder/ndc_excluded.zip"
25 |     ds_folder = get_ds_folder(dag_id)
26 | 
27 |     extract_task = extract(dag_id,url)
28 |     transform_task = transform(dag_id)
29 | 
30 |     sql_tasks = []
31 |     for sql in get_ordered_sql_tasks(dag_id):
32 |         sql_path = ds_folder / sql
33 |         task_id = sql[:-4] #remove .sql
34 |         sql_task = PostgresOperator(
35 |             task_id=task_id,
36 |             postgres_conn_id="postgres_default",
37 |             sql=read_sql_file(sql_path).format(data_path=extract_task),
38 |             dag=dag
39 |         )
40 |         sql_tasks.append(sql_task)
41 | 
42 |     extract_task >> sql_tasks >> transform_task


--------------------------------------------------------------------------------
/airflow/dags/fda_excluded/load_package.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.fda_excluded_package */
 2 | DROP TABLE IF EXISTS sagerx_lake.fda_excluded_package CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.fda_excluded_package (
 5 | productid           TEXT NOT NULL,
 6 | productndc          TEXT NOT NULL,
 7 | ndcpackagecode      TEXT,
 8 | packagedescription  TEXT,
 9 | startmarketingdate  TEXT,
10 | endmarketingdate    TEXT,
11 | ndc_exclude_flag    TEXT,
12 | sample_package      TEXT
13 | );
14 | 
15 | COPY sagerx_lake.fda_excluded_package
16 | FROM '{data_path}/Packages_excluded.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
17 | -- Index names are schema-wide, so the name is table-specific; a shared name makes IF NOT EXISTS skip creation.
18 | CREATE INDEX IF NOT EXISTS x_fda_excluded_package_productid
19 | ON sagerx_lake.fda_excluded_package(productid);


--------------------------------------------------------------------------------
/airflow/dags/fda_excluded/load_product.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.fda_excluded_product */
 2 | DROP TABLE IF EXISTS sagerx_lake.fda_excluded_product CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.fda_excluded_product (
 5 | productid                           TEXT,
 6 | productndc                          TEXT,
 7 | producttypename                     TEXT,
 8 | proprietaryname                     TEXT,
 9 | proprietarynamesuffix               TEXT,
10 | nonproprietaryname                  TEXT,
11 | dosageformname                      TEXT,
12 | routename                           TEXT,
13 | startmarketingdate                  TEXT,
14 | endmarketingdate                    TEXT,
15 | marketingcategoryname               TEXT,
16 | applicationnumber                   TEXT,
17 | labelername                         TEXT,
18 | substancename                       TEXT,
19 | active_numerator_strength           TEXT,
20 | active_ingred_unit                  TEXT,
21 | pharm_classes                       TEXT,
22 | deaschedule                         TEXT,
23 | ndc_exclude_flag                    TEXT,
24 | listing_record_certified_through    TEXT,
25 | PRIMARY KEY (productid)
26 | );
27 | 
28 | COPY sagerx_lake.fda_excluded_product
29 | FROM '{data_path}/Products_excluded.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
30 | -- Index names are schema-wide, so the name is table-specific; a shared name makes IF NOT EXISTS skip creation.
31 | CREATE INDEX IF NOT EXISTS x_fda_excluded_product_productid
32 | ON sagerx_lake.fda_excluded_product(productid);


--------------------------------------------------------------------------------
/airflow/dags/fda_ndc/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | 
 3 | from airflow_operator import create_dag
 4 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 5 | 
 6 | from common_dag_tasks import  extract, transform, generate_sql_list, get_ds_folder
 7 | from sagerx import read_sql_file
 8 | 
 9 | dag_id = "fda_ndc"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule="0 4 * * *",
14 |     start_date=pendulum.yesterday(),
15 |     catchup=False,
16 |     concurrency=2,
17 | )
18 | 
19 | with dag:
20 |     url= "https://www.accessdata.fda.gov/cder/ndctext.zip"
21 |     ds_folder = get_ds_folder(dag_id)
22 | 
23 |     extract_task = extract(dag_id,url)
24 |     transform_task = transform(dag_id)
25 | 
26 |     sql_tasks = []
27 |     for sql in generate_sql_list(dag_id):
28 |         sql_path = ds_folder / sql
29 |         task_id = sql[:-4] #remove .sql
30 |         sql_task = PostgresOperator(
31 |             task_id=task_id,
32 |             postgres_conn_id="postgres_default",
33 |             sql=read_sql_file(sql_path).format(data_path=extract_task),
34 |             dag=dag
35 |         )
36 |         sql_tasks.append(sql_task)
37 |         
38 |     extract_task >> sql_tasks >> transform_task
39 | 


--------------------------------------------------------------------------------
/airflow/dags/fda_ndc/load_package.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.fda_ndc_package */
 2 | DROP TABLE IF EXISTS sagerx_lake.fda_ndc_package CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.fda_ndc_package (
 5 | productid           TEXT NOT NULL,
 6 | productndc          TEXT NOT NULL,
 7 | ndcpackagecode      TEXT,
 8 | packagedescription  TEXT,
 9 | startmarketingdate  TEXT,
10 | endmarketingdate    TEXT,
11 | ndc_exclude_flag    TEXT,
12 | sample_package      TEXT
13 | );
14 | 
15 | COPY sagerx_lake.fda_ndc_package
16 | FROM '{data_path}/package.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
17 | -- Index names are schema-wide, so the name is table-specific; a shared name makes IF NOT EXISTS skip creation.
18 | CREATE INDEX IF NOT EXISTS x_fda_ndc_package_productid
19 | ON sagerx_lake.fda_ndc_package(productid);


--------------------------------------------------------------------------------
/airflow/dags/fda_ndc/load_product.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.fda_ndc_product */
 2 | DROP TABLE IF EXISTS sagerx_lake.fda_ndc_product CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.fda_ndc_product (
 5 | productid                           TEXT, 
 6 | productndc                          TEXT, 
 7 | producttypename                     TEXT, 
 8 | proprietaryname                     TEXT, 
 9 | proprietarynamesuffix               TEXT, 
10 | nonproprietaryname                  TEXT, 
11 | dosageformname                      TEXT, 
12 | routename                           TEXT, 
13 | startmarketingdate                  TEXT, 
14 | endmarketingdate                    TEXT, 
15 | marketingcategoryname               TEXT, 
16 | applicationnumber                   TEXT, 
17 | labelername                         TEXT, 
18 | substancename                       TEXT, 
19 | active_numerator_strength           TEXT, 
20 | active_ingred_unit                  TEXT, 
21 | pharm_classes                       TEXT, 
22 | deaschedule                         TEXT, 
23 | ndc_exclude_flag                    TEXT,
24 | listing_record_certified_through    TEXT,
25 | PRIMARY KEY (productid)
26 | );
27 | 
28 | COPY sagerx_lake.fda_ndc_product
29 | FROM '{data_path}/product.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
30 | -- Index names are schema-wide, so the name is table-specific; a shared name makes IF NOT EXISTS skip creation.
31 | CREATE INDEX IF NOT EXISTS x_fda_ndc_product_productid
32 | ON sagerx_lake.fda_ndc_product(productid);


--------------------------------------------------------------------------------
/airflow/dags/fda_unfinished/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | 
 3 | from airflow_operator import create_dag
 4 | from airflow.utils.helpers import chain
 5 | 
 6 | from common_dag_tasks import  extract, transform, get_ordered_sql_tasks, get_ds_folder
 7 | from sagerx import read_sql_file
 8 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 9 | 
10 | 
11 | dag_id = "fda_unfinished"
12 | 
13 | dag = create_dag(
14 |     dag_id=dag_id,
15 |     schedule= "0 4 * * *",  # runs at 4:00am every day (NOTE(review): this comment previously said 4:15am - confirm the intended time)
16 |     start_date=pendulum.yesterday(),
17 |     catchup=False,
18 |     max_active_runs=1,
19 |     concurrency=2,
20 | )
21 | 
22 | with dag:
23 |     url = "https://www.accessdata.fda.gov/cder/ndc_unfinished.zip"
24 |     ds_folder = get_ds_folder(dag_id)
25 | 
26 |     extract_task = extract(dag_id,url)
27 |     transform_task = transform(dag_id)
28 | 
29 |     sql_tasks = []
30 |     for sql in get_ordered_sql_tasks(dag_id):
31 |         sql_path = ds_folder / sql
32 |         task_id = sql[:-4] #remove .sql
33 |         sql_task = PostgresOperator(
34 |             task_id=task_id,
35 |             postgres_conn_id="postgres_default",
36 |             sql=read_sql_file(sql_path).format(data_path=extract_task),
37 |             dag=dag
38 |         )
39 |         sql_tasks.append(sql_task)
40 |     
41 |     extract_task >> sql_tasks >> transform_task
42 | 


--------------------------------------------------------------------------------
/airflow/dags/fda_unfinished/load_package.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.fda_unfinished_package */
 2 | DROP TABLE IF EXISTS sagerx_lake.fda_unfinished_package CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.fda_unfinished_package (
 5 | productid           TEXT NOT NULL,
 6 | productndc          TEXT NOT NULL,             
 7 | ndcpackagecode      TEXT,
 8 | packagedescription  TEXT,
 9 | startmarketingdate  TEXT,
10 | endmarketingdate    TEXT
11 | );
12 | 
13 | COPY sagerx_lake.fda_unfinished_package
14 | FROM '{data_path}/unfinished_package.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
15 | -- Index names are schema-wide, so the name is table-specific; a shared name makes IF NOT EXISTS skip creation.
16 | CREATE INDEX IF NOT EXISTS x_fda_unfinished_package_productid
17 | ON sagerx_lake.fda_unfinished_package(productid);


--------------------------------------------------------------------------------
/airflow/dags/fda_unfinished/load_product.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.fda_unfinished_product*/
 2 | DROP TABLE IF EXISTS sagerx_lake.fda_unfinished_product CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.fda_unfinished_product (
 5 | productid                           TEXT,
 6 | productndc                          TEXT,
 7 | producttypename                     TEXT,
 8 | nonproprietaryname                  TEXT,
 9 | dosageformname                      TEXT,
10 | startmarketingdate                  TEXT,
11 | endmarketingdate                    TEXT,
12 | marketingcategoryname               TEXT,
13 | labelername                         TEXT,
14 | substancename                       TEXT,
15 | active_numerator_strength           TEXT,
16 | active_ingred_unit                  TEXT,
17 | deaschedule                         TEXT,
18 | listing_record_certified_through    TEXT,
19 | PRIMARY KEY (productid)
20 | );
21 | 
22 | COPY sagerx_lake.fda_unfinished_product FROM '{data_path}/unfinished_product.txt' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
23 | -- Index names are schema-wide, so the name is table-specific; a shared name makes IF NOT EXISTS skip creation.
24 | CREATE INDEX IF NOT EXISTS x_fda_unfinished_product_productid
25 | ON sagerx_lake.fda_unfinished_product(productid);


--------------------------------------------------------------------------------
/airflow/dags/fda_unii/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | 
 3 | from airflow_operator import create_dag
 4 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 5 | from airflow.decorators import task
 6 | 
 7 | from common_dag_tasks import  extract, transform, generate_sql_list, get_ds_folder
 8 | from sagerx import read_sql_file
 9 | 
10 | dag_id = "fda_unii"
11 | 
12 | dag = create_dag(
13 |     dag_id=dag_id,
14 |     schedule="0 4 * * *",
15 |     start_date=pendulum.yesterday(),
16 |     catchup=False,
17 |     concurrency=2,
18 | )
19 | 
20 | with dag:
21 |     url= "https://precision.fda.gov/uniisearch/archive/latest/UNII_Data.zip"
22 |     ds_folder = get_ds_folder(dag_id)
23 | 
24 |     extract_task = extract(dag_id,url)
25 |     transform_task = transform(dag_id)
26 | 
27 |     @task
28 |     def get_file_name(data_path) -> str:
29 |         import re
30 |         import os
31 |         import logging
32 | 
33 |         logging.info(f'Data path: {data_path}')
34 | 
35 |         file_name = ''
36 |         # note: extract_task contains the path to /opt/data/fda_unii/UNII_Data/
37 |         # example file_name: UNII_Records_22Jun2024.txt
38 |         for subfile in os.listdir(data_path):
39 |             if re.match("UNII_Records", subfile):
40 |                 file_name = subfile
41 |         
42 |         if file_name == '':
43 |             logging.error('Could not find file_name.')
44 | 
45 |         return file_name
46 |     
47 |     file_name_task = get_file_name(extract_task)
48 |         
49 |     sql_tasks = []
50 |     for sql in generate_sql_list(dag_id):
51 |         sql_path = ds_folder / sql
52 |         task_id = sql[:-4] #remove .sql
53 |         sql_task = PostgresOperator(
54 |             task_id=task_id,
55 |             postgres_conn_id="postgres_default",
56 |             sql=read_sql_file(sql_path).format(
57 |                 data_path=extract_task,
58 |                 file_name=file_name_task
59 |             ),
60 |             dag=dag
61 |         )
62 |         sql_tasks.append(sql_task)
63 |         
64 |     file_name_task >> sql_tasks >> transform_task
65 | 


--------------------------------------------------------------------------------
/airflow/dags/fda_unii/load_unii.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.fda_unii */
 2 | DROP TABLE IF EXISTS sagerx_lake.fda_unii CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.fda_unii (
 5 |     unii                  TEXT NOT NULL,
 6 |     display_name          TEXT,
 7 |     rn                    TEXT,
 8 |     ec                    TEXT,
 9 |     ncit                  TEXT,
10 |     rxcui                 TEXT,
11 |     pubchem               TEXT,
12 |     epa_comptox           TEXT,
13 |     smsid                 TEXT,
14 |     catalogue_of_life     TEXT,
15 |     itis                  TEXT,
16 |     ncbi                  TEXT,
17 |     plants                TEXT,
18 |     powo                  TEXT,
19 |     grin                  TEXT,
20 |     mpns                  TEXT,
21 |     inn_id                TEXT,
22 |     usan_id               TEXT,
23 |     mf                    TEXT,
24 |     inchikey              TEXT,
25 |     smiles                TEXT,
26 |     ingredient_type       TEXT,
27 |     substance_type        TEXT,
28 |     uuid                  TEXT,
29 |     dailymed              TEXT
30 | );
31 | -- Tab-delimited file with a header row, read as WIN1252; path and file name are filled in by the DAG.
32 | COPY sagerx_lake.fda_unii
33 | FROM '{data_path}/{file_name}' DELIMITER E'\t' CSV HEADER ENCODING 'WIN1252';
34 | 


--------------------------------------------------------------------------------
/airflow/dags/mccpd/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | from airflow.decorators import dag
 3 | from mccpd.dag_tasks import extract, load
 4 | from common_dag_tasks import transform
 5 | 
 6 | dag_id = "mccpd"
 7 | 
 8 | @dag(
 9 |     dag_id=dag_id,
10 |     schedule_interval="0 3 15 * *",  # Runs on the 15th of each month at 3 AM
11 |     start_date=pendulum.today('UTC').add(days=-1),
12 |     catchup=False
13 | )
14 | def mccpd():
15 |     extract_task = extract(dag_id)
16 |     load_task = load(extract_task)
17 |     transform_task = transform(dag_id, models_subdir=['staging', 'intermediate'])
18 |     # Explicit ordering: extract, then load (which also consumes extract's return value), then transform.
19 |     extract_task >> load_task >> transform_task
20 | 
21 | dag = mccpd()
22 | 


--------------------------------------------------------------------------------
/airflow/dags/mccpd/dag_tasks.py:
--------------------------------------------------------------------------------
 1 | from airflow.decorators import task
 2 | import pandas as pd
 3 | import re
 4 | from sagerx import fetch_json, camel_to_snake, get_rxcuis, load_df_to_pg, get_concurrent_api_results, write_json_file, read_json_file, create_path
 5 | from common_dag_tasks import url_request, get_data_folder
 6 | import logging
 7 | from airflow.models import Variable
 8 | from airflow.hooks.postgres_hook import PostgresHook
 9 | 
10 | @task
11 | def extract(dag_id:str) -> str:
12 |     url = 'https://us-central1-costplusdrugs-publicapi.cloudfunctions.net/main'
13 |     results = fetch_json(url)
14 |     print(results)
15 | 
16 |     data_folder = get_data_folder(dag_id)
17 |     file_path = create_path(data_folder) / 'data.json'
18 |     file_path_str = file_path.resolve().as_posix()
19 | 
20 |     write_json_file(file_path_str, results)
21 | 
22 |     print(f"Extraction Completed! Data saved to file: {file_path_str}")
23 | 
24 |     return file_path_str
25 | 
26 | 
27 | @task
28 | def load(file_path_str:str):
29 |     results = read_json_file(file_path_str)
30 | 
31 |     # Create a DataFrame directly from JSON
32 |     df = pd.json_normalize(
33 |         results, 
34 |         record_path=["results"]
35 |     )
36 | 
37 |     print(f'Dataframe created of {len(df)} length.')
38 |     load_df_to_pg(df,"sagerx_lake","mccpd","replace",index=False)
39 | 


--------------------------------------------------------------------------------
/airflow/dags/nadac/load_nadac.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.nadac */
 2 | CREATE TABLE IF NOT EXISTS sagerx_lake.nadac (
 3 | ndc_description		                            TEXT NOT NULL,
 4 | ndc			                                    VARCHAR (11) NOT NULL,
 5 | nadac_per_unit		                            NUMERIC (12,5),
 6 | effective_date                                  DATE NOT NULL,
 7 | pricing_unit		                            TEXT,
 8 | pharmacy_type_indicator	                        TEXT,
 9 | otc		                                        TEXT,
10 | explanation_code		                        TEXT,
11 | classification_for_rate_setting		            TEXT,
12 | corresponding_generic_drug_nadac_per_unit		TEXT,
13 | corresponding_generic_drug_effective_date		DATE,
14 | as_of_date  			                        DATE
15 | );
16 | 
17 | TRUNCATE sagerx_lake.nadac;
18 | -- The table is emptied, then reloaded; the extract task's XCom value is used directly as the CSV file path.
19 | COPY sagerx_lake.nadac
20 | FROM '{{ ti.xcom_pull(task_ids='extract') }}' CSV HEADER;


--------------------------------------------------------------------------------
/airflow/dags/orange_book/dag.py:
--------------------------------------------------------------------------------
 1 | from airflow_operator import create_dag
 2 | from airflow.utils.helpers import chain
 3 | 
 4 | from common_dag_tasks import  extract, get_ordered_sql_tasks, get_ds_folder
 5 | from sagerx import read_sql_file
 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 7 | 
 8 | 
 9 | dag_id = "orange_book"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule= "15 0 24 1 *",  # runs once monthly on the 24th day at 00:15
14 |     max_active_runs=1,
15 |     concurrency=2,
16 | )
17 | 
18 | with dag:
19 |     url = "https://www.fda.gov/media/76860/download"
20 |     ds_folder = get_ds_folder(dag_id)
21 | 
22 |     extract_task = extract(dag_id,url)
23 | 
24 |     task_list = [extract_task]
25 |     for sql in get_ordered_sql_tasks(dag_id):
26 |         sql_path = ds_folder / sql
27 |         task_id = sql[:-4] #remove .sql
28 | 
29 |         sql_task = PostgresOperator(
30 |             task_id=task_id,
31 |             postgres_conn_id="postgres_default",
32 |             sql=read_sql_file(sql_path).format(data_path=extract_task),
33 |             dag=dag
34 |         )
35 |         task_list.append(sql_task)
36 |     
37 |     chain(*task_list) 
38 |    


--------------------------------------------------------------------------------
/airflow/dags/orange_book/load_exclusivity.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.orange_book_exlusivity */
 2 | DROP TABLE IF EXISTS sagerx_lake.orange_book_exlusivity;
 3 | 
 4 | CREATE TABLE sagerx_lake.orange_book_exlusivity (
 5 | appl_type          TEXT,
 6 | appl_no            TEXT,
 7 | product_no         TEXT,
 8 | exclusivity_code   TEXT,
 9 | exclusivity_date   TEXT
10 | );
11 | 
12 | COPY sagerx_lake.orange_book_exlusivity
13 | FROM '{data_path}/exclusivity.txt' DELIMITER '~' CSV HEADER;


--------------------------------------------------------------------------------
/airflow/dags/orange_book/load_patent.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.orange_book_patent */
 2 | DROP TABLE IF EXISTS sagerx_lake.orange_book_patent;
 3 | 
 4 | CREATE TABLE sagerx_lake.orange_book_patent (
 5 | appl_type                   TEXT,
 6 | appl_no                     TEXT,
 7 | product_no                  TEXT,
 8 | patent_no                   TEXT,
 9 | patent_expire_date_text     TEXT,
10 | drug_substance_flag         TEXT,
11 | drug_product_flag           TEXT,
12 | patent_use_code             TEXT,
13 | delist_flag                 TEXT,
14 | submission_date             TEXT
15 | );
16 | 
17 | COPY sagerx_lake.orange_book_patent
18 | FROM '{data_path}/patent.txt' DELIMITER '~' CSV HEADER;


--------------------------------------------------------------------------------
/airflow/dags/orange_book/load_products.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.orange_book_products */
 2 | DROP TABLE IF EXISTS sagerx_lake.orange_book_products;
 3 | 
 4 | CREATE TABLE sagerx_lake.orange_book_products (
 5 | ingredient             TEXT,
 6 | df_route               TEXT,
 7 | trade_name             TEXT,
 8 | applicant              TEXT,
 9 | strength               TEXT,
10 | appl_type              TEXT,
11 | appl_no                TEXT,
12 | product_no             TEXT,
13 | te_code                TEXT,
14 | approval_date          TEXT,
15 | rld                    TEXT,
16 | rs                     TEXT,
17 | type                   TEXT,
18 | applicant_full_name    TEXT
19 | );
20 | 
21 | COPY sagerx_lake.orange_book_products
22 | FROM '{data_path}/products.txt' DELIMITER '~' CSV HEADER;


--------------------------------------------------------------------------------
/airflow/dags/purple_book/dag.py:
--------------------------------------------------------------------------------
 1 | from airflow_operator import create_dag
 2 | from airflow.utils.helpers import chain
 3 | 
 4 | from common_dag_tasks import  extract, get_ordered_sql_tasks, get_ds_folder
 5 | from sagerx import read_sql_file
 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 7 | from purple_book.dag_tasks import modify_csv
 8 | 
 9 | dag_id = "purple_book"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule= "15 0 24 1 *",  # runs once monthly on the 23rd
14 |     max_active_runs=1,
15 |     concurrency=2,
16 | )
17 | 
18 | with dag:
19 |     file_name = "{{ (execution_date - macros.dateutil.relativedelta.relativedelta(months=1)).strftime('%Y') }}/purplebook-search-{{ (execution_date - macros.dateutil.relativedelta.relativedelta(months=1)).strftime('%B').lower() }}-data-download.csv"
20 |     url = f"https://purplebooksearch.fda.gov/files/{file_name}"
21 |     ds_folder = get_ds_folder(dag_id)
22 | 
23 |     extract_task = extract(dag_id,url)
24 |     modify_task = modify_csv(extract_task)
25 | 
26 |     task_list = [extract_task,modify_task]
27 | 
28 |     for sql in get_ordered_sql_tasks(dag_id):
29 |         sql_path = ds_folder / sql
30 |         task_id = sql[:-4] #remove .sql
31 | 
32 |         sql_task = PostgresOperator(
33 |             task_id=task_id,
34 |             postgres_conn_id="postgres_default",
35 |             sql=read_sql_file(sql_path).format(data_path=extract_task, file_name=file_name),
36 |             dag=dag
37 |         )
38 |         task_list.append(sql_task)
39 |     
40 |     chain(*task_list) 
41 |    


--------------------------------------------------------------------------------
/airflow/dags/purple_book/dag_tasks.py:
--------------------------------------------------------------------------------
 1 | from airflow.decorators import task
 2 | import csv
 3 | 
 4 | @task
 5 | def modify_csv(file_path):
 6 |     print(f"Modifying CSV file at {file_path}")
 7 | 
 8 |     with open(file_path, newline='') as file:
 9 |         csvreader = csv.reader(file)
10 | 
11 |         for _ in range(3): 
12 |             next(csvreader,None) # skips the first 3 rows
13 | 
14 |         """
15 |         Skip the top section by checking if the row is empty (signifying the end of the top section)
16 | 
17 |         The bottom section of each report contains all products in the Purple Book database for that month, 
18 |         including the products listed in the top section that were added or changed.        
19 |         """ 
20 |         
21 |         for row in csvreader:
22 |             if not any(row):
23 |                 break
24 | 
25 |         rows = []
26 |         for row in csvreader:
27 |                 rows.append(row)
28 | 
29 |     with open(file_path, 'w', encoding='UTF8', newline='') as f:
30 |         writer = csv.writer(f)
31 |         writer.writerows(rows)
32 | 


--------------------------------------------------------------------------------
/airflow/dags/purple_book/load_purple_book.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.purple_book */
 2 | DROP TABLE IF EXISTS sagerx_lake.purple_book;
 3 | 
 4 | CREATE TABLE sagerx_lake.purple_book (
 5 | nru											TEXT,
 6 | applicant									TEXT,
 7 | bla_number									TEXT,
 8 | proprietary_name							TEXT,
 9 | proper_name									TEXT,
10 | bla_type									TEXT,
11 | strength									TEXT,
12 | dosage_form									TEXT,
13 | route_of_administration						TEXT,
14 | product_presentation						TEXT,
15 | status										TEXT,
16 | licensure									TEXT,
17 | approval_date								TEXT,
18 | ref_product_proper_name						TEXT,
19 | ref_Product_proprietary_name				TEXT,
20 | supplement_number							TEXT,
21 | submission_type								TEXT,
22 | license_number								TEXT,
23 | product_number								TEXT,
24 | center										TEXT,
25 | date_of_first_licensure						TEXT,
26 | exclusivity_expiration_date					TEXT,
27 | first_interchangeable_exclusivity_exp_date	TEXT,
28 | ref_product_exclusivity_exp_date			TEXT,
29 | orphan_exclusivity_exp_date					TEXT
30 | );
31 | 
32 | COPY sagerx_lake.purple_book
33 | FROM '{data_path}' DELIMITER ',' CSV HEADER QUOTE '"';
34 | 


--------------------------------------------------------------------------------
/airflow/dags/rxclass/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | 
 3 | from airflow.decorators import dag
 4 | 
 5 | from rxclass.dag_tasks import extract, load
 6 | 
 7 | from common_dag_tasks import transform
 8 | 
 9 | 
dag_id = "rxclass"

@dag(
    dag_id=dag_id,
    # `schedule` is the current name of the deprecated `schedule_interval` argument.
    schedule="0 3 15 * *",  # Runs on the 15th of each month at 3 AM
    # NOTE(review): this start_date is recomputed every time the file is
    # parsed; a fixed date is the usual recommendation -- confirm intent.
    start_date=pendulum.today('UTC').add(days=-1),
    catchup=False
)
def rxclass():
    # Main processing tasks: extract -> load -> transform, where transform is
    # limited to the 'staging' and 'intermediate' model subdirectories.
    extract_task = extract(dag_id)
    load_task = load(extract_task)
    transform_task = transform(dag_id, models_subdir=['staging', 'intermediate'])

    extract_task >> load_task >> transform_task

# Instantiate the DAG
dag = rxclass()
28 | 


--------------------------------------------------------------------------------
/airflow/dags/rxclass/dag_tasks.py:
--------------------------------------------------------------------------------
 1 | from airflow.decorators import task
 2 | import pandas as pd
 3 | from sagerx import get_rxcuis, load_df_to_pg, get_concurrent_api_results, write_json_file, read_json_file, create_path
 4 | from common_dag_tasks import get_data_folder
 5 | import logging
 6 | 
 7 | def create_url_list(rxcui_list:list)-> list:
 8 |     urls=[]
 9 | 
10 |     for rxcui in rxcui_list:
11 |         urls.append(f"https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json?rxcui={rxcui}")
12 |     return urls
13 | 
@task
def extract(dag_id:str) -> str:
    """
    Fetches the active RXCUIs for a fixed list of term types, requests the
    RxClass byRxcui URL of each one through get_concurrent_api_results,
    and writes the collected results to data.json in the DAG's data folder.

    Returns the resolved POSIX path of the written file.
    """
    logging.info("Starting data retrieval for RxClass...")

    # 1. Fetch the list of concepts
    # (narrower lists used previously: ['SCD', 'SBD', 'GPCK', 'BPCK'] and ['BPCK'])
    term_types = ['IN','PIN','MIN','SCDC','SCDF','SCDFP','SCDG','SCDGP','SCD','GPCK','BN','SBDC','SBDF','SBDFP','SBDG','SBD','BPCK']
    rxcuis = get_rxcuis(term_types, active_only = True)
    logging.info(f"Fetched {len(rxcuis)} RXCUIs.")

    # 2. One request URL per RXCUI, all handed to the API helper in one call
    api_results = get_concurrent_api_results(create_url_list(rxcuis))

    # 3. Persist the raw results so the load task can pick them up by path
    target_dir = create_path(get_data_folder(dag_id))
    json_path = (target_dir / 'data.json').resolve().as_posix()
    write_json_file(json_path, api_results)

    print(f"Extraction Completed! Data saved to file: {json_path}")

    return json_path
43 | 
44 | 
@task
def load(file_path_str:str):
    # Reads the JSON results at file_path_str, flattens every rxclassDrugInfo
    # entry into one row, drops duplicate rows, and replaces the
    # sagerx_lake.rxclass table with the outcome.

    def _to_row(drug_info):
        # One output row per (concept, class) pairing found in a response.
        concept = drug_info["minConcept"]
        class_item = drug_info["rxclassMinConceptItem"]
        return {
            "rxcui": concept.get("rxcui"),
            "name": concept.get("name",""),
            "tty": concept.get("tty",""),
            "rela": drug_info.get("rela",""),
            "class_id": class_item.get("classId",""),
            "class_name": class_item.get("className",""),
            "class_type": class_item.get("classType",""),
            "rela_source": drug_info.get("relaSource",""),
        }

    # Responses without an 'rxclassDrugInfoList' key contribute no rows.
    records = [
        _to_row(info)
        for api_result in read_json_file(file_path_str)
        if 'rxclassDrugInfoList' in api_result['response']
        for info in api_result['response']["rxclassDrugInfoList"]["rxclassDrugInfo"]
    ]

    df = pd.DataFrame(records).drop_duplicates()
    print(f'Dataframe created of {len(df)} length.')
    load_df_to_pg(df,"sagerx_lake","rxclass","replace",index=False)
69 | 


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/dag.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import pendulum
 3 | 
 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel
 5 | 
 6 | from airflow.decorators import dag, task
 7 | 
 8 | from airflow.operators.python import get_current_context
 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator
10 | from airflow.hooks.postgres_hook import PostgresHook
11 | from airflow.models import Variable
12 | 
13 | from common_dag_tasks import extract, transform, run_subprocess_command
14 | 
15 | 
@dag(
    schedule="0 0 10 * *",
    start_date=pendulum.datetime(2005, 1, 1),
    catchup=False,
)
def rxnorm():
    # Downloads the RxNorm full release, runs every load script found in this
    # DAG's folder, then runs transform for the 'staging' and 'intermediate'
    # model subdirectories.
    dag_id = "rxnorm"

    # The download URL carries the API key stored in the "umls_api" Variable.
    umls_key = Variable.get("umls_api")
    ds_url = f"https://uts-ws.nlm.nih.gov/download?url=https://download.nlm.nih.gov/umls/kss/rxnorm/RxNorm_full_current.zip&apiKey={umls_key}"

    extract_task = extract(dag_id, ds_url)

    # Tasks that load data into the source db schema: one operator per script,
    # with the task id taken from the file name minus its 4-character extension.
    ds_folder = Path("/opt/airflow/dags") / dag_id
    load_tasks = [
        PostgresOperator(
            task_id=sql_file[:-4],
            postgres_conn_id="postgres_default",
            sql=read_sql_file(ds_folder / sql_file),
        )
        for sql_file in get_sql_list("load", ds_folder)
    ]

    transform_task = transform(dag_id, models_subdir=['staging', 'intermediate'])

    # Putting a list in the middle sets every load task downstream of the
    # extract and upstream of the transform.
    extract_task >> load_tasks >> transform_task

rxnorm()
47 | 


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxnconso.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxnconso */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnconso CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxnorm_rxnconso (
 5 | rxcui		VARCHAR(8) NOT NULL,
 6 | lat			VARCHAR (3) DEFAULT 'ENG' NOT NULL,
 7 | ts			VARCHAR (1),
 8 | lui			VARCHAR(8),
 9 | stt			VARCHAR (3),
10 | sui			VARCHAR (8),
11 | ispref		VARCHAR (1),
12 | rxaui		VARCHAR(8) NOT NULL,
13 | saui		VARCHAR (50),
14 | scui		VARCHAR (50),
15 | sdui		VARCHAR (50),
16 | sab			VARCHAR (20) NOT NULL,
17 | tty			VARCHAR (20) NOT NULL,
18 | code		VARCHAR (50) NOT NULL,
19 | str			TEXT NOT NULL,
20 | srl			VARCHAR (10),
21 | suppress	VARCHAR (1),
22 | cvf			VARCHAR(50),
23 | blank       TEXT
24 | );
25 | 
26 | COPY sagerx_lake.rxnorm_rxnconso FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNCONSO.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
27 | --ESCAPE and QOUTE characters are dummy to remove default
28 | 
29 | CREATE INDEX IF NOT EXISTS rxnconso_str
30 | ON sagerx_lake.rxnorm_rxnconso(str);
31 | 
32 | 
33 | CREATE INDEX IF NOT EXISTS rxnconso_rxcui
34 | ON sagerx_lake.rxnorm_rxnconso(rxcui);
35 | 
36 | 
37 | CREATE INDEX IF NOT EXISTS rxnconso_tty
38 | ON sagerx_lake.rxnorm_rxnconso(tty);
39 | 
40 | 
41 | CREATE INDEX IF NOT EXISTS rxnconso_code
42 | ON sagerx_lake.rxnorm_rxnconso(code);
43 | --IF NOT EXISTS added if in future table is not always dropped first
44 | 


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxncui.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxncui */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxncui CASCADE;
 3 | 
 4 |  CREATE TABLE sagerx_lake.rxnorm_rxncui (
 5 |  cui1           varchar(8),
 6 |  ver_start      varchar(40),
 7 |  ver_end        varchar(40),
 8 |  cardinality    varchar(8),
 9 |  cui2           varchar(8),
10 |  blank          TEXT
11 | );
12 | 
13 | COPY sagerx_lake.rxnorm_rxncui
14 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNCUI.RRF'CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
15 | --ESCAPE and QOUTE characters are dummy to remove default


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxncuichanges.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxncuichanges */
 2 | 
 3 | DROP TABLE IF EXISTS  sagerx_lake.rxnorm_rxncuichanges CASCADE;
 4 | 
 5 | CREATE TABLE sagerx_lake.rxnorm_rxncuichanges (
 6 |       rxaui         varchar(8),
 7 |       code          varchar(50),
 8 |       sab           varchar(20),
 9 |       tty           varchar(20),
10 |       str           varchar(3000),
11 |       old_rxcui     varchar(8) not null,
12 |       new_rxcui     varchar(8) NOT NULL,
13 |       blank         TEXT
14 | );
15 | 
16 | COPY sagerx_lake.rxnorm_rxncuichanges
17 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNCUICHANGES.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
18 | --ESCAPE and QOUTE characters are dummy to remove default
19 | 


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxndoc.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxndoc */
 2 | 
 3 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxndoc CASCADE;
 4 | 
 5 | CREATE TABLE sagerx_lake.rxnorm_rxndoc (
 6 |     dockey      varchar(50) NOT NULL,
 7 |     value       varchar(1000),
 8 |     type        varchar(50) NOT NULL,
 9 |     expl        varchar(1000),
10 |     blank       TEXT
11 | );
12 | 
13 | COPY sagerx_lake.rxnorm_rxndoc
14 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNDOC.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
15 | --ESCAPE and QOUTE characters are dummy to remove default
16 | 


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxnrel.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxnrel */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnrel CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxnorm_rxnrel (
 5 |     rxcui1    varchar(8) ,
 6 |     rxaui1    varchar(8),
 7 |     stype1    varchar(50),
 8 |     rel       varchar(4) ,
 9 |     rxcui2    varchar(8) ,
10 |     rxaui2    varchar(8),
11 |     stype2    varchar(50),
12 |     rela      varchar(100) ,
13 |     rui       varchar(10),
14 |     srui      varchar(50),
15 |     sab       varchar(20) NOT NULL,
16 |     sl        varchar(1000),
17 |     dir       varchar(1),
18 |     rg        varchar(10),
19 |     suppress  varchar(1),
20 |     cvf       varchar(50),
21 |     blank     TEXT
22 | );
23 | 
24 | COPY sagerx_lake.rxnorm_rxnrel
25 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNREL.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
26 | --ESCAPE and QOUTE characters are dummy to remove default
27 | 
28 | CREATE INDEX IF NOT EXISTS rxnrel_rxcui1
29 | ON sagerx_lake.rxnorm_rxnrel(rxcui1);
30 | 
31 | CREATE INDEX IF NOT EXISTS rxnrel_rxcui2
32 | ON sagerx_lake.rxnorm_rxnrel(rxcui2);
33 | 
34 | CREATE INDEX IF NOT EXISTS rxnrel_rela
35 | ON sagerx_lake.rxnorm_rxnrel(rela);
36 | 


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxnrxnatomarchive.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxnatomarchive */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnatomarchive CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxnorm_rxnatomarchive (
 5 |    rxaui             varchar(8) not null,
 6 |    aui               varchar(10),
 7 |    str               varchar(4000) not null,
 8 |    archive_timestamp varchar(280) not null,
 9 |    created_timestamp varchar(280) not null,
10 |    updated_timestamp varchar(280) not null,
11 |    code              varchar(50),
12 |    is_brand          varchar(1),
13 |    lat               varchar(3),
14 |    last_released     varchar(30),
15 |    saui              varchar(50),
16 |    vsab              varchar(40),
17 |    rxcui             varchar(8),
18 |    sab               varchar(20),
19 |    tty               varchar(20),
20 |    merged_to_rxcui   varchar(8),
21 |    blank             TEXT
22 | );
23 | 
24 | COPY sagerx_lake.rxnorm_rxnatomarchive
25 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNATOMARCHIVE.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
26 | --ESCAPE and QOUTE characters are dummy to remove default
27 | 
28 | CREATE INDEX IF NOT EXISTS rxnrel_rxaui
29 | ON sagerx_lake.rxnorm_rxnatomarchive(rxaui);
30 | 
31 | CREATE INDEX IF NOT EXISTS rxnrel_rxcui
32 | ON sagerx_lake.rxnorm_rxnatomarchive(rxcui);
33 | 
34 | CREATE INDEX IF NOT EXISTS rxnrel_mergedcui
35 | ON sagerx_lake.rxnorm_rxnatomarchive(merged_to_rxcui);


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxnsab.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxnsab */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnsab CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxnorm_rxnsab  (
 5 |    vcui           varchar (8),
 6 |    rcui           varchar (8),
 7 |    vsab           varchar (40),
 8 |    rsab           varchar (20) NOT NULL,
 9 |    son            varchar (3000),
10 |    sf             varchar (20),
11 |    sver           varchar (20),
12 |    vstart         varchar (10),
13 |    vend           varchar (10),
14 |    imeta          varchar (10),
15 |    rmeta          varchar (10),
16 |    slc            varchar (1000),
17 |    scc            varchar (1000),
18 |    srl            integer,
19 |    tfr            integer,
20 |    cfr            integer,
21 |    cxty           varchar (50),
22 |    ttyl           varchar (300),
23 |    atnl           varchar (1000),
24 |    lat            varchar (3),
25 |    cenc           varchar (20),
26 |    curver         varchar (1),
27 |    sabin          varchar (1),
28 |    ssn            varchar (3000),
29 |    scit           varchar (4000),
30 |    blank          TEXT
31 | );
32 | 
33 | COPY sagerx_lake.rxnorm_rxnsab
34 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNSAB.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
35 | --ESCAPE and QOUTE characters are dummy to remove default


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxnsat.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxnsat */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnsat CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxnorm_rxnsat (
 5 | rxcui            varchar(8) ,
 6 | lui              varchar(8),
 7 | sui              varchar(8),
 8 | rxaui            varchar(8),
 9 | stype            varchar (50),
10 | code             varchar (50),
11 | atui             varchar(11),
12 | satui            varchar (50),
13 | atn              varchar (1000) NOT NULL,
14 | sab              varchar (20) NOT NULL,
15 | atv              varchar (7000),
16 | suppress         varchar (1),
17 | cvf              varchar (50),
18 | blank		     TEXT
19 | );
20 | 
21 | COPY sagerx_lake.rxnorm_rxnsat
22 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNSAT.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
23 | --ESCAPE and QOUTE characters are dummy to remove default
24 | 
25 | CREATE INDEX IF NOT EXISTS rxnsat_rxcui
26 | ON sagerx_lake.rxnorm_rxnsat(rxcui);
27 | 
28 | CREATE INDEX IF NOT EXISTS rxnsat_atv
29 | ON sagerx_lake.rxnorm_rxnsat(atv);
30 | 
31 | CREATE INDEX IF NOT EXISTS rxnsat_atn
32 | ON sagerx_lake.rxnorm_rxnsat(atn);
33 | 


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/load_rxnsty.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxnorm_rxnsty */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxnorm_rxnsty CASCADE;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxnorm_rxnsty (
 5 |    rxcui          varchar(8) NOT NULL,
 6 |    tui            varchar (4),
 7 |    stn            varchar (100),
 8 |    sty            varchar (50),
 9 |    atui           varchar (11),
10 |    cvf            varchar (50),
11 |    blank          TEXT
12 | );
13 | 
14 | COPY sagerx_lake.rxnorm_rxnsty
15 | FROM '{{ ti.xcom_pull(task_ids='extract') }}/rrf/RXNSTY.RRF' CSV DELIMITER '|' ENCODING 'UTF8' ESCAPE E'\b' QUOTE E'\b';
16 | --ESCAPE and QOUTE characters are dummy to remove default


--------------------------------------------------------------------------------
/airflow/dags/rxnorm/terms-of-service.md:
--------------------------------------------------------------------------------
1 | # RxNorm Terms of Service
2 | 
3 | ```
4 | This product uses publicly available data courtesy of the U.S. National Library of Medicine (NLM), National Institutes of Health, Department of Health and Human Services; NLM is not responsible for the product and does not endorse or recommend this or any other product.
5 | ```
6 | 
7 | More information can be found at https://www.nlm.nih.gov/research/umls/rxnorm/docs/termsofservice.html.


--------------------------------------------------------------------------------
/airflow/dags/rxnorm_historical/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | 
 3 | from airflow.decorators import dag
 4 | 
 5 | from rxnorm_historical.dag_tasks import extract, load
 6 | 
 7 | 
 8 | dag_id = "rxnorm_historical"
 9 | 
10 | @dag(
11 |     dag_id=dag_id,
12 |     schedule_interval="0 3 15 * *",  # Runs on the 15th of each month at 3 AM
13 |     start_date=pendulum.today('UTC').add(days=-1),
14 |     catchup=False
15 | )
16 | def rxnorm_historical():
17 |     # Main processing task
18 |     extract_task = extract(dag_id)
19 |     load_task = load(extract_task)
20 |     
21 |     extract_task >> load_task
22 |     
23 | # Instantiate the DAG
24 | dag = rxnorm_historical()
25 | 


--------------------------------------------------------------------------------
/airflow/dags/rxterms/dag.py:
--------------------------------------------------------------------------------
 1 | from airflow_operator import create_dag
 2 | from airflow.utils.helpers import chain
 3 | 
 4 | from common_dag_tasks import  extract, transform, get_ordered_sql_tasks, get_ds_folder
 5 | from sagerx import read_sql_file
 6 | from airflow.providers.postgres.operators.postgres import PostgresOperator
 7 | 
 8 | 
 9 | dag_id = "rxterms"
10 | 
11 | dag = create_dag(
12 |     dag_id=dag_id,
13 |     schedule= "45 0 15 1 *",  # runs once monthly on the 15th day at 00:45
14 |     max_active_runs=1,
15 |     concurrency=2,
16 | )   
17 | 
18 | with dag:
19 |     mnth = "{{ macros.ds_format(ds, '%Y-%m-%d', '%Y%m' ) }}"
20 |     url = f"https://data.lhncbc.nlm.nih.gov/public/rxterms/release/RxTerms{mnth}.zip"
21 |     ds_folder = get_ds_folder(dag_id)
22 | 
23 |     extract_task = extract(dag_id,url)
24 |     transform_task = transform(dag_id)
25 | 
26 |     sql_tasks = []
27 |     for sql in get_ordered_sql_tasks(dag_id):
28 |         sql_path = ds_folder / sql
29 |         task_id = sql[:-4] #remove .sql
30 | 
31 |         sql_task = PostgresOperator(
32 |             task_id=task_id,
33 |             postgres_conn_id="postgres_default",
34 |             sql=read_sql_file(sql_path).format(data_path=extract_task, mnth=mnth),
35 |             dag=dag
36 |         )
37 |         sql_tasks.append(sql_task)
38 |     
39 |     extract_task >> sql_tasks >> transform_task
40 |    


--------------------------------------------------------------------------------
/airflow/dags/rxterms/load_ingredients.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxterms_ingredients */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxterms_ingredients;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxterms_ingredients (
 5 | rxcui           TEXT,
 6 | ingredient      TEXT,
 7 | ing_rxcui       TEXT
 8 | );
 9 | 
10 | COPY sagerx_lake.rxterms_ingredients
11 | FROM '{data_path}/RxTermsIngredients{mnth}.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER;
12 | 


--------------------------------------------------------------------------------
/airflow/dags/rxterms/load_rxterms.sql:
--------------------------------------------------------------------------------
 1 | /* sagerx_lake.rxterms */
 2 | DROP TABLE IF EXISTS sagerx_lake.rxterms;
 3 | 
 4 | CREATE TABLE sagerx_lake.rxterms (
 5 | rxcui                   TEXT,
 6 | generic_rxcui           TEXT,
 7 | tty                     TEXT,
 8 | full_name               TEXT,
 9 | rxn_dose_form           TEXT,
10 | full_generic_name       TEXT,
11 | brand_name              TEXT,
12 | display_name            TEXT,
13 | route                   TEXT,
14 | new_dose_form           TEXT,
15 | strength                TEXT,
16 | suppress_for            TEXT,
17 | display_name_synonym    TEXT,
18 | is_retired              TEXT,
19 | sxdg_rxcui              TEXT,
20 | sxdg_tty                TEXT,
21 | sxdg_name               TEXT,
22 | psn                     TEXT
23 | );
24 | 
25 | COPY sagerx_lake.rxterms
26 | FROM '{data_path}/RxTerms{mnth}.txt' DELIMITER '|' QUOTE E'\b' CSV HEADER;
27 | 


--------------------------------------------------------------------------------
/airflow/dags/umls/dag.py:
--------------------------------------------------------------------------------
 1 | import pendulum
 2 | from airflow.decorators import dag
 3 | from umls.dag_tasks import extract, load
 4 | from common_dag_tasks import transform
 5 | 
 6 | dag_id = "umls"
 7 | 
 8 | @dag(
 9 |     dag_id=dag_id,
10 |     schedule_interval="0 3 15 * *",  # Runs on the 15th of each month at 3 AM
11 |     start_date=pendulum.today('UTC').add(days=-1),
12 |     catchup=False
13 | )
14 | def umls():
15 |     extract_task = extract(dag_id)
16 |     load_task = load(extract_task)
17 |     transform_task = transform(dag_id, models_subdir=['staging', 'intermediate'])
18 | 
19 |     extract_task >> load_task >> transform_task
20 | 
21 | dag = umls()
22 | 


--------------------------------------------------------------------------------
/airflow/dags/user_macros.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, date, timedelta
 2 | import calendar
 3 | 
 4 | 
 5 | def ds_datetime(ds):
 6 |     return datetime.strptime(ds, "%Y-%m-%d")
 7 | 
 8 | 
 9 | def get_date_of_prior_weekday(
10 |     weekday, reference_date=date.today(), date_format="%m-%d-%Y"
11 | ):
12 | 
13 |     """Gets the date of the prior weekday.  If today is the same weekday, returns today's date."""
14 |     weekday = weekday.lower()
15 |     weekdays = [d.lower() for d in list(calendar.day_name)]
16 |     weekday_number = weekdays.index(weekday)
17 | 
18 |     if weekday_number < 0:
19 |         raise ValueError
20 | 
21 |     offset = (reference_date.weekday() - weekday_number) % 7
22 |     prior_weekday = reference_date - timedelta(days=offset)
23 |     prior_weekday = prior_weekday.strftime(date_format)
24 | 
25 |     return prior_weekday
26 | 
27 | 
def get_quarter(reference_date: date) -> int:
    """Return the calendar quarter (1 to 4) that reference_date's month belongs to."""
    zero_based_quarter, _ = divmod(reference_date.month - 1, 3)
    return zero_based_quarter + 1
30 | 
31 | 
def get_first_day_of_quarter(reference_date: date, date_format="%m-%d-%Y"):
    """Return the first day of reference_date's quarter, formatted with date_format."""
    # Quarters 1-4 start in months 1, 4, 7 and 10 respectively.
    first_month = 3 * get_quarter(reference_date) - 2
    return datetime(reference_date.year, first_month, 1).strftime(date_format)
36 | 
37 | 
def list_to_bash_array(list: list):
    """Join the items, formatted as strings, into one space-separated string.

    Leading and trailing whitespace of the joined result is stripped.
    """
    # NOTE: the parameter name shadows the builtin `list`; it is kept because
    # callers may pass it by keyword.
    return " ".join(f"{item}" for item in list).strip()
43 | 


--------------------------------------------------------------------------------
/airflow/dags/vsac/dag.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import pendulum
 3 | 
 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel
 5 | 
 6 | from airflow.decorators import dag, task
 7 | 
 8 | from airflow.operators.python import get_current_context
 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator
10 | from airflow.hooks.postgres_hook import PostgresHook
11 | 
12 | from common_dag_tasks import run_subprocess_command, extract
13 | from vsac.dag_tasks import main_execution
14 | 
15 | 
16 | 
@dag(
    schedule="0 3 * * *",
    # NOTE(review): start_date is computed when the file is parsed, so it moves
    # forward every day; Airflow guidance favours a fixed start_date - confirm.
    start_date=pendulum.yesterday(),
    catchup=False,
)
def vsac():
    """DAG scheduled daily at 03:00 with a single task.

    The task is whatever ``vsac.dag_tasks.main_execution`` returns
    (presumably a combined extract-and-load step - confirm in dag_tasks).
    No other tasks or dependencies are declared here.
    """
    main_execution()


vsac()
32 | 


--------------------------------------------------------------------------------
/airflow/hidden_dags/meps/meps_medical_conditions_dag.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import pendulum
 3 | 
 4 | from sagerx import get_dataset, read_sql_file, get_sql_list, alert_slack_channel
 5 | 
 6 | from airflow.decorators import dag, task
 7 | 
 8 | from airflow.operators.python import get_current_context
 9 | from airflow.providers.postgres.operators.postgres import PostgresOperator
10 | from airflow.hooks.postgres_hook import PostgresHook
11 | from airflow.hooks.subprocess import SubprocessHook
12 | 
13 | 
@dag(
    schedule="0 4 * * *",
    start_date=pendulum.today(),
    catchup=False,
)
def meps_medical_conditions():
    """DAG that downloads a fixed-width data file and loads it into Postgres.

    Tasks:
        extract: fetches ``ds_url`` via ``get_dataset`` into
            ``/opt/airflow/data/<dag_id>`` and returns the resulting path.
        load: replaces ``datasource.meps_medical_conditions`` with an empty
            table, then appends the file's contents in chunks of 1000 rows.
    """
    # Column names and their (start, end) character offsets, passed to
    # pandas.read_fwf as `names` and `colspecs` respectively.
    col_names = ["duid","pid","dupersid","condn","condidx","panel","condrn","agediag","crnd1","crnd2","crnd3","crnd4","crnd5","injury","accdnwrk","icd10cdx","ccsr1x","ccsr2x","ccsr3x","hhnum","ipnum","opnum","obnum","ernum","rxnum","perwt18f","varstr","varpsu"]
    col_spaces = [(0,7),(7,10),(10,20),(20,23),(23,36),(36,38),(38,39),(39,42),(42,44),(44,46),(46,47),(47,49),(49,51),(51,52),(52,55),(55,58),(58,64),(64,70),(70,76),(76,78),(78,80),(80,83),(83,86),(86,88),(88,90),(90,102),(102,106),(106,107)]
    dag_id = "meps_medical_conditions"
    filename = "h207"
    ds_url = f"https://meps.ahrq.gov/mepsweb/data_files/pufs/{filename}/{filename}dat.zip"

    # Task to download data from web location
    @task
    def extract():
        data_folder = Path("/opt/airflow/data") / dag_id
        data_path = get_dataset(ds_url, data_folder)
        return data_path

    @task
    def load(data_path):
        import pandas as pd

        pg_hook = PostgresHook(postgres_conn_id="postgres_default")
        engine = pg_hook.get_sqlalchemy_engine()

        # create empty table with columns in postgres
        # overwrite existing table, if exists
        df = pd.DataFrame(columns=col_names)
        df.to_sql(
            dag_id,
            con=engine,
            schema="datasource",
            if_exists="replace",
            index=False
        )

        # Path() accepts both str and Path, so this works whichever type the
        # extract task hands over (str + str concatenation only handled str).
        dat_file = Path(data_path) / f"{filename}.dat"

        with pd.read_fwf(
            dat_file,
            header=None,
            names=col_names,
            # read every column as text; no numeric type inference
            converters={col: str for col in col_names},
            colspecs=col_spaces,
            chunksize=1000
        ) as reader:
            for chunk in reader:
                chunk.to_sql(
                    dag_id,
                    con=engine,
                    schema="datasource",
                    if_exists="append",
                    index=False
                )

    load(extract())


meps_medical_conditions()
73 | 


--------------------------------------------------------------------------------
/airflow/requirements.txt:
--------------------------------------------------------------------------------
1 | # Any change made here should accompany an increment 
2 | # to the image version on line 5 of docker-compose.yml
3 | 
4 | dbt-core
5 | dbt-postgres
6 | apache-airflow[google]
7 | bs4
8 | 


--------------------------------------------------------------------------------
/dbt/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.11-slim
 2 | 
 3 | ENV DBT_PROFILES_DIR=/dbt
 4 | 
 5 | RUN apt-get -y update
 6 | RUN apt-get -y install git
 7 | 
 8 | RUN pip install dbt-core dbt-postgres
 9 | 
10 | WORKDIR /dbt
11 | 
12 | COPY . .
13 | 
14 | WORKDIR /dbt/sagerx


--------------------------------------------------------------------------------
/dbt/profiles.yml:
--------------------------------------------------------------------------------
# dbt connection profile named `sagerx`, with two Postgres outputs that differ
# only in the target schema.
# NOTE(review): user/password are hard-coded here; presumably local
# docker-compose defaults - confirm before using outside local development.
sagerx:
  outputs:
    # Development output: schema `sagerx_dev`.
    dev:
      type: postgres
      threads: 1
      host: postgres
      port: 5432
      user: sagerx
      pass: sagerx
      dbname: sagerx
      schema: sagerx_dev

    # Production output: schema `sagerx`.
    prod:
      type: postgres
      threads: 1
      host: postgres
      port: 5432
      user: sagerx
      pass: sagerx
      dbname: sagerx
      schema: sagerx

  # Output used when no --target is given on the command line.
  target: dev
24 | 


--------------------------------------------------------------------------------
/dbt/sagerx/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | target/
3 | dbt_packages/
4 | logs/
5 | 


--------------------------------------------------------------------------------
/dbt/sagerx/README.md:
--------------------------------------------------------------------------------
 1 | Welcome to your new dbt project!
 2 | 
 3 | ### Using the starter project
 4 | 
 5 | Try running the following commands:
 6 | - dbt run
 7 | - dbt test
 8 | 
 9 | 
10 | ### Resources:
11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
13 | - Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
14 | - Find [dbt events](https://events.getdbt.com) near you
15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
16 | 


--------------------------------------------------------------------------------
/dbt/sagerx/analyses/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/analyses/.gitkeep


--------------------------------------------------------------------------------
/dbt/sagerx/dbt_project.yml:
--------------------------------------------------------------------------------
 1 | # Name your project! Project names should contain only lowercase characters
 2 | # and underscores. A good package name should reflect your organization's
 3 | # name or the intended use of these models
 4 | name: "sagerx"
 5 | version: "1.0.0"
 6 | config-version: 2
 7 | 
 8 | # This setting configures which "profile" dbt uses for this project.
 9 | profile: "sagerx"
10 | 
11 | # These configurations specify where dbt should look for different types of files.
12 | # The `model-paths` config, for example, states that models in this project can be
13 | # found in the "models/" directory. You probably won't need to change these!
14 | model-paths: ["models"]
15 | analysis-paths: ["analyses"]
16 | test-paths: ["tests"]
17 | seed-paths: ["seeds"]
18 | macro-paths: ["macros"]
19 | snapshot-paths: ["snapshots"]
20 | 
21 | target-path: "target" # directory which will store compiled SQL files
22 | clean-targets: # directories to be removed by `dbt clean`
23 |   - "target"
24 |   - "dbt_packages"
25 | 
26 | # Configuring models
27 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
28 | 
29 | # These settings can be overridden in the individual model
30 | # files using the `{{ config(...) }}` macro.
models:
  sagerx:
    # Staging models: custom schema `sagerx`, built as views.
    staging:
      +schema: sagerx
      +materialized: view
    # Intermediate models: custom schema `sagerx`, built as tables.
    intermediate:
      +schema: sagerx
      +materialized: table
    # Mart models: custom schema `sagerx`, built as tables.
    marts:
      +schema: sagerx
      +materialized: table
    # Applies to every model in the project: persist model and column
    # descriptions to the database.
    +persist_docs:
      relation: true
      columns: true
45 | 


--------------------------------------------------------------------------------
/dbt/sagerx/macros/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/macros/.gitkeep


--------------------------------------------------------------------------------
/dbt/sagerx/macros/get_custom_schema.sql:
--------------------------------------------------------------------------------
1 | -- get_custom_schema.sql
2 | -- https://docs.getdbt.com/docs/build/custom-schemas
3 | 
{# Overrides dbt's schema-name hook: every call is delegated unchanged to
   generate_schema_name_for_env(custom_schema_name, node). See the dbt
   custom-schemas documentation linked above for what that macro resolves. #}
{% macro generate_schema_name(custom_schema_name, node) -%}
    {{ generate_schema_name_for_env(custom_schema_name, node) }}
{%- endmacro %}
7 | 


--------------------------------------------------------------------------------
/dbt/sagerx/macros/ndc_convert.sql:
--------------------------------------------------------------------------------
{# ndc_convert(ndc, to_format)
   Renders a SQL CASE expression that reformats an NDC into the layout named
   by to_format. The input is first normalised with ndc_to_11(ndc).

   Arguments:
     ndc        SQL expression for the NDC value (passed on to ndc_to_11).
     to_format  Interpolated verbatim into the SQL on the left of each
                "= '<format>'" comparison.

   Result per format:
     '11 Digit'  the 11-digit value itself
     '4-4-2'     only when digit 1 of the 11-digit value is '0', else NULL
     '5-3-2'     only when digit 6 is '0', else NULL
     '5-4-1'     only when digit 10 is '0', else NULL
     '5-4-2'     always (hyphens inserted after digits 5 and 9)
     '10 Digit', '5-5', '4-6' and anything else: NULL

   NOTE(review): when to_format is not in format_list, the branch below
   renders the text "format must be of viable type" into the SQL output; it
   does not raise a compile error. Because to_format is also interpolated as
   raw SQL, it is unclear which values are meant to pass this check - confirm
   the intended call convention. #}
{% macro ndc_convert (ndc, to_format) %}

    {% set ndc11 = ndc_to_11(ndc) %}
    {% set format_list = ['10 Digit','11 Digit','4-4-2','5-3-2','5-4-1','5-4-2','5-5','4-6'] %}
    {% if to_format not in format_list %}
        {{ "format must be of viable type" }}
    {% endif %}
    

    {%- set return_value %}
        CASE WHEN {{to_format}} = '10 Digit' THEN NULL
            WHEN {{to_format}} = '11 Digit' THEN {{ndc11}}
		    WHEN {{to_format}} = '4-4-2' THEN 
		 			CASE WHEN SUBSTRING({{ndc11}},1,1) = '0' THEN SUBSTRING({{ndc11}},2,4) ||'-'|| SUBSTRING({{ndc11}},6,4) ||'-'|| RIGHT({{ndc11}},2) ELSE NULL END
            WHEN {{to_format}} = '5-3-2' THEN 
		 			CASE WHEN SUBSTRING({{ndc11}},6,1) = '0' THEN LEFT({{ndc11}},5) ||'-'|| SUBSTRING({{ndc11}},7,3) ||'-'|| RIGHT({{ndc11}},2) ELSE NULL END
		    WHEN {{to_format}} = '5-4-1' THEN
		 			CASE WHEN SUBSTRING({{ndc11}},10,1) = '0' THEN LEFT({{ndc11}},5) ||'-'|| SUBSTRING({{ndc11}},6,4) ||'-'|| RIGHT({{ndc11}},1) ELSE NULL END
		    WHEN {{to_format}} = '5-4-2' THEN LEFT({{ndc11}},5) ||'-'|| SUBSTRING({{ndc11}},6,4) ||'-'|| RIGHT({{ndc11}},2)
		    WHEN {{to_format}} = '5-5' THEN NULL
		    WHEN {{to_format}} = '4-6' THEN NULL
	    ELSE NULL
	    END
  {% endset %}
{{return_value}}

{% endmacro %}
28 | 


--------------------------------------------------------------------------------
/dbt/sagerx/macros/ndc_format.sql:
--------------------------------------------------------------------------------
 1 | {% macro ndc_format(ndc) %}
 2 |   {% set ndc_format %}
 3 |     CASE WHEN {{ndc}} ~ '^\d{10}$' THEN '10 Digit'
 4 |         WHEN {{ndc}} ~ '^\d{11}$' THEN '11 Digit'
 5 |         WHEN {{ndc}} ~ '^\d{4}-\d{4}-\d{2}$' THEN '4-4-2'
 6 |         WHEN {{ndc}} ~ '^\d{5}-\d{3}-\d{2}$' THEN '5-3-2'
 7 |         WHEN {{ndc}} ~ '^\d{5}-\d{4}-\d{1}$' THEN '5-4-1'
 8 |         WHEN {{ndc}} ~ '^\d{5}-\d{4}-\d{2}$' THEN '5-4-2'
 9 |         WHEN {{ndc}} ~ '^\d{5}-\d{5}$' THEN '5-5'
10 |         WHEN {{ndc}} ~ '^\d{4}-\d{6}$' THEN '4-6'
11 | 	  ELSE 'Unknown'
12 | 	  END
13 |   {% endset %}
14 |   {{ndc_format}}
15 | {% endmacro %}
16 | 


--------------------------------------------------------------------------------
/dbt/sagerx/macros/ndc_to_11.sql:
--------------------------------------------------------------------------------
 1 | {%- macro ndc_to_11(ndc) %}
 2 |   {%- set return_value %}
 3 |   CASE WHEN {{ ndc_format (ndc) }} = '10 Digit' THEN NULL
 4 |      WHEN {{ ndc_format (ndc) }} =  '11 Digit' THEN {{ndc}}
 5 |      WHEN {{ ndc_format (ndc) }} =  '4-4-2' THEN '0' || LEFT({{ndc}},4) || REPLACE(RIGHT({{ndc}},7),'-','')
 6 |      WHEN {{ ndc_format (ndc) }} =  '5-3-2' THEN LEFT({{ndc}},5) || '0' || REPLACE(RIGHT({{ndc}},6),'-','')
 7 |      WHEN {{ ndc_format (ndc) }} =  '5-4-1' THEN REPLACE(LEFT({{ndc}},10),'-','') || '0' || RIGHT({{ndc}}, 1)
 8 |      WHEN {{ ndc_format (ndc) }} =  '5-4-2' THEN REPLACE({{ndc}},'-','')
 9 |      WHEN {{ ndc_format (ndc) }} =  '5-5' THEN NULL
10 |      WHEN {{ ndc_format (ndc) }} =  '4-6' THEN NULL
11 |   ELSE NULL
12 |   END
13 |   {% endset %}
14 |   {{return_value}}
15 | {% endmacro -%}
16 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/dailymed/int_dailymed_image_name_ndcs.sql:
--------------------------------------------------------------------------------
-- int_dailymed_image_name_ndcs
-- Finds NDC-shaped substrings in package label image names and keeps those
-- that normalise (via the ndc_to_11 macro) to the same value as an NDC
-- listed for the same set_id in stg_dailymed__ndcs.

with

package_label_section_images as (

    select * from {{ ref('stg_dailymed__package_label_section_images') }}

),

regex_ndcs as (

    -- regexp_matches with the 'g' flag returns one row per match, so an image
    -- name containing several candidates yields several rows; names with no
    -- match yield none.
    select
        *,
        (regexp_matches(image, '(?:\d{4}|\d{5})-\d{3,6}(?:-\d{1,2})?|\d{11}|\d{10}', 'g'))[1] as regex_ndc
    from package_label_section_images

    /*
        Layouts the pattern above is meant to cover:

        \d{11}              | # 11 digit
        \d{10}              | # 10 digit
        \d{5}-\d{5}         | # 5-5
        \d{5}-\d{4}-\d{2}   | # 5-4-2
        \d{5}-\d{4}-\d{1}   | # 5-4-1
        \d{5}-\d{3}-\d{2}   | # 5-3-2
        \d{4}-\d{6}         | # 4-6
        \d{4}-\d{4}-\d{2}     # 4-4-2
    */

),

valid_spl_ndcs as (

    select * from {{ ref('stg_dailymed__ndcs') }}

),

validated_ndcs as (

    -- Both sides are normalised before comparison. Layouts the macro maps to
    -- NULL (10 digit, 5-5, 4-6) can never satisfy the equality, so they drop out.
    select
        regex_ndcs.*,
        spl_ndc.ndc,
        spl_ndc.ndc11
    from regex_ndcs
    inner join valid_spl_ndcs spl_ndc
        on spl_ndc.set_id = regex_ndcs.set_id
        and {{ ndc_to_11('spl_ndc.ndc') }} = {{ ndc_to_11('regex_ndcs.regex_ndc') }}

)

select * from validated_ndcs
51 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/dailymed/int_dailymed_image_xml_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- int_dailymed_image_xml_ndcs
 2 | 
 3 | with
 4 | 
 5 | ranked_package_label_images as
 6 | (
 7 | 
 8 |     select * from {{ ref('int_dailymed_ranked_package_label_images') }}
 9 | 
10 | ),
11 | 
12 | ranked_package_label_ndcs as
13 | (
14 | 
15 |     select * from {{ ref('int_dailymed_ranked_package_label_ndcs') }}
16 | 
17 | )
18 | 
19 | select
20 | 	img.set_id,
21 | 	ndc.ndc,
22 | 	img.image
23 | from ranked_package_label_images img
24 | left join ranked_package_label_ndcs ndc
25 | 	on ndc.package_label_section_id = img.package_label_section_id
26 | 	and ndc.rn = img.rn
27 | where ndc.ndc is not null
28 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/dailymed/int_dailymed_organization_metrics.sql:
--------------------------------------------------------------------------------
/* intermediate.int_dailymed_organization_metrics */
-- One row per (set_id, market_status) with counts of organization rows by
-- org_type and two flag columns.

with dailymed_main as (
    select * from {{ ref('stg_dailymed__main') }}
),

dailymed_organizations as (
    select * from {{ ref('stg_dailymed__organizations') }}
),

dailymed_organization_texts as (
    select * from {{ ref('stg_dailymed__organization_texts') }}
)

-- NOTE(review): the left join to organization texts is on set_id only, so a
-- set_id with several text rows would presumably multiply the organization
-- rows and inflate every count below - confirm the grain of
-- stg_dailymed__organization_texts.
-- NOTE(review): the flag values differ in case ('Yes' vs 'yes'), and the
-- final count(*) has no alias - confirm both are intended.
select o.set_id
	, ma.market_status
	, sum(case when org_type = 'Functioner' then 1 else 0 end) as functioner_count
	, sum(case when org_type = 'Labeler' then 1 else 0 end) as labeler_count
	, sum(case when org_type = 'Repacker' then 1 else 0 end) as repacker_count
	-- 'Yes' when at least one organization text row joined, else empty string
	, case when sum(case when ot.set_id is not null then 1 else 0 end) > 0 then 'Yes' else '' end as organization_text
	-- 'yes' when exactly one Labeler row and no Functioner rows were counted
	, case when sum(case when org_type = 'Labeler' then 1 else 0 end) = 1 
				and sum(case when org_type = 'Functioner' then 1 else 0 end) = 0
			then 'yes' else '' end as labeler_only
	, count(*)
from dailymed_main ma
	inner join dailymed_organizations o
        on o.set_id = ma.set_id
	left join dailymed_organization_texts ot
        on o.set_id = ot.set_id
group by o.set_id, ma.market_status
31 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/dailymed/int_dailymed_ranked_package_label_images.sql:
--------------------------------------------------------------------------------
 1 | with
 2 | 
 3 | package_label_images as (
 4 | 
 5 |     select * from{{ ref('stg_dailymed__package_label_section_images') }}
 6 | 
 7 | ),
 8 | 
 9 | ranked_package_images as (
10 | 
11 |     select 
12 |         *,
13 |         row_number() over (
14 |             partition by package_label_section_id
15 |             order by id
16 |         ) as rn
17 |     from package_label_images
18 | 
19 | )
20 | 
21 | select * from ranked_package_images
22 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/dailymed/int_dailymed_ranked_package_label_ndcs.sql:
--------------------------------------------------------------------------------
 1 | with
 2 | 
 3 | validated_package_ndcs as (
 4 | 
 5 |     select * from {{ ref('int_dailymed_validated_package_label_ndcs') }}
 6 | 
 7 | ),
 8 | 
 9 | ranked_package_ndcs as (
10 | 
11 |     select 
12 |         *,
13 |         row_number() over (
14 |             partition by package_label_section_id
15 |             order by id
16 |         ) as rn
17 |     from validated_package_ndcs
18 | 
19 | )
20 | 
21 | select * from ranked_package_ndcs
22 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/dailymed/int_dailymed_validated_package_label_ndcs.sql:
--------------------------------------------------------------------------------
 1 | --int_dailymed_validated_package_label_ndcs
 2 | 
 3 | with
 4 | 
 5 | valid_spl_ndcs as (
 6 | 
 7 |     select * from{{ ref('stg_dailymed__ndcs') }}
 8 | 
 9 | ),
10 | 
11 | package_label_ndc_matches as (
12 | 
13 |     select * from {{ ref('stg_dailymed__package_label_section_ndcs') }}
14 | 
15 | ),
16 | 
17 | validated_package_ndcs as (
18 | 
19 |     select
20 |         *
21 |     from package_label_ndc_matches pkg_ndc
22 |     where exists (
23 | 
24 |         select
25 |             ndc
26 |         from valid_spl_ndcs
27 |         where ndc = pkg_ndc.ndc
28 |         
29 |     )
30 | 
31 | )
32 | 
33 | select * from validated_package_ndcs
34 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/fda/int_fda_packaging_components.sql:
--------------------------------------------------------------------------------
-- int_fda_packaging_components
-- Splits each FDA package description into components and, for each
-- component, extracts the "<inner> in <outer> (<ndc>)" pieces and their
-- leading numeric value and unit.
{{ config(materialized='table') }}

with 

-- NDC11 + package description from the three FDA staging models, stacked.
all_fda as (
	select ndc11, packagedescription
	from {{ ref('stg_fda_ndc__ndcs') }}

	union all

	select ndc11, packagedescription
	from {{ ref('stg_fda_excluded__ndcs') }}

	union all

	select ndc11, packagedescription
	from {{ ref('stg_fda_unfinished__ndcs') }}
),

-- One row per component. The regex rewrites every '/' that has no '*'
-- anywhere before it in the string to ' | ', and the result is split on '|';
-- slashes after the first '*' are left in place.
split_components as (
	select
		z.ndc11
		, z.packagedescription
		, z.ordinality as component_line
		, trim(z.token) as component_text
	from (
		select distinct 
			all_fda.ndc11
			, all_fda.packagedescription
			, s.token
			, s.ordinality
		from
			all_fda
			, unnest(
				string_to_array(
					regexp_replace(
						all_fda.packagedescription
						, '(?<!\*.*)\/'
						, ' | '
						, 'g')
					, '|')
				) with ordinality as s(token, ordinality)
	) z
	order by ndc11, component_line
),

-- inner_text: everything before the last ' in ' (the capture is greedy);
-- outer_text: text after ' in ' up to the first '(' or end of string;
-- outer_ndc:  text between the first '(' and the last ')'.
inner_outer_text as (
	select
		c.*
		, trim(substring(component_text from '(.*) in ')) as inner_text
		, trim(substring(component_text from ' in (.*?)(?:\(|$)')) as outer_text
		, trim(substring(component_text from '\((.+)\)')) as outer_ndc
	from split_components c
),

-- Splits inner/outer text at the first space into a numeric value and a unit.
-- NOTE(review): the ::numeric casts would presumably raise if the first token
-- is not a number - confirm against the source data.
inner_outer_value_unit as (

	select
		*
		, {{ ndc_to_11('outer_ndc') }} as outer_ndc11
		, (regexp_match(inner_text, '^([^ ]+) (.+)'))[1]::numeric as inner_value
		, (regexp_match(inner_text, '^([^ ]+) (.+)'))[2] as inner_unit
		, (regexp_match(outer_text, '^([^ ]+) (.+)'))[1]::numeric as outer_value
		, (regexp_match(outer_text, '^([^ ]+) (.+)'))[2] as outer_unit
	from inner_outer_text

),

inner_outer_product as (

	select
		*,
		(inner_value * outer_value)::numeric as product
	from inner_outer_value_unit

),

-- total_product is currently always NULL (aggregate disabled, see note below).
total_product as (

	select
		ndc11,
		-- NOTE: had to do this because was breaking build_marts - see #378
		--array_product(array_agg(product)) as total_product
		null as total_product
	from inner_outer_product
	where product > 0
	group by ndc11

),

final as (

	select
		total_product,
		inner_outer_value_unit.*
	from inner_outer_value_unit
	left join total_product
		on total_product.ndc11 = inner_outer_value_unit.ndc11

)

select 
	*
from final
105 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/fda/int_fda_packaging_parts.sql:
--------------------------------------------------------------------------------
-- int_fda_packaging_parts
-- For packaging components whose text contains '*', splits the text on '*'
-- into one row per part (part_line is the 1-based position, part_text is the
-- trimmed piece).
{{ config(materialized='view') }}

with 

packaging_components as (
	select * from {{ ref('int_fda_packaging_components') }}
	where component_text like ('%*%')
)

select
	z.ndc11
    , z.packagedescription
	, z.component_line
	, z.component_text
   	, z.ordinality as part_line
	, trim(z.token) as part_text
from (
    select distinct 
        components.*
	    , s.token
        , s.ordinality
	from
		packaging_components components
		, unnest(
			string_to_array(
				component_text
				, '*')
			) with ordinality as s(token, ordinality)
) z
order by ndc11, component_line, part_line
31 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/fda/int_fda_packaging_subparts.sql:
--------------------------------------------------------------------------------
-- int_fda_packaging_subparts
-- For packaging parts whose text contains '/', splits the text on '/' into
-- one row per subpart (subpart_line is the 1-based position, subpart_text is
-- the trimmed piece).
{{ config(materialized='view') }}

with 

packaging_parts as (
	select * from {{ ref('int_fda_packaging_parts') }}
	where part_text like ('%/%')
)

select
	z.ndc11
    , z.packagedescription
	, z.component_line
	, z.component_text
    , z.part_line
    , z.part_text
   	, z.ordinality as subpart_line
	, trim(z.token) as subpart_text
from (
    select distinct 
        parts.*
	    , s.token
        , s.ordinality
	from
		packaging_parts parts
		, unnest(
			string_to_array(
				part_text
				, '/')
			) with ordinality as s(token, ordinality)
) z
order by ndc11, component_line, part_line, subpart_line
33 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/nadac/int_nadac_pricing.sql:
--------------------------------------------------------------------------------
-- int_nadac_pricing.sql
-- Rows of int_nadac_historical_pricing where is_last_price is true.

with

pricing as (

    select
        *
    from {{ ref('int_nadac_historical_pricing') }}
    where is_last_price
    -- NOTE(review): an ORDER BY inside a CTE does not guarantee the order of
    -- the final result - confirm whether ordering is actually needed here.
    order by ndc_description

)

select * from pricing
16 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/orange_book/_int_orange_book__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: int_fda_ndc_to_te
 5 |     description: |
 6 |       > DISCLAIMER: This model is under development and incomplete.
 7 |       
 8 |       This is an attempt at an NDC-level mapping of Orange Book TE codes. There are cases in Orange Book where two different strengths of a given ANDA have different TE codes. This means within an ANDA, two different NDCs could have different TE codes.
 9 | 
10 |       The problem we're encountering is that there's no reliable, programmatic way to map the strength of a product in Orange Book to the strength of that product in the FDA NDC Directory.
11 | 
12 |       Because I can't figure out NDC-level mapping, I only include applications with a single OB TE code.
13 | 
14 |       Again - please only treat this as the beginning of a proof of concept and do not use for clinical or other purposes.
15 |     columns:
16 |       - name: ndc11
17 |         description: The NDC11 of the product.
18 |       - name: application_number
19 |         description: The ANDA / NDA / etc number.
20 |       - name: te_code
21 |         description: The full therapeutic equivalency (TE) code as listed within Orange Book.
22 |       - name: first_two_te_code
23 |         description: Just the first two characters of the TE code - for ease of use downstream.
24 |       - name: first_one_te_code
25 |         description: Just the first character of the TE code - for ease of use downstream.
26 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/orange_book/int_fda_ndc_to_te.sql:
--------------------------------------------------------------------------------
-- DISCLAIMER: This model is under development and incomplete.
-- Maps an NDC11 to an Orange Book TE code, keeping only NDCs that resolve to
-- exactly one distinct te_code through their application number.

with cte as (
    -- Orange Book has appl_type + appl_no; the FDA side has a prefixed
    -- applicationnumber, so the key is rebuilt as 'ANDA' || appl_no when
    -- appl_type = 'A' and 'NDA' || appl_no otherwise.
    -- After the group by there is one row per (ndc11, te_code), so the window
    -- count is the number of distinct te_code values for that ndc11.
    select
        fda.ndc11
        , obp.te_code
        , count(fda.ndc11) over( partition by fda.ndc11 ) as num_te_codes
    from {{ source('orange_book', 'orange_book_products') }} as obp
    inner join {{ ref('stg_fda_ndc__ndcs') }} as fda 
        on concat(case when obp.appl_type = 'A' then 'ANDA' else 'NDA' end, obp.appl_no) = fda.applicationnumber
    group by fda.ndc11, obp.te_code
)
select
    fda.ndc11
    , fda.applicationnumber as application_number
    , cte.te_code
    , left(cte.te_code, 2) as first_two_te_code
    , left(cte.te_code, 1) as first_one_te_code
from {{ ref('stg_fda_ndc__ndcs') }} as fda 
inner join cte 
    on fda.ndc11 = cte.ndc11 
    -- drop NDCs that map to more than one TE code
    and cte.num_te_codes = 1
23 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxclass/_int_rxclass__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: int_rxclass_clinical_products_to_atc_class
 5 |     description: "Clinical products to ATC class"
 6 |     columns:
 7 |       - name: clinical_product_ndc
 8 |       - name: clinical_product_rxcui
 9 |       - name: clinical_product_name
10 |       - name: clinical_product_code
11 |       - name: clinical_product_class_name
12 | 
13 |   - name: int_rxclass_clinical_products_to_cvx_code
14 |     description: "Clinical products to CVX code"
15 |     columns:
16 |       - name: clinical_product_ndc
17 |       - name: clinical_product_rxcui
18 |       - name: clinical_product_name
19 |       - name: clinical_product_code
20 |       - name: clinical_product_class_name
21 | 
22 |   - name: int_rxclass_clinical_products_to_schedule
23 |     description: "Clinical products to DEA schedule"
24 |     columns:
25 |       - name: clinical_product_ndc
26 |       - name: clinical_product_rxcui
27 |       - name: clinical_product_name
28 |       - name: clinical_product_code
29 |       - name: clinical_product_class_name
30 | 
31 |   - name: int_rxclass_clinical_products_to_va_class
32 |     description: "Clinical products to VA class"
33 |     columns:
34 |       - name: clinical_product_ndc
35 |       - name: clinical_product_rxcui
36 |       - name: clinical_product_name
37 |       - name: clinical_product_code
38 |       - name: clinical_product_class_name


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_atc_class.sql:
--------------------------------------------------------------------------------
-- int_rxclass_clinical_products_to_atc_class
-- Distinct (NDC, rxcui, name, class id, class name) rows: RXNSAT 'NDC'
-- attributes on RXNORM-sourced atoms, joined by rxcui to rxclass rows with
-- rela_source = 'ATCPROD'.
-- NOTE(review): tables are referenced by literal schema-qualified name rather
-- than through dbt source/ref, so dbt presumably does not track these
-- dependencies - confirm.
select distinct
	rs.atv as clinical_product_ndc,
	rcl.rxcui as clinical_product_rxcui,
	rcl.name as clinical_product_name,
	rcl.class_id as clinical_product_code,
	rcl.class_name as clinical_product_class_name
from sagerx_lake.rxnorm_rxnsat rs
join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui
join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui
where
	rs.atn = 'NDC'
	and rc.sab = 'RXNORM'
	and rcl.rela_source = 'ATCPROD'


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_cvx_code.sql:
--------------------------------------------------------------------------------
-- int_rxclass_clinical_products_to_cvx_code
-- Distinct (NDC, rxcui, name, class id, class name) rows: RXNSAT 'NDC'
-- attributes on RXNORM-sourced atoms, joined by rxcui to rxclass rows with
-- rela_source = 'CDC'.
-- NOTE(review): tables are referenced by literal schema-qualified name rather
-- than through dbt source/ref, so dbt presumably does not track these
-- dependencies - confirm.
select distinct
	rs.atv as clinical_product_ndc,
	rcl.rxcui as clinical_product_rxcui,
	rcl.name as clinical_product_name,
	rcl.class_id as clinical_product_code,
	rcl.class_name as clinical_product_class_name
from sagerx_lake.rxnorm_rxnsat rs
join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui
join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui
where
	rs.atn = 'NDC'
	and rc.sab = 'RXNORM'
	and rcl.rela_source = 'CDC'


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_schedule.sql:
--------------------------------------------------------------------------------
-- int_rxclass_clinical_products_to_schedule
-- Distinct (NDC, rxcui, name, class id, class name) rows: RXNSAT 'NDC'
-- attributes on RXNORM-sourced atoms, joined by rxcui to rxclass rows with
-- rela_source = 'RXNORM'.
-- NOTE(review): tables are referenced by literal schema-qualified name rather
-- than through dbt source/ref, so dbt presumably does not track these
-- dependencies - confirm.
select distinct
	rs.atv as clinical_product_ndc,
	rcl.rxcui as clinical_product_rxcui,
	rcl.name as clinical_product_name,
	rcl.class_id as clinical_product_code,
	rcl.class_name as clinical_product_class_name
from sagerx_lake.rxnorm_rxnsat rs
join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui
join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui
where
	rs.atn = 'NDC'
	and rc.sab = 'RXNORM'
	and rcl.rela_source = 'RXNORM'


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxclass/int_rxclass_clinical_products_to_va_class.sql:
--------------------------------------------------------------------------------
-- int_rxclass_clinical_products_to_va_class
-- Distinct (NDC, rxcui, name, class id, class name) rows: RXNSAT 'NDC'
-- attributes on RXNORM-sourced atoms, joined by rxcui to rxclass rows with
-- rela_source = 'VA'.
-- NOTE(review): tables are referenced by literal schema-qualified name rather
-- than through dbt source/ref, so dbt presumably does not track these
-- dependencies - confirm.
select distinct
	rs.atv as clinical_product_ndc,
	rcl.rxcui as clinical_product_rxcui,
	rcl.name as clinical_product_name,
	rcl.class_id as clinical_product_code,
	rcl.class_name as clinical_product_class_name
from sagerx_lake.rxnorm_rxnsat rs
join sagerx_lake.rxnorm_rxnconso rc on rs.rxaui = rc.rxaui
join sagerx_lake.rxclass rcl on rcl.rxcui = rs.rxcui
where
	rs.atn = 'NDC'
	and rc.sab = 'RXNORM'
	and rcl.rela_source = 'VA'


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_mthspl_products_to_active_ingredients.sql:
--------------------------------------------------------------------------------
 1 | -- int_mthspl_products_to_active_ingredients.sql
 2 | --
 3 | -- Links MTHSPL products to substances through RXNREL rows whose rela is
 4 | -- 'has_active_ingredient' (rxaui1 = substance atom, rxaui2 = product atom).
 5 | 
 6 | with
 7 | 
 8 | substance as (
 9 |     select * from {{ ref('stg_rxnorm__mthspl_substances') }}
10 | )
11 | 
12 | , product as (
13 |     select * from {{ ref('stg_rxnorm__mthspl_products') }}
14 | )
15 | 
16 | , active_ingredient_link as (
17 |     select
18 |         rxaui1 as substance_rxaui
19 |         , rxaui2 as product_rxaui
20 |     from sagerx_lake.rxnorm_rxnrel
21 |     where rela = 'has_active_ingredient'
22 | )
23 | 
24 | select distinct
25 |     -- ndc9: first two hyphen-delimited NDC segments, left-padded with '0'
26 |     -- to 5 and 4 characters respectively
27 |     concat(
28 |         lpad(split_part(product.ndc, '-', 1), 5, '0'),
29 |         lpad(split_part(product.ndc, '-', 2), 4, '0')
30 |     ) as ndc9
31 |     , product.ndc as ndc
32 |     , product.rxcui as product_rxcui
33 |     , product.name as product_name
34 |     , product.tty as product_tty
35 |     , substance.unii as active_ingredient_unii
36 |     , substance.rxcui as active_ingredient_rxcui
37 |     , substance.name as active_ingredient_name
38 |     , substance.tty as active_ingredient_tty
39 |     , product.active as active
40 |     , product.prescribable as prescribable
41 | from active_ingredient_link
42 | inner join substance
43 |     on substance.rxaui = active_ingredient_link.substance_rxaui
44 | inner join product
45 |     on product.rxaui = active_ingredient_link.product_rxaui
46 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_mthspl_products_to_active_moieties.sql:
--------------------------------------------------------------------------------
 1 | -- int_mthspl_products_to_active_moieties.sql
 2 | --
 3 | -- Links MTHSPL products to substances through RXNREL rows whose rela is
 4 | -- 'has_active_moiety' (rxaui1 = substance atom, rxaui2 = product atom).
 5 | 
 6 | with
 7 | 
 8 | substance as (
 9 |     select * from {{ ref('stg_rxnorm__mthspl_substances') }}
10 | )
11 | 
12 | , product as (
13 |     select * from {{ ref('stg_rxnorm__mthspl_products') }}
14 | )
15 | 
16 | , active_moiety_link as (
17 |     select
18 |         rxaui1 as substance_rxaui
19 |         , rxaui2 as product_rxaui
20 |     from sagerx_lake.rxnorm_rxnrel
21 |     where rela = 'has_active_moiety'
22 | )
23 | 
24 | select distinct
25 |     -- ndc9: first two hyphen-delimited NDC segments, left-padded with '0'
26 |     -- to 5 and 4 characters respectively
27 |     concat(
28 |         lpad(split_part(product.ndc, '-', 1), 5, '0'),
29 |         lpad(split_part(product.ndc, '-', 2), 4, '0')
30 |     ) as ndc9
31 |     , product.ndc as ndc
32 |     , product.rxcui as product_rxcui
33 |     , product.name as product_name
34 |     , product.tty as product_tty
35 |     , substance.unii as active_moiety_unii
36 |     , substance.rxcui as active_moiety_rxcui
37 |     , substance.name as active_moiety_name
38 |     , substance.tty as active_moiety_tty
39 |     , product.active as active
40 |     , product.prescribable as prescribable
41 | from active_moiety_link
42 | inner join substance
43 |     on substance.rxaui = active_moiety_link.substance_rxaui
44 | inner join product
45 |     on product.rxaui = active_moiety_link.product_rxaui
46 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_mthspl_products_to_inactive_ingredients.sql:
--------------------------------------------------------------------------------
 1 | -- int_mthspl_products_to_inactive_ingredients.sql
 2 | --
 3 | -- Links MTHSPL products to substances through RXNREL rows whose rela is
 4 | -- 'has_inactive_ingredient' (rxaui1 = substance atom, rxaui2 = product atom).
 5 | 
 6 | with
 7 | 
 8 | substance as (
 9 | 
10 |     select * from {{ ref('stg_rxnorm__mthspl_substances') }}
11 | 
12 | ),
13 | 
14 | product as (
15 | 
16 |     select * from {{ ref('stg_rxnorm__mthspl_products') }}
17 | 
18 | ),
19 | 
20 | inactive_ingredient_link as (
21 | 
22 |     select
23 |         rxaui1 as substance_rxaui,
24 |         rxaui2 as product_rxaui
25 |     from sagerx_lake.rxnorm_rxnrel
26 |     where rela = 'has_inactive_ingredient'
27 | 
28 | )
29 | 
30 | select distinct
31 |     -- ndc9: first two hyphen-delimited NDC segments, left-padded with '0'
32 |     -- to 5 and 4 characters respectively
33 |     concat(
34 |         lpad(split_part(product.ndc, '-', 1), 5, '0'),
35 |         lpad(split_part(product.ndc, '-', 2), 4, '0')
36 |     ) as ndc9,
37 |     product.ndc as ndc,
38 |     product.rxcui as product_rxcui,
39 |     product.name as product_name,
40 |     product.tty as product_tty,
41 |     substance.unii as inactive_ingredient_unii,
42 |     substance.rxcui as inactive_ingredient_rxcui,
43 |     substance.name as inactive_ingredient_name,
44 |     substance.tty as inactive_ingredient_tty,
45 |     product.active as active,
46 |     product.prescribable as prescribable
47 | from inactive_ingredient_link
48 | inner join product
49 |     on product.rxaui = inactive_ingredient_link.product_rxaui
50 | inner join substance
51 |     on substance.rxaui = inactive_ingredient_link.substance_rxaui
52 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_all_ndcs_to_product_rxcuis.sql:
--------------------------------------------------------------------------------
 1 | -- int_rxnorm_all_ndcs_to_product_rxcuis.sql
 2 | --
 3 | -- Keeps the distinct (ndc11, rxcui) pairs from stg_rxnorm__all_ndcs whose
 4 | -- RXCUI has an RXNORM-sourced RXNCONSO row with TTY SCD, SBD, GPCK or BPCK.
 5 | 
 6 | with
 7 | 
 8 | ndc_rows as (
 9 |     select * from {{ ref('stg_rxnorm__all_ndcs') }}
10 | )
11 | 
12 | , product_concepts as (
13 |     select *
14 |     from sagerx_lake.rxnorm_rxnconso
15 |     where tty in ('SCD', 'SBD', 'GPCK', 'BPCK')
16 |         and sab = 'RXNORM'
17 | )
18 | 
19 | select distinct
20 |     ndc_rows.ndc11
21 |     , product_concepts.rxcui
22 | from ndc_rows
23 | inner join product_concepts
24 |     on product_concepts.rxcui = ndc_rows.rxcui
25 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_clinical_product_components.sql:
--------------------------------------------------------------------------------
 1 | -- int_rxnorm_clinical_products_to_clinical_product_components.sql
 2 | --
 3 | -- Expands each clinical product to the clinical product components it is
 4 | -- linked to. Both joins are left joins, so clinical products without a
 5 | -- linked component are still returned (with NULL component columns).
 6 | --
 7 | -- NOTE(review): the output column `clinical_product_compnent_name` is
 8 | -- misspelled ("compnent"). It is kept as-is because other models may
 9 | -- select it by that name; rename it only together with its consumers.
10 | 
11 | with
12 | 
13 | rcp as (
14 | 
15 |     select * from {{ ref('stg_rxnorm__clinical_products') }}
16 | 
17 | ),
18 | 
19 | rcpcl as (
20 | 
21 |     select * from {{ ref('stg_rxnorm__clinical_product_component_links') }}
22 | 
23 | ),
24 | 
25 | rcpc as (
26 | 
27 |     select * from {{ ref('stg_rxnorm__clinical_product_components') }}
28 | 
29 | )
30 | 
31 | select
32 |     rcp.rxcui as clinical_product_rxcui
33 |     , rcp.name as clinical_product_name
34 |     , rcp.tty as clinical_product_tty
35 |     , rcpc.rxcui as clinical_product_component_rxcui
36 |     , rcpc.name as clinical_product_compnent_name
37 |     , rcpc.tty as clinical_product_component_tty
38 |     , rcp.active
39 |     , rcp.prescribable
40 | from rcp
41 | left join rcpcl
42 |     on rcp.rxcui = rcpcl.clinical_product_rxcui
43 | left join rcpc
44 |     on rcpcl.clinical_product_component_rxcui = rcpc.rxcui
45 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_dose_forms.sql:
--------------------------------------------------------------------------------
 1 | -- int_rxnorm_clinical_products_to_dose_forms.sql
 2 | --
 3 | -- Expands each clinical product to its linked components and the dose
 4 | -- form of each component. Every join is a left join, so clinical
 5 | -- products without a component or dose form are still returned.
 6 | 
 7 | with
 8 | 
 9 | clinical_product as (
10 |     select * from {{ ref('stg_rxnorm__clinical_products') }}
11 | )
12 | 
13 | , component_link as (
14 |     select * from {{ ref('stg_rxnorm__clinical_product_component_links') }}
15 | )
16 | 
17 | , component as (
18 |     select * from {{ ref('stg_rxnorm__clinical_product_components') }}
19 | )
20 | 
21 | , dose_form as (
22 |     select * from {{ ref('stg_rxnorm__dose_forms') }}
23 | )
24 | 
25 | select
26 |     clinical_product.rxcui as clinical_product_rxcui
27 |     , clinical_product.name as clinical_product_name
28 |     , clinical_product.tty as clinical_product_tty
29 |     , component.rxcui as clinical_product_component_rxcui
30 |     , component.name as clinical_product_compnent_name
31 |     , component.tty as clinical_product_component_tty
32 |     , dose_form.rxcui as dose_form_rxcui
33 |     , dose_form.name as dose_form_name
34 |     , dose_form.tty as dose_form_tty
35 |     , clinical_product.active
36 |     , clinical_product.prescribable
37 | from clinical_product
38 | left join component_link
39 |     on component_link.clinical_product_rxcui = clinical_product.rxcui
40 | left join component
41 |     on component.rxcui = component_link.clinical_product_component_rxcui
42 | left join dose_form
43 |     on dose_form.rxcui = component.dose_form_rxcui
44 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_ingredient_components.sql:
--------------------------------------------------------------------------------
 1 | -- int_rxnorm_clinical_products_to_ingredient_components.sql
 2 | --
 3 | -- Expands each clinical product to its components, each component's dose
 4 | -- form and ingredient, and each ingredient's linked ingredient components.
 5 | -- Every join is a left join, so a clinical product is returned even when
 6 | -- any of the downstream links is missing.
 7 | 
 8 | with
 9 | 
10 | clinical_product as (
11 |     select * from {{ ref('stg_rxnorm__clinical_products') }}
12 | )
13 | 
14 | , component_link as (
15 |     select * from {{ ref('stg_rxnorm__clinical_product_component_links') }}
16 | )
17 | 
18 | , component as (
19 |     select * from {{ ref('stg_rxnorm__clinical_product_components') }}
20 | )
21 | 
22 | , dose_form as (
23 |     select * from {{ ref('stg_rxnorm__dose_forms') }}
24 | )
25 | 
26 | , ingredient as (
27 |     select * from {{ ref('stg_rxnorm__ingredients') }}
28 | )
29 | 
30 | , ingredient_link as (
31 |     select * from {{ ref('stg_rxnorm__ingredient_component_links') }}
32 | )
33 | 
34 | , ingredient_component as (
35 |     select * from {{ ref('stg_rxnorm__ingredient_components') }}
36 | )
37 | 
38 | select
39 |     clinical_product.rxcui as clinical_product_rxcui
40 |     , clinical_product.name as clinical_product_name
41 |     , clinical_product.tty as clinical_product_tty
42 |     , component.rxcui as clinical_product_component_rxcui
43 |     , component.name as clinical_product_component_name
44 |     , component.tty as clinical_product_component_tty
45 |     , dose_form.rxcui as dose_form_rxcui
46 |     , dose_form.name as dose_form_name
47 |     , dose_form.tty as dose_form_tty
48 |     , ingredient.rxcui as ingredient_rxcui
49 |     , ingredient.name as ingredient_name
50 |     , ingredient.tty as ingredient_tty
51 |     , ingredient_component.rxcui as ingredient_component_rxcui
52 |     , ingredient_component.name as ingredient_component_name
53 |     , ingredient_component.tty as ingredient_component_tty
54 |     , clinical_product.active
55 |     , clinical_product.prescribable
56 | from clinical_product
57 | left join component_link
58 |     on component_link.clinical_product_rxcui = clinical_product.rxcui
59 | left join component
60 |     on component.rxcui = component_link.clinical_product_component_rxcui
61 | left join dose_form
62 |     on dose_form.rxcui = component.dose_form_rxcui
63 | left join ingredient
64 |     on ingredient.rxcui = component.ingredient_rxcui
65 | left join ingredient_link
66 |     on ingredient_link.ingredient_rxcui = ingredient.rxcui
67 | left join ingredient_component
68 |     on ingredient_component.rxcui = ingredient_link.ingredient_component_rxcui
69 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_ingredients.sql:
--------------------------------------------------------------------------------
 1 | -- int_rxnorm_clinical_products_to_ingredients.sql
 2 | --
 3 | -- One row per clinical product, with the RXCUI / name / TTY of its
 4 | -- components, dose forms and ingredients collapsed into ' | '-delimited
 5 | -- strings.
 6 | --
 7 | -- Every string_agg uses the same ORDER BY so the delimited lists come out
 8 | -- in a stable order from build to build; without it the element order
 9 | -- is left to the query plan. string_agg skips NULL inputs.
10 | --
11 | -- NOTE(review): the output column `clinical_product_compnent_name` is
12 | -- misspelled ("compnent"). It is kept as-is because other models may
13 | -- select it by that name; rename it only together with its consumers.
14 | 
15 | with
16 | 
17 | rcp as (
18 | 
19 |     select * from {{ ref('stg_rxnorm__clinical_products') }}
20 | 
21 | ),
22 | 
23 | rcpcl as (
24 | 
25 |     select * from {{ ref('stg_rxnorm__clinical_product_component_links') }}
26 | 
27 | ),
28 | 
29 | rcpc as (
30 | 
31 |     select * from {{ ref('stg_rxnorm__clinical_product_components') }}
32 | 
33 | ),
34 | 
35 | rdf as (
36 | 
37 |     select * from {{ ref('stg_rxnorm__dose_forms') }}
38 | 
39 | ),
40 | 
41 | ri as (
42 | 
43 |     select * from {{ ref('stg_rxnorm__ingredients') }}
44 | 
45 | )
46 | 
47 | select
48 |     rcp.rxcui as clinical_product_rxcui
49 |     , rcp.name as clinical_product_name
50 |     , rcp.tty as clinical_product_tty
51 |     , string_agg(rcpc.rxcui, ' | ' order by rcpc.rxcui, ri.rxcui) as clinical_product_component_rxcui
52 |     , string_agg(rcpc.name, ' | ' order by rcpc.rxcui, ri.rxcui) as clinical_product_compnent_name
53 |     , string_agg(rcpc.tty, ' | ' order by rcpc.rxcui, ri.rxcui) as clinical_product_component_tty
54 |     , string_agg(rdf.rxcui, ' | ' order by rcpc.rxcui, ri.rxcui) as dose_form_rxcui
55 |     , string_agg(rdf.name, ' | ' order by rcpc.rxcui, ri.rxcui) as dose_form_name
56 |     , string_agg(rdf.tty, ' | ' order by rcpc.rxcui, ri.rxcui) as dose_form_tty
57 |     , string_agg(ri.rxcui, ' | ' order by rcpc.rxcui, ri.rxcui) as ingredient_rxcui
58 |     , string_agg(ri.name, ' | ' order by rcpc.rxcui, ri.rxcui) as ingredient_name
59 |     , string_agg(ri.tty, ' | ' order by rcpc.rxcui, ri.rxcui) as ingredient_tty
60 |     , rcp.active
61 |     , rcp.prescribable
62 | from rcp
63 | left join rcpcl
64 |     on rcp.rxcui = rcpcl.clinical_product_rxcui
65 | left join rcpc
66 |     on rcpcl.clinical_product_component_rxcui = rcpc.rxcui
67 | left join rdf
68 |     on rcpc.dose_form_rxcui = rdf.rxcui
69 | left join ri
70 |     on rcpc.ingredient_rxcui = ri.rxcui
71 | group by
72 |     rcp.rxcui
73 |     , rcp.name
74 |     , rcp.tty
75 |     , rcp.active
76 |     , rcp.prescribable
77 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_clinical_products_to_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- int_rxnorm_clinical_products_to_ndcs.sql
 2 | --
 3 | -- Expands each clinical product to its components (with dose form and
 4 | -- ingredient) and, independently, to the NDC rows that reference the
 5 | -- clinical product. Every join is a left join, so clinical products
 6 | -- without components or NDCs are still returned.
 7 | 
 8 | with
 9 | 
10 | clinical_product as (
11 |     select * from {{ ref('stg_rxnorm__clinical_products') }}
12 | )
13 | 
14 | , component_link as (
15 |     select * from {{ ref('stg_rxnorm__clinical_product_component_links') }}
16 | )
17 | 
18 | , component as (
19 |     select * from {{ ref('stg_rxnorm__clinical_product_components') }}
20 | )
21 | 
22 | , dose_form as (
23 |     select * from {{ ref('stg_rxnorm__dose_forms') }}
24 | )
25 | 
26 | , ingredient as (
27 |     select * from {{ ref('stg_rxnorm__ingredients') }}
28 | )
29 | 
30 | , product_ndc as (
31 |     select * from {{ ref('stg_rxnorm__ndcs') }}
32 | )
33 | 
34 | select
35 |     clinical_product.rxcui as clinical_product_rxcui
36 |     , clinical_product.name as clinical_product_name
37 |     , clinical_product.tty as clinical_product_tty
38 |     , component.rxcui as clinical_product_component_rxcui
39 |     , component.name as clinical_product_compnent_name
40 |     , component.tty as clinical_product_component_tty
41 |     , dose_form.rxcui as dose_form_rxcui
42 |     , dose_form.name as dose_form_name
43 |     , dose_form.tty as dose_form_tty
44 |     , ingredient.rxcui as ingredient_rxcui
45 |     , ingredient.name as ingredient_name
46 |     , ingredient.tty as ingredient_tty
47 |     , product_ndc.brand_product_rxcui
48 |     , product_ndc.ndc
49 |     , clinical_product.active
50 |     , clinical_product.prescribable
51 | from clinical_product
52 | left join component_link
53 |     on component_link.clinical_product_rxcui = clinical_product.rxcui
54 | left join component
55 |     on component.rxcui = component_link.clinical_product_component_rxcui
56 | left join dose_form
57 |     on dose_form.rxcui = component.dose_form_rxcui
58 | left join ingredient
59 |     on ingredient.rxcui = component.ingredient_rxcui
60 | left join product_ndc
61 |     on product_ndc.clinical_product_rxcui = clinical_product.rxcui
62 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/rxnorm/int_rxnorm_ndcs_to_products.sql:
--------------------------------------------------------------------------------
 1 | -- int_rxnorm_ndcs_to_products.sql
 2 | --
 3 | -- For each NDC, returns its clinical product and a "product" that is the
 4 | -- brand product when one is matched, otherwise the clinical product.
 5 | 
 6 | with
 7 | 
 8 | rxnorm_ndc as (
 9 |     select * from {{ ref('stg_rxnorm__ndcs') }}
10 | )
11 | 
12 | , clinical_product as (
13 |     select * from {{ ref('stg_rxnorm__clinical_products') }}
14 | )
15 | 
16 | , brand_product as (
17 |     select * from {{ ref('stg_rxnorm__brand_products') }}
18 | )
19 | 
20 | select distinct
21 |     rxnorm_ndc.ndc
22 |     -- coalesce already yields NULL when both sides are NULL
23 |     , coalesce(brand_product.rxcui, clinical_product.rxcui) as product_rxcui
24 |     , coalesce(brand_product.name, clinical_product.name) as product_name
25 |     , coalesce(brand_product.tty, clinical_product.tty) as product_tty
26 |     , clinical_product.rxcui as clinical_product_rxcui
27 |     , clinical_product.name as clinical_product_name
28 |     , clinical_product.tty as clinical_product_tty
29 | from rxnorm_ndc
30 | left join clinical_product
31 |     on clinical_product.rxcui = rxnorm_ndc.clinical_product_rxcui
32 | left join brand_product
33 |     on brand_product.rxcui = rxnorm_ndc.brand_product_rxcui
34 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/umls/_int_umls__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: int_umls_clinical_products_to_crosswalk_codes
 5 |     description: Crosswalk of clinical products to UMLS codes.
 6 | 
 7 |   - name: int_umls_ingredient_components_to_crosswalk_codes
 8 |     description: Crosswalk of ingredient components (TTY = IN) to UMLS codes.
 9 | 
10 |   - name: int_umls_precise_ingredients_to_crosswalk_codes
11 |     description: Crosswalk of precise ingredients (TTY = PIN) to UMLS codes.
12 | 
13 |   - name: int_umls_multiple_ingredients_to_crosswalk_codes
14 |     description: Crosswalk of multiple ingredients (TTY = MIN) to UMLS codes.
15 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/umls/int_umls_ingredient_components_to_crosswalk_codes.sql:
--------------------------------------------------------------------------------
 1 | -- int_umls_ingredient_components_to_crosswalk_codes.sql
 2 | --
 3 | -- RxClass rows with tty = 'IN', inner-joined to every UMLS crosswalk row
 4 | -- whose from_code equals the RxClass class_id. All crosswalk columns are
 5 | -- passed through after the RxClass columns.
 6 | 
 7 | with
 8 | 
 9 | in_classes as (
10 |     select *
11 |     from {{ ref('stg_rxclass__rxclass') }}
12 |     where tty = 'IN'
13 | )
14 | 
15 | , crosswalk_codes as (
16 |     select * from {{ ref('stg_umls__crosswalk_codes') }}
17 | )
18 | 
19 | select
20 |     in_classes.rxcui as ingredient_component_rxcui
21 |     , in_classes.name as ingredient_component_name
22 |     , in_classes.tty as ingredient_component_tty
23 |     , in_classes.rela
24 |     , in_classes.class_id
25 |     , in_classes.class_name
26 |     , in_classes.class_type
27 |     , in_classes.rela_source
28 |     , crosswalk_codes.*
29 | from in_classes
30 | inner join crosswalk_codes
31 |     on in_classes.class_id = crosswalk_codes.from_code
32 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/umls/int_umls_multiple_ingredients_to_crosswalk_codes.sql:
--------------------------------------------------------------------------------
 1 | -- int_umls_multiple_ingredients_to_crosswalk_codes.sql
 2 | --
 3 | -- RxClass rows with tty = 'MIN', inner-joined to every UMLS crosswalk row
 4 | -- whose from_code equals the RxClass class_id. All crosswalk columns are
 5 | -- passed through after the RxClass columns.
 6 | 
 7 | with
 8 | 
 9 | min_classes as (
10 |     select *
11 |     from {{ ref('stg_rxclass__rxclass') }}
12 |     where tty = 'MIN'
13 | )
14 | 
15 | , crosswalk_codes as (
16 |     select * from {{ ref('stg_umls__crosswalk_codes') }}
17 | )
18 | 
19 | select
20 |     min_classes.rxcui as multiple_ingredient_rxcui
21 |     , min_classes.name as multiple_ingredient_name
22 |     , min_classes.tty as multiple_ingredient_tty
23 |     , min_classes.rela
24 |     , min_classes.class_id
25 |     , min_classes.class_name
26 |     , min_classes.class_type
27 |     , min_classes.rela_source
28 |     , crosswalk_codes.*
29 | from min_classes
30 | inner join crosswalk_codes
31 |     on min_classes.class_id = crosswalk_codes.from_code
32 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/intermediate/umls/int_umls_precise_ingredients_to_crosswalk_codes.sql:
--------------------------------------------------------------------------------
 1 | -- int_umls_precise_ingredients_to_crosswalk_codes.sql
 2 | --
 3 | -- RxClass rows with tty = 'PIN', inner-joined to every UMLS crosswalk row
 4 | -- whose from_code equals the RxClass class_id. All crosswalk columns are
 5 | -- passed through after the RxClass columns.
 6 | 
 7 | with
 8 | 
 9 | pin_classes as (
10 |     select *
11 |     from {{ ref('stg_rxclass__rxclass') }}
12 |     where tty = 'PIN'
13 | )
14 | 
15 | , crosswalk_codes as (
16 |     select * from {{ ref('stg_umls__crosswalk_codes') }}
17 | )
18 | 
19 | select
20 |     pin_classes.rxcui as precise_ingredient_rxcui
21 |     , pin_classes.name as precise_ingredient_name
22 |     , pin_classes.tty as precise_ingredient_tty
23 |     , pin_classes.rela
24 |     , pin_classes.class_id
25 |     , pin_classes.class_name
26 |     , pin_classes.class_type
27 |     , pin_classes.rela_source
28 |     , crosswalk_codes.*
29 | from pin_classes
30 | inner join crosswalk_codes
31 |     on pin_classes.class_id = crosswalk_codes.from_code
32 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/classification/_classification__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: atc_codes_to_rxnorm_products
 5 |     description: |
 6 |       ATC 1-4 codes to product-level RXCUIs and descriptions.
 7 |     columns:
 8 |       - name: rxcui
 9 |         description: >
10 |           Product-level RxNorm RXCUI.
11 |         tests:
12 |           - unique
13 |           - not_null
14 |       - name: rxnorm_description
15 |         description: The RxNorm normalized description (STR).
16 |       - name: atc_1_name
17 |         description: The name of the ATC1 level associated with this product.
18 |       - name: atc_2_name
19 |         description: The name of the ATC2 level associated with this product.
20 |       - name: atc_3_name
21 |         description: The name of the ATC3 level associated with this product.
22 |       - name: atc_4_name
23 |         description: The name of the ATC4 level associated with this product.
24 | 
25 |   - name: clinical_products_to_diseases
26 |     description: |
27 |       RxNorm clinical product RXCUIs (SCD / GPCK) to MeSH codes,
28 |       ICD-9 codes, ICD-10 codes, and SNOMED-CT codes.
29 | 
30 |       This table contains multiple different types of relations
31 |       between these concepts, including `may_treat`, `may_prevent`,
32 |       and `ci_with`.
33 | 
34 |       MeSH codes are more general and the other codes are more specific,
35 |       allowing for a hierarchy if desired.
36 | 
37 |       RxClass is used for the relations between RxNorm RXCUIs and
38 |       MeSH codes.
39 | 
40 |       UMLS is used for the relations between MeSH and the other types
41 |       of codes.
42 |     columns:
43 |       - name: clinical_product_rxcui
44 |       - name: clinical_product_name
45 |       - name: clinical_product_tty
46 |       - name: via_ingredient_rxcui
47 |       - name: via_ingredient_name
48 |       - name: via_ingredient_tty
49 |       - name: rela
50 |       - name: rela_source
51 |       - name: class_id
52 |       - name: class_name
53 |       - name: class_type
54 |       - name: disease_id
55 |       - name: disease_source
56 |       # the model selects `to_name as disease_name`; the documented column is the alias
57 |       - name: disease_name
57 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/classification/atc_codes_to_rxnorm_products.sql:
--------------------------------------------------------------------------------
 1 | -- atc_codes_to_rxnorm_products
 2 | --
 3 | -- RxClass rows with rela_source = 'ATCPROD', decorated with the ATC 1-4
 4 | -- codes/names matched on the ATC level-4 code and with the RxNorm product
 5 | -- description (STR) matched on RXCUI. Both lookups are left joins.
 6 | 
 7 | with
 8 | 
 9 | atcprod_classes as (
10 | 
11 |     select *
12 |     from {{ ref('stg_rxclass__rxclass') }}
13 |     where rela_source = 'ATCPROD'
14 | 
15 | ),
16 | 
17 | atc_codes as (
18 | 
19 |     select * from {{ ref('stg_rxnorm__atc_codes') }}
20 | 
21 | ),
22 | 
23 | product_rxcuis as (
24 | 
25 |     select * from {{ ref('stg_rxnorm__product_rxcuis') }}
26 | 
27 | )
28 | 
29 | select distinct
30 |     atcprod_classes.rxcui,
31 |     product_rxcuis.str as rxnorm_description,
32 |     atc_codes.atc_1_code,
33 |     atc_codes.atc_2_code,
34 |     atc_codes.atc_3_code,
35 |     atc_codes.atc_4_code,
36 |     atc_codes.atc_1_name,
37 |     atc_codes.atc_2_name,
38 |     atc_codes.atc_3_name,
39 |     atc_codes.atc_4_name
40 | from atcprod_classes
41 | left join atc_codes
42 |     on atcprod_classes.class_id = atc_codes.atc_4_code
43 | left join product_rxcuis
44 |     on atcprod_classes.rxcui = product_rxcuis.rxcui
45 | order by rxcui
46 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/classification/clinical_products_to_diseases.sql:
--------------------------------------------------------------------------------
 1 | -- clinical_products_to_diseases.sql
 2 | --
 3 | -- Publishes the clinical-product crosswalk from the intermediate layer,
 4 | -- renaming the crosswalk target columns (to_code / to_source / to_name)
 5 | -- to disease_id / disease_source / disease_name.
 6 | 
 7 | with
 8 | 
 9 | crosswalk as (
10 | 
11 |     select * from {{ ref('int_umls_clinical_products_to_crosswalk_codes') }}
12 | 
13 | )
14 | 
15 | select
16 |     clinical_product_rxcui
17 |     , clinical_product_name
18 |     , clinical_product_tty
19 |     , via_ingredient_rxcui
20 |     , via_ingredient_name
21 |     , via_ingredient_tty
22 |     , rela
23 |     , rela_source
24 |     , class_id
25 |     , class_name
26 |     , class_type
27 |     , to_code as disease_id
28 |     , to_source as disease_source
29 |     , to_name as disease_name
30 | from crosswalk
31 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/fda_excluded/fda_excluded.sql:
--------------------------------------------------------------------------------
 1 | -- fda_excluded.sql
 2 | --
 3 | -- Every FDA excluded package row, left-joined to its product row on
 4 | -- productid, plus an ndc11 value produced by the ndc_to_11 macro from
 5 | -- the package code.
 6 | 
 7 | select
 8 |     {{ ndc_to_11('pack.ndcpackagecode') }} as ndc11,
 9 |     pack.productid,
10 |     pack.productndc,
11 |     producttypename,
12 |     proprietaryname,
13 |     proprietarynamesuffix,
14 |     nonproprietaryname,
15 |     dosageformname,
16 |     routename,
17 |     pack.startmarketingdate,
18 |     pack.endmarketingdate,
19 |     marketingcategoryname,
20 |     applicationnumber,
21 |     labelername,
22 |     substancename,
23 |     active_numerator_strength,
24 |     active_ingred_unit,
25 |     pharm_classes,
26 |     deaschedule,
27 |     pack.ndc_exclude_flag,
28 |     listing_record_certified_through,
29 |     ndcpackagecode,
30 |     packagedescription,
31 |     sample_package
32 | from {{ source('fda_excluded', 'fda_excluded_package') }} as pack
33 | left join {{ source('fda_excluded', 'fda_excluded_product') }} as prod
34 |     on prod.productid = pack.productid


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/ndc/_ndc__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: all_ndc_descriptions
 5 |     description: |
 6 |       NDC to RxNorm description (with RXCUI) and FDA description.
 7 | 
 8 |       NOTE: If RxNorm and RxNorm Historical have same NDCs, we prefer RxNorm.
 9 |       If any of the 3 FDA sources have same NDCs, we prefer FDA NDC, FDA Excluded, FDA Unfinished in that order.
10 |     columns:
11 |       - name: ndc
12 |         description: The NDC in NDC11 format. This column contains only unique values.
13 |         tests:
14 |           - unique
15 |           - not_null
16 |       - name: rxcui
17 |         description: >
18 |           If NDC was found in an RxNorm source, we have an associated RXCUI.
19 |           If NULL, the source is likely FDA.
20 |       - name: rxnorm_description
21 |         description: If RxNorm source, will have the RxNorm normalized description (STR).
22 |       - name: fda_description
23 |         description: If FDA source, will have a description cobbled together from FDA columns for lack of a normalized FDA description.
24 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/ndc/gtins.sql:
--------------------------------------------------------------------------------
 1 | -- gtins.sql
 2 | --
 3 | -- Builds gtin14 / gtin12 values (including check digit) and a GS1 company
 4 | -- prefix from each hyphenated NDC package code.
 5 | --
 6 | -- gtin13 is '003' + the hyphen-less package code, gtin11 is '3' + the same
 7 | -- code. gtin13 is therefore gtin11 with two leading zeros, which add
 8 | -- nothing to the weighted sum, so one check digit serves both values.
 9 | 
10 | with
11 | 
12 | ndcs as (
13 | 
14 |     select
15 |         ndc11,
16 |         replace(ndcpackagecode, '-', '') as ndc10,
17 |         ndcpackagecode as ndc,
18 |         concat('003', replace(ndcpackagecode, '-', '')) as gtin13,
19 |         concat('3', replace(ndcpackagecode, '-', '')) as gtin11,
20 |         concat('03', split_part(ndcpackagecode, '-', 1)) as gs1_company_prefix
21 |     from {{ ref('stg_fda_ndc__ndcs') }}
22 | 
23 | ),
24 | 
25 | unique_codes as (
26 | 
27 |     -- one row per package code: the digit sum below is grouped by ndc, so
28 |     -- a package code appearing on several source rows would otherwise be
29 |     -- summed several times and yield a wrong check digit
30 |     select distinct
31 |         ndc,
32 |         gtin13
33 |     from ndcs
34 | 
35 | ),
36 | 
37 | digits as (
38 | 
39 |     -- split the 13-character gtin13 into individual digits
40 |     select
41 |         ndc,
42 |         position,
43 |         substring(gtin13 from position for 1)::int as digit
44 |     from unique_codes,
45 |         generate_series(1, 13) as position
46 | 
47 | ),
48 | 
49 | products as (
50 | 
51 |     -- apply the alternating weights: odd positions x3, even positions x1
52 |     select
53 |         *,
54 |         case
55 |             when position % 2 = 1
56 |                 then digit * 3
57 |             else digit * 1
58 |         end as product
59 |     from digits
60 | 
61 | ),
62 | 
63 | sums as (
64 | 
65 |     -- sum of the weighted digits of each package code
66 |     select
67 |         ndc,
68 |         sum(product) as sum
69 |     from products
70 |     group by ndc
71 | 
72 | ),
73 | 
74 | check_digits as (
75 | 
76 |     -- round the sum UP to the next multiple of 10 and subtract the sum
77 |     -- (yields 0 when the sum is already a multiple of 10)
78 |     select
79 |         ndc,
80 |         ceil(sum / 10.0) * 10 - sum as check_digit
81 |     from sums
82 | 
83 | ),
84 | 
85 | gtin14s as (
86 | 
87 |     -- append the check digit to gtin13 and gtin11
88 |     select
89 |         ndc11,
90 |         ndc10,
91 |         ndcs.ndc,
92 |         concat(gtin13, check_digit) as gtin14,
93 |         concat(gtin11, check_digit) as gtin12,
94 |         gs1_company_prefix
95 |     from ndcs
96 |     left join check_digits
97 |         on check_digits.ndc = ndcs.ndc
98 | 
99 | )
100 | 
101 | select * from gtin14s
102 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/ndc/ndc_associations.sql:
--------------------------------------------------------------------------------
1 | -- ndc_associations: pass-through of stg_fda_ndc__ndc_associations
2 | 
3 | with staged_associations as (
4 |     select * from {{ ref('stg_fda_ndc__ndc_associations') }}
5 | )
6 | 
7 | select * from staged_associations
8 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/ndc/ndcs_to_label_images.sql:
--------------------------------------------------------------------------------
 1 | -- ndcs_to_label_images
 2 | --
 3 | -- Combines (set_id, ndc, image) rows from the two DailyMed image models
 4 | -- (UNION, so exact duplicates collapse) and adds the ndc11 form of the
 5 | -- NDC plus DailyMed URLs for the image and for the SPL page.
 6 | 
 7 | with
 8 | 
 9 | xml_images as (
10 |     select * from {{ ref('int_dailymed_image_xml_ndcs') }}
11 | )
12 | 
13 | , name_images as (
14 |     select * from {{ ref('int_dailymed_image_name_ndcs') }}
15 | )
16 | 
17 | , combined_images as (
18 |     select set_id, ndc, image from xml_images
19 |     union
20 |     select set_id, ndc, image from name_images
21 | )
22 | 
23 | , labelled_images as (
24 |     select
25 |         set_id
26 |         , ndc
27 |         , {{ ndc_to_11('ndc') }} as ndc11
28 |         , concat('https://dailymed.nlm.nih.gov/dailymed/image.cfm?name=', image, '&setid=', set_id) as image_url
29 |         , image as image_file
30 |         , concat('https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid=', set_id) as dailymed_spl_url
31 |     from combined_images
32 | )
33 | 
34 | select * from labelled_images
35 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/ndc/pack_size.sql:
--------------------------------------------------------------------------------
 1 | -- pack_size.sql
 2 | --
 3 | -- Per ndc11: the outer unit of packaging component line 1, the inner unit
 4 | -- of the highest-numbered component line (reported as 'KIT' when it
 5 | -- contains 'KIT '), the total product and the package description.
 6 | 
 7 | with
 8 | 
 9 | packaging_components as (
10 |     select * from {{ ref('int_fda_packaging_components') }}
11 | )
12 | 
13 | , ranked_components as (
14 |     -- rank 1 = the component with the largest component_line for the ndc11
15 |     select
16 |         ndc11
17 |         , inner_unit
18 |         , row_number() over (
19 |             partition by ndc11
20 |             order by component_line desc
21 |         ) as row_num
22 |     from packaging_components
23 | )
24 | 
25 | , innermost_unit as (
26 |     select
27 |         ndc11
28 |         , inner_unit
29 |     from ranked_components
30 |     where row_num = 1
31 | )
32 | 
33 | , outermost_unit as (
34 |     select
35 |         ndc11
36 |         , outer_unit
37 |     from packaging_components
38 |     where component_line = 1
39 | )
40 | 
41 | select distinct
42 |     packaging_components.ndc11
43 |     , outermost_unit.outer_unit as outermost_unit
44 |     , packaging_components.total_product
45 |     , case
46 |         when innermost_unit.inner_unit like '%KIT %' then 'KIT'
47 |         else innermost_unit.inner_unit
48 |     end as innermost_unit
49 |     , packaging_components.packagedescription
50 | from packaging_components
51 | left join innermost_unit
52 |     on packaging_components.ndc11 = innermost_unit.ndc11
53 | left join outermost_unit
54 |     on packaging_components.ndc11 = outermost_unit.ndc11
55 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/pricing/pricing.sql:
--------------------------------------------------------------------------------
 1 | -- pricing.sql
 2 | --
 3 | -- Takes the union of NDCs present in NADAC pricing or in MCCPD and
 4 | -- left-joins each source's description and unit prices onto that list.
 5 | 
 6 | with
 7 | 
 8 | nadac as (
 9 |     select * from {{ ref('int_nadac_pricing') }}
10 | )
11 | 
12 | , mccpd as (
13 |     -- TODO: make a staging table and int table instead of hitting source in a mart
14 |     select * from {{ source('mccpd', 'mccpd') }}
15 | )
16 | 
17 | , all_ndcs as (
18 |     select ndc from nadac
19 |     union
20 |     select ndc from mccpd
21 | )
22 | 
23 | , pricing as (
24 |     select
25 |         all_ndcs.ndc
26 |         , nadac.ndc_description as nadac_description
27 |         , nadac.nadac_per_unit
28 |         , mccpd.medication_name as mccpd_description
29 |         , mccpd.unit_billing_price
30 |         , mccpd.unit_price
31 |     from all_ndcs
32 |     left join nadac
33 |         on all_ndcs.ndc = nadac.ndc
34 |     left join mccpd
35 |         on all_ndcs.ndc = mccpd.ndc
36 | )
37 | 
38 | select * from pricing
39 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/pricing/pricing_historical.sql:
--------------------------------------------------------------------------------
 1 | -- pricing_historical.sql
 2 | 
 3 | with
 4 | 
 5 | nadac_historical as (
 6 | 
 7 |     select
 8 |         *
 9 |     from {{ ref('int_nadac_historical_pricing') }}
10 | 
11 | ),
12 | 
13 | mccpd as (
14 | 
15 |     select
16 |         *
17 |     -- TODO: make a staging table and int table instead of hitting source in a mart
18 |     from {{ source('mccpd', 'mccpd') }}
19 | ),
20 | 
21 | all_ndcs as (
22 | 
23 |     select ndc from nadac_historical
24 | 
25 |     union
26 | 
27 |     select ndc from mccpd
28 | 
29 | ),
30 | 
31 | pricing as (
32 | 
33 |     select
34 |         all_ndcs.*,
35 |         nadac_historical.ndc_description as nadac_description,
36 |         nadac_historical.nadac_per_unit,
37 |         mccpd.medication_name as mccpd_description,
38 |         mccpd.unit_billing_price,
39 |         mccpd.unit_price
40 |     from all_ndcs
41 |     left join nadac_historical
42 |         on nadac_historical.ndc = all_ndcs.ndc
43 |     left join mccpd
44 |         on mccpd.ndc = all_ndcs.ndc
45 | 
46 | )
47 | 
48 | select * from pricing
49 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/products/brand_products_with_related_ndcs.sql:
--------------------------------------------------------------------------------
 1 | with brand_products as (
 2 |     select * from {{ ref('stg_rxnorm__brand_products') }}
 3 | )
 4 | 
 5 | , fda_ndcs as (
 6 |     select * from {{ ref('stg_fda_ndc__ndcs') }}
 7 | )
 8 | 
 9 | , rxnorm_ndcs_to_products as (
10 |     select * from {{ ref('int_rxnorm_ndcs_to_products') }}
11 | )
12 | 
13 | , map as (
14 |     select
15 |         prod.tty as product_tty
16 |         , prod.rxcui as product_rxcui
17 |         , prod.name as product_name
18 |         , ndc.product_tty as ndc_product_tty
19 |         , ndc.product_rxcui as ndc_product_rxcui
20 |         , ndc.product_name as ndc_product_name
21 |         , ndc.ndc
22 |         , fda.product_startmarketingdate
23 |         , fda.package_startmarketingdate
24 |     from brand_products prod
25 |     left join rxnorm_ndcs_to_products ndc
26 |         on ndc.clinical_product_rxcui = prod.clinical_product_rxcui
27 |     left join fda_ndcs fda
28 |         on fda.ndc11 = ndc.ndc
29 |     order by prod.rxcui
30 | )
31 | 
32 | select
33 |     *
34 | from map
35 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/products/product_synonyms.sql:
--------------------------------------------------------------------------------
 1 | with
 2 | 
 3 | rxnorm_synonyms as (
 4 | 
 5 |     select
 6 |         str as synonym,
 7 |         rxcui as product_rxcui,
 8 |         'RXNORM' as source
 9 |     from {{ source('rxnorm', 'rxnorm_rxnconso') }}
10 |     where sab = 'RXNORM'
11 |     and tty in('PSN', 'SY', 'TMSY', 'ET')
12 | 
13 | ),
14 | 
15 | nadac_synonyms as (
16 | 
17 |     select distinct
18 |         ndc_description as synonym,
19 |         product_rxcui,
20 |         'NADAC' as source
21 |     from {{ source('nadac', 'nadac') }} n
22 |     left join {{ ref('int_rxnorm_ndcs_to_products') }} r
23 |         on r.ndc = n.ndc
24 |     where r.product_rxcui is not null
25 | 
26 | ),
27 | 
28 | fda_synonyms as (
29 |     -- synonym text per FDA NDC mapped to an RxNorm product: "<generic> <strength> <unit> <dose form> [<brand> <suffix>]"
30 |     select distinct
31 |         trim(concat(
32 |             nonproprietaryname
33 |             , ' '
34 |             , active_numerator_strength
35 |             , ' '
36 |             , active_ingred_unit
37 |             , ' '
38 |             , lower(dosageformname)
39 |             , case when proprietaryname is not null then concat(
40 |                 ' ['
41 |                 , proprietaryname
42 |                 , case when proprietarynamesuffix is not null then concat(
43 |                     ' '
44 |                     , proprietarynamesuffix
45 |                     ) else '' end
46 |                 , ']'
47 |                 ) else '' end
48 |             )) as synonym,
49 |             product_rxcui,
50 |             'FDA' as source
51 |     from {{ ref('stg_fda_ndc__ndcs') }} f -- ref() instead of a hard-coded sagerx_dev schema
52 |     left join {{ ref('int_rxnorm_ndcs_to_products') }} r
53 |         on r.ndc = f.ndc11
54 |     where r.product_rxcui is not null
55 | 
56 | ),
57 | 
58 | all_synonyms as (
59 |     
60 |     select * from rxnorm_synonyms
61 | 
62 |     union
63 | 
64 |     select * from nadac_synonyms
65 | 
66 |     union
67 | 
68 |     select * from fda_synonyms
69 | 
70 | ),
71 | 
72 | rxnorm_products as (
73 | 
74 |     select * from {{ ref('stg_rxnorm__products') }}
75 | 
76 | ),
77 | 
78 | prescribable_product_synonyms as (
79 | 
80 |     select
81 |         all_synonyms.*
82 |     from all_synonyms
83 |     inner join rxnorm_products
84 |         on rxnorm_products.rxcui = all_synonyms.product_rxcui
85 |     where rxnorm_products.prescribable = true
86 | 
87 | )
88 | 
89 | select * from prescribable_product_synonyms
90 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/products/products.sql:
--------------------------------------------------------------------------------
 1 | with
 2 | 
 3 | rxnorm_products as (
 4 | 
 5 |     select * from {{ ref('stg_rxnorm__products') }}
 6 | 
 7 | ),
 8 | 
 9 | rxnorm_psn as (
10 | 
11 |     select
12 |         rxcui,
13 |         str        
14 |     from {{ source('rxnorm', 'rxnorm_rxnconso') }}
15 |     where sab = 'RXNORM'
16 |         and tty = 'PSN'
17 | 
18 | ),
19 | 
20 | rxnorm_clinical_products_to_ingredients as (
21 | 
22 |     select * from {{ ref('int_rxnorm_clinical_products_to_ingredients') }}
23 | 
24 | )
25 | 
26 | select
27 |     prod.rxcui as product_rxcui
28 |     , prod.name as product_name
29 |     , prod.tty as product_tty
30 |     , psn.str as prescribable_name
31 |     , case
32 |         when prod.tty in ('SBD', 'BPCK') then 'brand'
33 |         when prod.tty in ('SCD', 'GPCK') then 'generic'
34 |         end as brand_vs_generic
35 |     , substring(prod.name from '\[(.*)\]') as brand_name
36 |     , cping.clinical_product_rxcui
37 |     , cping.clinical_product_name
38 |     , cping.clinical_product_tty
39 |     , cping.ingredient_name
40 |     -- strength - couldn't easily get strength at this grain - can if needed
41 |     , cping.dose_form_name
42 |     , prod.active
43 |     , prod.prescribable
44 | from rxnorm_products prod
45 | left join rxnorm_clinical_products_to_ingredients cping
46 |     on cping.clinical_product_rxcui = prod.clinical_product_rxcui
47 | left join rxnorm_psn psn
48 |     on psn.rxcui = prod.rxcui
49 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/products/products_to_inactive_ingredients.sql:
--------------------------------------------------------------------------------
 1 | -- products_to_inactive_ingredients.sql
 2 | 
 3 | with products_to_inactive_ingredients as (
 4 |     select * from {{ ref('int_mthspl_products_to_inactive_ingredients') }}
 5 | )
 6 | 
 7 | , unii_codes as (
 8 |     select * from {{ ref('stg_fda_unii__unii_codes') }}
 9 | )
10 | 
11 | , usp_preservatives as (
12 |     select * from {{ ref('usp_preservatives') }}
13 | )
14 | 
15 | select
16 |     ndc9
17 |     , ndc
18 |     , unii_codes.unii as fda_unii_code
19 |     , unii_codes.display_name as fda_unii_display_name
20 |     , unii_codes.pubchem as pubchem_id
21 |     , max(case
22 |         when preservative.cas_rn is not null
23 |             then 1
24 |         end) as preservative
25 |     , product_rxcui
26 |     , string_agg(product_name, ' | ') as product_name
27 |     , product_tty
28 |     , inactive_ingredient_unii
29 |     , inactive_ingredient_rxcui
30 |     , string_agg(inactive_ingredient_name, ' | ') as inactive_ingredient_name
31 |     , inactive_ingredient_tty
32 |     , active
33 |     , prescribable
34 | from products_to_inactive_ingredients
35 | /*
36 | need to join unii_codes twice - once
37 | to pull in the actual UNII -> display
38 | name mapping, and another initial one
39 | to try to map substance RXCUIs to FDA
40 | UNII RXCUIs.
41 | */
42 | left join unii_codes rxcui_to_unii
43 |     on rxcui_to_unii.rxcui = inactive_ingredient_rxcui
44 | /*
45 | if MTHSPL (DailyMed) has a substance UNII,
46 | use that. if it does not, try to map the
47 | substance RXCUI to the FDA UNII RXCUI and
48 | then use the resulting matched UNII to pull
49 | in the UNII display name.
50 | */
51 | left join unii_codes
52 |     on unii_codes.unii = case
53 |         when (
54 |             inactive_ingredient_unii is not null 
55 |             and
56 |             inactive_ingredient_unii != 'NOCODE'
57 |         ) then inactive_ingredient_unii
58 |         else rxcui_to_unii.unii
59 |         end
60 | left join usp_preservatives preservative
61 |     on preservative.cas_rn = unii_codes.rn
62 | group by
63 |     ndc9
64 |     , ndc
65 |     , unii_codes.unii
66 |     , unii_codes.display_name
67 |     , unii_codes.pubchem
68 |     , product_rxcui
69 |     , product_tty
70 |     , inactive_ingredient_unii
71 |     , inactive_ingredient_rxcui
72 |     , inactive_ingredient_tty
73 |     , active
74 |     , prescribable
75 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/marts/purdue/scorecard_data.sql:
--------------------------------------------------------------------------------
 1 | with
 2 | 
 3 | ing_to_report as (
 4 | 
 5 |     select * from {{ ref('int_inactive_ingredients_to_fda_enforcement_reports') }}
 6 | 
 7 | )
 8 | 
 9 | select * from ing_to_report
10 | where active_ingredient_name in (
11 |     'risperidone'
12 |     , 'adalimumab'
13 |     , 'lidocaine'
14 |     , 'carbamazepine'
15 |     , 'phenytoin'
16 |     , 'midazolam'
17 |     , 'valproate'
18 |     , 'tacrolimus'
19 |     , 'amoxicillin'
20 |     , 'hydrocortisone'
21 |     , 'cetirizine'
22 |     , 'pertuzumab'
23 |     , 'methylphenidate'
24 |     , 'erythromycin'
25 |     , 'gabapentin'
26 |     , 'lopinavir / ritonavir'
27 |     , 'levothyroxine'
28 |     , 'albuterol'
29 |     )
30 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/ashp/_ashp__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_ashp__current_drug_shortages
 5 |     description: Current ASHP drug shortages
 6 |     columns:
 7 |       - name: id
 8 |         description: The ID of the shortage as defined by the detail page URL ID
 9 |         data_tests:
10 |           - unique
11 |           - not_null
12 |       - name: name
13 |         description: The name of the shortage as described by ASHP
14 |       - name: url
15 |         description: The full URL link to the shortage detail page
16 |       - name: shortage_reasons
17 |         description: A list of reasons for the shortage
18 |       - name: resupply_dates
19 |         description: A list of resupply dates
20 |       - name: alternatives_and_management
21 |         description: Alternatives and management information
22 |       - name: care_implications
23 |         description: Implications on patient care
24 |       - name: safety_notices
25 |         description: Safety notices related to the shortage
26 |       - name: created_date
27 |         description: The date the shortage record was created by ASHP
28 |       - name: updated_date
29 |         description: The date the shortage record was last updated by ASHP
30 | 
31 |   - name: stg_ashp__current_drug_shortages_ndcs
32 |     description: Affected and available NDCs for each ASHP drug shortage.
33 |     columns:
34 |       - name: id
35 |         description: The ID of the shortage as defined by the detail page URL ID
36 |       - name: product
37 |         description: The NDC product description
38 |       - name: manufacturer
39 |         description: The NDC manufacturer
40 |       - name: description
41 |         description: The NDC description relevant to the shortage
42 |       - name: ndc_11
43 |         description: The NDC package code in NDC-11 format
44 |       - name: ndc_type
45 |         description: |
46 |           NDC package status as it relates to the shortage
47 |           (either 'affected' or 'available')


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/ashp/_ashp__sources.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | sources:
 4 |   - name: ashp
 5 |     description: Current drug shortage list from ASHP
 6 |     schema: sagerx_lake
 7 |     tables:
 8 |       - name: ashp_shortage_list
 9 |         description: Current drug shortage list from ASHP
10 |         columns:
11 |           - name: name
12 |             description: The name of the shortage as described by ASHP
13 |           - name: detail_url
14 |             description: |
15 |               The partial URL for the detail about the shortage.
16 |               Requires a prefix to be a complete URL. That prefix is
17 |               `https://www.ashp.org/drug-shortages/current-shortages/`.
18 |           - name: shortage_reasons
19 |             description: A list of reasons for the shortage
20 |           - name: resupply_dates
21 |             description: A list of resupply dates
22 |           - name: alternatives_and_management
23 |             description: Alternatives and management information
24 |           - name: care_implications
25 |             description: Implications on patient care
26 |           - name: safety_notices
27 |             description: Safety notices related to the shortage
28 |           - name: created_date
29 |             description: The date the shortage record was created by ASHP
30 |           - name: updated_date
31 |             description: The date the shortage record was last updated by ASHP
32 | 
33 |       - name: ashp_shortage_list_ndcs
34 |         description: Affected and available NDCs for each ASHP drug shortage.
35 |         columns:
36 |           - name: detail_url
37 |             description: |
38 |               The partial URL for the shortage detail page,
39 |               containing an id parameter which can be used as
40 |               an index
41 |           - name: ndc_description
42 |             description: The NDC description statement associated with the shortage
43 |           - name: ndc_type
44 |             description: |
45 |               NDC package status as it relates to the shortage
46 |               (either 'affected' or 'available')
47 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/ashp/stg_ashp__current_drug_shortages.sql:
--------------------------------------------------------------------------------
 1 | -- stg_ashp__current_drug_shortages.sql
 2 | 
 3 | with 
 4 | 
 5 | ashp_shortage_list as (
 6 | 
 7 |     select * from {{ source('ashp', 'ashp_shortage_list') }}
 8 | 
 9 | ),
10 | 
11 | current_drug_shortages as (
12 | 
13 |     select
14 |         split_part(detail_url, '=', 2)::int as id,
15 |         name,
16 |         concat(
17 |             'https://www.ashp.org/drug-shortages/current-shortages/',
18 |             lower(detail_url)) as url,
19 |         shortage_reasons::jsonb,
20 |         resupply_dates::jsonb,
21 |         alternatives_and_management::jsonb,
22 |         care_implications::jsonb,
23 |         safety_notices::jsonb,
24 |         created_date::date,
25 |         updated_date::date
26 |     from ashp_shortage_list
27 | 
28 | )
29 | 
30 | select
31 |     *
32 | from current_drug_shortages
33 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/ashp/stg_ashp__current_drug_shortages_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_ashp__current_drug_shortages_ndcs.sql
 2 | 
 3 | with 
 4 | 
 5 | ashp_shortage_list as (
 6 | 
 7 |     select
 8 |         detail_url,
 9 |         -- Prepare description by removing any commas inside of parentheses
10 |         regexp_replace(ndc_description, '\(([^)]*),([^)]*)\)', '(\1\2)', 'g') as ndc_description,
11 |         ndc_type
12 |     from {{ source('ashp', 'ashp_shortage_list_ndcs') }}
13 | 
14 | ),
15 | 
16 | current_drug_shortages_ndcs as (
17 | 
18 |     select
19 |         split_part(detail_url, '=', 2)::int as id,
20 |         split_part(ndc_description, ',', 1) as product,
21 |         split_part(ndc_description, ',', 2) as manufacturer,
22 |         -- Split NDC description by commas and keep array items 3 through n-1
23 |         array_to_string((string_to_array(ndc_description, ','))[3:array_upper(string_to_array(ndc_description, ','), 1)-1], ',') as description,
24 |         -- Get NDC using regular expression
25 |         replace((regexp_match(ndc_description, '\d{5}\-\d{4}\-\d{2}'))[1], '-', '') as ndc_11,
26 |         ndc_type
27 |     from ashp_shortage_list
28 | 
29 | )
30 | 
31 | select
32 |     *
33 | from current_drug_shortages_ndcs
34 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__interactions.sql:
--------------------------------------------------------------------------------
 1 |  /* staging.stg_dailymed__interactions */
 2 | 
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | )
 8 | 
 9 | select zip_file, y.*
10 |     from   xml_table x,
11 |             xmltable('dailymed/InteractionText'
12 |               passing xml_column
13 |               columns 
14 |                 document_id 	 text  path '../documentId',
15 | 				set_id  		 text  path '../SetId',
16 | 				version_number	 text  path '../VersionNumber',
17 | 				interaction_text text path '.'
18 | 					) y
19 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__main.sql:
--------------------------------------------------------------------------------
 1 |  /* staging.stg_dailymed__main */
 2 | -- one row per <dailymed> element: header fields plus a link built from the set id
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | )
 8 | -- the link column is aliased so it does not materialize under the default name "?column?"
 9 | select zip_file, y.*, 'https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid=' || y.set_id as url
10 |     from   xml_table x,
11 |             xmltable('dailymed'
12 |               passing xml_column
13 |               columns 
14 |                 document_id 	TEXT  PATH './documentId',
15 | 				set_id  		TEXT  PATH './SetId',
16 | 				version_number	TEXT  PATH './VersionNumber',
17 |   				effective_date	TEXT  PATH './EffectiveDate',
18 | 				market_status	TEXT  PATH './MarketStatus',
19 | 				application_number TEXT PATH './ApplicationNumber'
20 | 					) y
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__ndcs.sql:
--------------------------------------------------------------------------------
 1 |  /* sagerx_dev.stg_dailymed__ndcs */
 2 | 
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | ),
 8 | 
 9 | sql_table as (
10 | 
11 | 	select zip_file, y.*
12 | 		from   xml_table x,
13 | 				xmltable('dailymed/NDCList/NDC'
14 | 				passing xml_column
15 | 				columns 
16 | 					document_id 	text  path '../../documentId',
17 | 					set_id  		text  path '../../SetId',
18 | 					version_number	 text  path '../../VersionNumber',
19 | 					ndc				text  path '.'
20 | 						) y
21 | 
22 | ),
23 | 
24 | cte as (
25 | 
26 | 	select
27 | 		*,
28 | 		{{ ndc_to_11('ndc') }} as ndc11
29 | 	
30 | 	from sql_table
31 | 
32 | )
33 | 
34 | select * from cte
35 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__organization_activities.sql:
--------------------------------------------------------------------------------
 1 | /* staging.stg_dailymed__organization_activities */
 2 | -- one row per establishment <function> element (an activity) in each DailyMed document
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | )
 8 | -- document-level fields sit three levels above <function>; DUN sits on the parent <establishment>
 9 | select zip_file, y.*
10 |     from   xml_table x,
11 |             xmltable('/dailymed/Organizations/establishment/function'
12 |               passing xml_column
13 |               columns 
14 |                 document_id 	text  path '../../../documentId',
15 | 				set_id  		text  path '../../../SetId',
16 | 				version_number	 text  path '../../../VersionNumber',
17 | 				dun				text  path '../DUN',
18 | 	            activity		text  path './name'
19 | 					) y
20 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__organization_items.sql:
--------------------------------------------------------------------------------
 1 | /* staging.stg_dailymed__organization_items */
 2 | 
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | )
 8 | 
 9 | select zip_file, y.*
10 |     from   xml_table x,
11 |             xmltable('/dailymed/Organizations/establishment/function/item_list/item'
12 |               passing xml_column
13 |               columns
14 |                 document_id 	text  path '../../../../../documentId',
15 | 				set_id  		text  path '../../../../../SetId',
16 | 				version_number	 text  path '../../../../../VersionNumber',
17 | 				dun				text  path '../../../DUN',
18 | 	            activity		text  path '../../name',
19 | 				item			text  path '.'
20 | 					) y
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__organization_texts.sql:
--------------------------------------------------------------------------------
 1 |  /* staging.stg_dailymed__organization_texts */
 2 | 
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | )
 8 | 
 9 | select zip_file
10 | 		, document_id
11 | 		, set_id 
12 | 		, version_number
13 | 		, organization_text
14 | 		, row_num
15 | from (select zip_file
16 | 		, y.document_id
17 | 		, y.set_id
18 | 		, y.version_number
19 | 		, y.organization_text
20 | 		--,regexp_matches(organization_text, '(manufactured|distributed) (by|for):([\s\S]*)(?=manufactured|distributed|made)', 'ig') as mfdg_by_match
21 | 		,row_number() over (partition by zip_file order by length(organization_text) desc) as row_num
22 |     from   xml_table x,
23 |             xmltable('/dailymed/Organizations/OrganizationsText'
24 |               passing xml_column
25 |               columns 
26 |                 document_id 		text path '../../documentId',
27 | 				set_id  			text path '../../SetId',
28 | 				version_number	 text  path '../../VersionNumber',
29 | 				organization_text	text path '.' 
30 | 					) y
31 | 	) z
32 | where row_num = 1
33 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__organizations.sql:
--------------------------------------------------------------------------------
 1 |  /* staging.stg_dailymed__organizations */
 2 | 
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | )
 8 | 
 9 | select zip_file, y.*
10 |     from   xml_table x,
11 |             xmltable('/dailymed/Organizations/establishment'
12 |               passing xml_column
13 |               columns 
14 |                 document_id 	text  path '../../documentId',
15 | 				set_id  		text  path '../../SetId',
16 | 				version_number	 text  path '../../VersionNumber',
17 | 				dun				text  path './DUN',
18 | 	            org_name		text  path './name',
19 | 	            org_type		text  path './type'
20 | 					) y
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__package_label_section_images.sql:
--------------------------------------------------------------------------------
 1 |  /* stg_dailymed__package_label_section_images */
 2 | 
 3 | with
 4 | 
 5 | package_label_sections as
 6 | (
 7 | 	select * from {{ ref('stg_dailymed__package_label_sections') }}
 8 | ),
 9 | 
10 | images as (
11 | 
12 | 	select
13 | 		p.set_id,
14 | 		p.id as package_label_section_id,
15 | 		y.*
16 | 	from package_label_sections p,
17 | 		xmltable(
18 | 			'//MediaList/Media' passing media_list
19 | 			columns 
20 | 				image 		text  path 'Image',
21 | 				image_id  	text  path 'ID'
22 | 		) y
23 | 
24 | ),
25 | 
26 | id_images as (
27 | 
28 | 	select
29 | 		row_number() over() as id,
30 | 		*
31 | 	from images
32 | 
33 | )
34 | 
35 | select * from id_images
36 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__package_label_section_ndcs.sql:
--------------------------------------------------------------------------------
 1 |  /* stg_dailymed__package_label_section_ndcs */
 2 | 
 3 | with
 4 | 
 5 | package_label_sections as
 6 | (
 7 | 	select * from {{ ref('stg_dailymed__package_label_sections') }}
 8 | ),
 9 | 
10 | ndcs as (
11 | 
12 | 	select
13 | 		p.set_id,
14 | 		p.id as package_label_section_id,
15 | 		-- TODO: account for NDCs with spaces instead of dashes
16 | 		-- example ndc 55292 140 01
17 | 		-- example set_id a0aad470-3f38-af97-e053-2995a90a383a
18 | 		regexp_replace(regexp_replace((regexp_matches(p.text, '(?:\d{4}|\d{5})\s*(?:-|–)\s*\d{3,6}\s*(?:-|–)\s*\d{1,2}|\d{11}|\d{10}', 'g'))[1], '\s', '', 'g'), '–', '-') as ndc
19 | 	from package_label_sections p
20 | 
21 | ),
22 | 
23 | id_ndcs as (
24 | 
25 | 	select
26 | 		row_number() over() as id,
27 | 		*
28 | 	from ndcs
29 | 
30 | )
31 | 
32 | select * from id_ndcs
33 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/dailymed/stg_dailymed__package_label_sections.sql:
--------------------------------------------------------------------------------
 1 |  /* staging.stg_dailymed__package_label_sections */
 2 | 
 3 | with xml_table as
 4 | (
 5 | 	select zip_file, xml_content::xml as xml_column
 6 | 	from sagerx_lake.dailymed
 7 | )
 8 | 
 9 | select
10 | 	zip_file
11 | 	, y.*
12 | from xml_table x,
13 | 	xmltable(
14 | 		'//PackageLabel' passing xml_column
15 | 		columns 
16 | 			document_id 	text  path '../../documentId',
17 | 			set_id  		text  path '../../SetId',
18 | 			version_number	text  path '../../VersionNumber',
19 | 			id				text  path 'ID',
20 | 			text			text  path 'Text',
21 | 			media_list		xml   path 'MediaList'
22 | 	) y
23 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_enforcement/_fda_enforcement__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_fda_enforcement__reports
 5 |     description: "FDA enforcement reports"
 6 |     columns:
 7 |       - name: status
 8 |       - name: city
 9 |       - name: state
10 |       - name: country
11 |       - name: classification
12 |       - name: openfda
13 |       - name: product_type
14 |       - name: event_id
15 |       - name: recalling_firm
16 |       - name: address_1
17 |       - name: address_2
18 |       - name: postal_code
19 |       - name: voluntary_mandated
20 |       - name: initial_firm_notification
21 |       - name: distribution_pattern
22 |       - name: recall_number
23 |         description: "The recall number."
24 |         tests:
25 |           - unique
26 |           - not_null
27 |       - name: product_description
28 |       - name: product_quantity
29 |       - name: reason_for_recall
30 |       - name: recall_initiation_date
31 |       - name: center_classification_date
32 |       - name: report_date
33 |       - name: code_info
34 | 
35 |   # NOTE: every model belongs under the single "models:" key above; repeating the key is a duplicate YAML mapping key
36 |   - name: stg_fda_enforcement__regex_ndcs
37 |     description: "FDA enforcement NDCs from RegEx."
38 |     columns:
39 |     # primary key would be recall_number + ndc11
40 |       - name: recall_number
41 |       - name: ndc11
42 |       - name: ndc9
43 | 
44 |   # (continues the same models list)
45 |   - name: stg_fda_enforcement__json_ndcs
46 |     description: "FDA enforcement NDCs from JSON."
47 |     columns:
48 |     # primary key would be recall_number + ndc11
49 |       - name: recall_number
50 |       - name: ndc11
51 |       - name: ndc9
52 |       - name: app_num
53 | 
54 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_enforcement/_fda_enforcement__sources.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | sources:
4 |   - name: fda_enforcement
5 |     schema: sagerx_lake
6 |     tables:
7 |       - name: fda_enforcement
8 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_enforcement/stg_fda_enforcement__json_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_enforcement__json_ndcs.sql
 2 | -- one row per (recall_number, openfda package_ndc, openfda application_number) combination
 3 | WITH base AS (
 4 | 	select
 5 | 		fdae.recall_number
 6 | 		, ndc.id_value #>> '{}' as ndc
 7 | 		, app_num.id_value #>> '{}' as app_num
 8 | 	from {{ source('fda_enforcement', 'fda_enforcement') }} fdae
 9 | 		, json_array_elements(openfda->'package_ndc') with ordinality ndc(id_value, line)
10 | 		, json_array_elements(openfda->'application_number') with ordinality app_num(id_value, line)
11 | ) 
12 | 
13 | -- select from base (previously defined but unused) so that ndc and app_num are the
14 | -- text values extracted above rather than names resolved against a repeated FROM list
15 | select
16 | 	recall_number
17 | 	, {{ndc_to_11 ('ndc')}} as ndc11
18 | 	, left({{ ndc_to_11 ('ndc')}},9) as ndc9
19 | 	, app_num
20 | from base
21 | where {{ndc_to_11('ndc')}} is not null


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_enforcement/stg_fda_enforcement__regex_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_enforcement__regex_ndcs.sql
 2 | -- NDC-looking strings matched in each report's free-text product_description,
 3 | -- passed through the ndc_to_11 macro; one row per regex match that yields a non-null ndc11
 4 | with
 5 | 
 6 | z_base as (
 7 | 	select
 8 | 		recall_number
 9 | 		, (regexp_matches(product_description, '(\m\d{1,5}-\d{1,4}-\d{1,2}\M|\m\d{11}\M)', 'g'))[1] as ndc
10 | 	from {{ source('fda_enforcement', 'fda_enforcement') }}
11 | ),
12 | z as (
13 | 	select 
14 | 	recall_number
15 | 	, {{ndc_to_11 ('ndc')}} as ndc11
16 | 	, left( {{ndc_to_11 ('ndc')}}, 9) as ndc9
17 | 	from z_base
18 | )	
19 | 
20 | select
21 | 	*
22 | from z
23 | where ndc11 is not null
24 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_enforcement/stg_fda_enforcement__reports.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_enforcement__reports.sql
 2 | --
 3 | -- Explicit column list over the raw enforcement report table; no columns are
 4 | -- renamed or transformed.
 5 | 
 6 | with enforcement as (
 7 |     select * from sagerx_lake.fda_enforcement
 8 | )
 9 | 
10 | select
11 |     status,
12 |     city,
13 |     state,
14 |     country,
15 |     classification,
16 |     openfda,
17 |     product_type,
18 |     event_id,
19 |     recalling_firm,
20 |     address_1,
21 |     address_2,
22 |     postal_code,
23 |     voluntary_mandated,
24 |     initial_firm_notification,
25 |     distribution_pattern,
26 |     recall_number,
27 |     product_description,
28 |     product_quantity,
29 |     reason_for_recall,
30 |     recall_initiation_date,
31 |     center_classification_date,
32 |     report_date,
33 |     code_info
34 | from enforcement
35 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_excluded/stg_fda_excluded__classes.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_excluded__classes.sql
 2 | --
 3 | -- One row per comma-separated token of each product's pharm_classes value,
 4 | -- split into the text before '[' (class_name) and the text inside '[...]'
 5 | -- (class_type).
 6 | 
 7 | with
 8 | 
 9 | product as (
10 |     select * from {{ source('fda_excluded', 'fda_excluded_product') }}
11 | )
12 | 
13 | , pharm_classes_array as (
14 | 	-- class_line numbers a product's tokens in descending token order.
15 | 	select 
16 | 		product.productid
17 | 		, token
18 | 		, row_number() over (partition by product.productid order by token desc) as class_line
19 | 	from product, unnest(string_to_array(product.pharm_classes, ',')) as token
20 | )
21 | 
22 | select
23 | 	classes.productid
24 | 	, classes.class_line
25 | 	-- split_part returns the whole token when it contains no '['. The previous
26 | 	-- left(token, position('[' in token) - 1) evaluated to left(token, -1) in
27 | 	-- that case and silently dropped the token's last character.
28 | 	, trim(split_part(classes.token, '[', 1)) as class_name
29 | 	-- Null when the token has no bracketed part.
30 | 	, substring(classes.token, '\[(.+)\]') as class_type
31 | from pharm_classes_array classes
32 | order by
33 | 	productid
34 | 	, class_line
35 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_excluded/stg_fda_excluded__ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_excluded__ndcs.sql
 2 | --
 3 | -- One row per package record, left-joined to its product record on productid,
 4 | -- with the package code additionally passed through the ndc_to_11 macro.
 5 | 
 6 | with
 7 | 
 8 | product as (
 9 |     select * from {{ source('fda_excluded', 'fda_excluded_product') }}
10 | ),
11 | 
12 | package as (
13 |     select * from {{ source('fda_excluded', 'fda_excluded_package') }}
14 | )
15 | 
16 | select
17 |     {{ndc_to_11 ('ndcpackagecode')}} as ndc11,
18 |     package.productid,
19 |     package.productndc,
20 |     producttypename,
21 |     proprietaryname,
22 |     proprietarynamesuffix,
23 |     nonproprietaryname,
24 |     dosageformname,
25 |     routename,
26 |     -- columns present on both tables are prefixed with their origin
27 |     product.startmarketingdate as product_startmarketingdate,
28 |     product.endmarketingdate as product_endmarketingdate,
29 |     marketingcategoryname,
30 |     applicationnumber,
31 |     labelername,
32 |     substancename,
33 |     active_numerator_strength,
34 |     active_ingred_unit,
35 |     pharm_classes,
36 |     deaschedule,
37 |     product.ndc_exclude_flag as product_ndc_exclude_flag,
38 |     listing_record_certified_through,
39 |     ndcpackagecode,
40 |     packagedescription,
41 |     package.startmarketingdate as package_startmarketingdate,
42 |     package.endmarketingdate as package_endmarketingdate,
43 |     package.ndc_exclude_flag as package_ndc_exclude_flag,
44 |     sample_package
45 | from package
46 | left join product
47 |     on product.productid = package.productid
48 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_excluded/stg_fda_excluded__substances.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_excluded__substances.sql
 2 | --
 3 | -- One row per active substance of a product. The substance, strength and unit
 4 | -- columns hold '; '-delimited lists that are paired up by element position.
 5 | 
 6 | with
 7 | 
 8 | product as (    
 9 |     select * from {{ source('fda_excluded', 'fda_excluded_product') }}
10 | )
11 | 
12 | -- In each *_array CTE, WITH ORDINALITY exposes the element's position in the
13 | -- split list and row_number() is ordered by it. A window without ORDER BY is
14 | -- not guaranteed to number rows in list order, which could pair a substance
15 | -- with the wrong strength or unit.
16 | , substancename_array as (
17 | 	select
18 | 		productid
19 | 		, substance
20 | 		, row_number() over(partition by productid order by elem_pos) as substance_line
21 | 	from product
22 | 		, unnest(string_to_array(substancename, '; ')) with ordinality as s(substance, elem_pos)
23 | )
24 | 
25 | , strength_array as (
26 | 	select
27 | 		productid
28 | 		, strength
29 | 		, row_number() over(partition by productid order by elem_pos) as strength_line
30 | 	from product
31 | 		, unnest(string_to_array(active_numerator_strength, '; ')) with ordinality as s(strength, elem_pos)
32 | )
33 | 
34 | , unit_array as (
35 | 	select
36 | 		productid
37 | 		, unit
38 | 		, row_number() over(partition by productid order by elem_pos) as unit_line
39 | 	from product
40 | 		, unnest(string_to_array(active_ingred_unit, '; ')) with ordinality as u(unit, elem_pos)
41 | )
42 | 
43 | -- Inner joins: a substance is kept only when a strength and a unit exist at
44 | -- the same position.
45 | select
46 | 	substance.productid
47 | 	, substance.substance_line
48 | 	, substance.substance as substancename
49 | 	, strength.strength as active_numerator_strength
50 | 	, unit.unit as active_ingred_unit
51 | from substancename_array substance
52 | inner join strength_array strength
53 | 	on strength.productid = substance.productid
54 | 	and strength.strength_line = substance.substance_line
55 | inner join unit_array unit
56 | 	on unit.productid = substance.productid
57 | 	and unit.unit_line = substance.substance_line
58 | order by
59 | 	productid
60 | 	, substance_line


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__classes.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_ndc__classes.sql
 2 | --
 3 | -- One row per comma-separated token of each product's pharm_classes value,
 4 | -- split into the text before '[' (class_name) and the text inside '[...]'
 5 | -- (class_type).
 6 | 
 7 | with
 8 | 
 9 | product as (    
10 |     select * from {{ source('fda_ndc', 'fda_ndc_product') }}
11 | )
12 | 
13 | , pharm_classes_array as (
14 | 	-- class_line numbers a product's tokens in descending token order.
15 | 	select 
16 | 		product.productid
17 | 		, token
18 | 		, row_number() over (partition by product.productid order by token desc) as class_line
19 | 	from product, unnest(string_to_array(product.pharm_classes, ',')) as token
20 | )
21 | 
22 | select
23 | 	classes.productid
24 | 	, classes.class_line
25 | 	-- split_part returns the whole token when it contains no '['. The previous
26 | 	-- left(token, position('[' in token) - 1) evaluated to left(token, -1) in
27 | 	-- that case and silently dropped the token's last character.
28 | 	, trim(split_part(classes.token, '[', 1)) as class_name
29 | 	-- Null when the token has no bracketed part.
30 | 	, substring(classes.token, '\[(.+)\]') as class_type
31 | from pharm_classes_array classes
32 | order by
33 | 	productid
34 | 	, class_line


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__ndc_associations.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_ndc__ndc_associations.sql
 2 | --
 3 | -- One row per NDC-shaped token (digits-digits-digits) found in a package
 4 | -- description, keyed by the package code that description belongs to.
 5 | 
 6 | with package as (
 7 | 
 8 |     select * 
 9 |     from {{ source('fda_ndc', 'fda_ndc_package') }}
10 | 
11 | ),
12 | 
13 | extracted_ndc as (
14 | 
15 |     -- The pattern has no capture group, so every match array holds exactly one
16 |     -- element. WITH ORDINALITY records the order in which the matches occur in
17 |     -- the description. Packages without any match produce no rows.
18 |     select
19 |         package.ndcpackagecode,
20 |         matches.ndc_match[1] as token,
21 |         matches.match_position,
22 |         package.packagedescription
23 |     from package
24 |     cross join lateral regexp_matches(package.packagedescription, '\d+-\d+-\d+', 'g')
25 |         with ordinality as matches(ndc_match, match_position)
26 | 
27 | ),
28 | 
29 | final_array as (
30 | 
31 |     -- Number the tokens of a package code by where they appear in the
32 |     -- description. The previous version ordered by row_number() over (), which
33 |     -- has no defined order and so did not guarantee this.
34 |     select
35 |         ndcpackagecode,
36 |         token,
37 |         row_number() over (
38 |             partition by ndcpackagecode
39 |             order by packagedescription, match_position
40 |         ) as ndc_line,
41 |         packagedescription
42 |     from extracted_ndc
43 | 
44 | ),
45 | 
46 | ndc_associations as (
47 | 
48 |     select
49 |         ndcpackagecode as outer_ndc,
50 |         {{ ndc_to_11('ndcpackagecode') }} as outer_ndc11,
51 |         ndc_line,
52 |         token as ndc,
53 |         {{ ndc_to_11('token') }} as ndc11,
54 |         packagedescription
55 |     from final_array
56 |     order by
57 |         ndcpackagecode,
58 |         ndc_line
59 | 
60 | )
61 | 
62 | select * from ndc_associations
63 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_ndc__ndcs.sql
 2 | --
 3 | -- One row per package record, left-joined to its product record on productid,
 4 | -- with the package code additionally passed through the ndc_to_11 macro.
 5 | 
 6 | with
 7 | 
 8 | product as (
 9 |     select * from {{ source('fda_ndc', 'fda_ndc_product') }}
10 | ),
11 | 
12 | package as (
13 |     select * from {{ source('fda_ndc', 'fda_ndc_package') }}
14 | )
15 | 
16 | select
17 |     {{ndc_to_11 ('ndcpackagecode') }} as ndc11,
18 |     package.productid,
19 |     package.productndc,
20 |     producttypename,
21 |     proprietaryname,
22 |     proprietarynamesuffix,
23 |     nonproprietaryname,
24 |     dosageformname,
25 |     routename,
26 |     -- columns present on both tables are prefixed with their origin
27 |     product.startmarketingdate as product_startmarketingdate,
28 |     product.endmarketingdate as product_endmarketingdate,
29 |     marketingcategoryname,
30 |     applicationnumber,
31 |     labelername,
32 |     substancename,
33 |     active_numerator_strength,
34 |     active_ingred_unit,
35 |     pharm_classes,
36 |     deaschedule,
37 |     product.ndc_exclude_flag as product_ndc_exclude_flag,
38 |     listing_record_certified_through,
39 |     ndcpackagecode,
40 |     packagedescription,
41 |     package.startmarketingdate as package_startmarketingdate,
42 |     package.endmarketingdate as package_endmarketingdate,
43 |     package.ndc_exclude_flag as package_ndc_exclude_flag,
44 |     sample_package
45 | from package
46 | left join product
47 |     on product.productid = package.productid
48 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_ndc/stg_fda_ndc__substances.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_ndc__substances.sql
 2 | --
 3 | -- One row per active substance of a product. The substance, strength and unit
 4 | -- columns hold '; '-delimited lists that are paired up by element position.
 5 | 
 6 | with
 7 | 
 8 | product as (
 9 |     select * from {{ source('fda_ndc', 'fda_ndc_product') }}
10 | )
11 | 
12 | -- In each *_array CTE, WITH ORDINALITY exposes the element's position in the
13 | -- split list and row_number() is ordered by it. A window without ORDER BY is
14 | -- not guaranteed to number rows in list order, which could pair a substance
15 | -- with the wrong strength or unit.
16 | , substancename_array as (
17 | 	select
18 | 		productid
19 | 		, substance
20 | 		, row_number() over(partition by productid order by elem_pos) as substance_line
21 | 	from product
22 | 		, unnest(string_to_array(substancename, '; ')) with ordinality as s(substance, elem_pos)
23 | )
24 | 
25 | , strength_array as (
26 | 	select
27 | 		productid
28 | 		, strength
29 | 		, row_number() over(partition by productid order by elem_pos) as strength_line
30 | 	from product
31 | 		, unnest(string_to_array(active_numerator_strength, '; ')) with ordinality as s(strength, elem_pos)
32 | )
33 | 
34 | , unit_array as (
35 | 	select
36 | 		productid
37 | 		, unit
38 | 		, row_number() over(partition by productid order by elem_pos) as unit_line
39 | 	from product
40 | 		, unnest(string_to_array(active_ingred_unit, '; ')) with ordinality as u(unit, elem_pos)
41 | )
42 | 
43 | -- Inner joins: a substance is kept only when a strength and a unit exist at
44 | -- the same position.
45 | select
46 | 	substance.productid
47 | 	, substance.substance_line
48 | 	, substance.substance as substancename
49 | 	, strength.strength as active_numerator_strength
50 | 	, unit.unit as active_ingred_unit
51 | from substancename_array substance
52 | inner join strength_array strength
53 | 	on strength.productid = substance.productid
54 | 	and strength.strength_line = substance.substance_line
55 | inner join unit_array unit
56 | 	on unit.productid = substance.productid
57 | 	and unit.unit_line = substance.substance_line
58 | order by
59 | 	productid
60 | 	, substance_line


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_unfinished/stg_fda_unfinished__ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_unfinished__ndcs.sql
 2 | --
 3 | -- One row per package record, left-joined to its product record on productid,
 4 | -- with the package code additionally passed through the ndc_to_11 macro.
 5 | 
 6 | with
 7 | 
 8 | product as (
 9 |     select * from {{ source('fda_unfinished', 'fda_unfinished_product') }}
10 | ),
11 | 
12 | package as (
13 |     select * from {{ source('fda_unfinished', 'fda_unfinished_package') }}
14 | )
15 | 
16 | select
17 |     {{ndc_to_11 ('package.ndcpackagecode')}} as ndc11,
18 |     package.productid,
19 |     package.productndc,
20 |     producttypename,
21 |     nonproprietaryname,
22 |     dosageformname,
23 |     -- columns present on both tables are prefixed with their origin
24 |     product.startmarketingdate as product_startmarketingdate,
25 |     product.endmarketingdate as product_endmarketingdate,
26 |     marketingcategoryname,
27 |     labelername,
28 |     substancename,
29 |     active_numerator_strength,
30 |     active_ingred_unit,
31 |     deaschedule,
32 |     listing_record_certified_through,
33 |     ndcpackagecode,
34 |     packagedescription,
35 |     package.startmarketingdate as package_startmarketingdate,
36 |     package.endmarketingdate as package_endmarketingdate
37 | from package
38 | left join product
39 |     on product.productid = package.productid
40 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_unfinished/stg_fda_unfinished__substances.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_unfinished__substances.sql
 2 | --
 3 | -- One row per active substance of a product. The substance, strength and unit
 4 | -- columns hold '; '-delimited lists that are paired up by element position.
 5 | 
 6 | with
 7 | 
 8 | product as (    
 9 |     select * from {{ source('fda_unfinished', 'fda_unfinished_product') }}
10 | )
11 | 
12 | -- In each *_array CTE, WITH ORDINALITY exposes the element's position in the
13 | -- split list and row_number() is ordered by it. A window without ORDER BY is
14 | -- not guaranteed to number rows in list order, which could pair a substance
15 | -- with the wrong strength or unit.
16 | , substancename_array as (
17 | 	select
18 | 		productid
19 | 		, substance
20 | 		, row_number() over(partition by productid order by elem_pos) as substance_line
21 | 	from product
22 | 		, unnest(string_to_array(substancename, '; ')) with ordinality as s(substance, elem_pos)
23 | )
24 | 
25 | , strength_array as (
26 | 	select
27 | 		productid
28 | 		, strength
29 | 		, row_number() over(partition by productid order by elem_pos) as strength_line
30 | 	from product
31 | 		, unnest(string_to_array(active_numerator_strength, '; ')) with ordinality as s(strength, elem_pos)
32 | )
33 | 
34 | , unit_array as (
35 | 	select
36 | 		productid
37 | 		, unit
38 | 		, row_number() over(partition by productid order by elem_pos) as unit_line
39 | 	from product
40 | 		, unnest(string_to_array(active_ingred_unit, '; ')) with ordinality as u(unit, elem_pos)
41 | )
42 | 
43 | -- Inner joins: a substance is kept only when a strength and a unit exist at
44 | -- the same position.
45 | select
46 | 	substance.productid
47 | 	, substance.substance_line
48 | 	, substance.substance as substancename
49 | 	, strength.strength as active_numerator_strength
50 | 	, unit.unit as active_ingred_unit
51 | from substancename_array substance
52 | inner join strength_array strength
53 | 	on strength.productid = substance.productid
54 | 	and strength.strength_line = substance.substance_line
55 | inner join unit_array unit
56 | 	on unit.productid = substance.productid
57 | 	and unit.unit_line = substance.substance_line
58 | order by
59 | 	productid
60 | 	, substance_line


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_unii/_fda_unii__sources.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | sources:
 4 |   - name: fda_unii
 5 |     description: FDA UNII codes.
 6 |     schema: sagerx_lake
 7 |     tables:
 8 |       - name: fda_unii
 9 |         description: FDA UNII codes.  # key was misspelled "desciption"
10 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/fda_unii/stg_fda_unii__unii_codes.sql:
--------------------------------------------------------------------------------
 1 | -- stg_fda_unii__unii_codes.sql
 2 | --
 3 | -- Fixed column list over the fda_unii source table; nothing is transformed.
 4 | 
 5 | with unii_source as (
 6 |     select * from {{ source('fda_unii', 'fda_unii') }}
 7 | )
 8 | 
 9 | select
10 |     unii,
11 |     display_name,
12 |     rxcui,
13 |     pubchem,
14 |     rn,
15 |     ncit,
16 |     ncbi,
17 |     dailymed
18 | from unii_source
19 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/mccpd/_mccpd__sources.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | sources:
 4 |   - name: mccpd
 5 |     schema: sagerx_lake
 6 |     tables:
 7 |       - name: mccpd
 8 |         description: >
 9 |           Mark Cuban Cost Plus Drugs pricing.
10 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/nadac/_nadac__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_nadac__nadac
 5 |     description: "Flags NADAC survey data to make it easy to find most recent price, first price, and dollar / percent changes between prices. Can also SUM change_type to see the number of price increases over time."
 6 |     columns:  # NOTE(review): price_start_date, most_recent_price, first_price, dollar_change, percent_change and change_type are not in the select list of stg_nadac__nadac.sql - confirm which model this file describes
 7 |       - name: ndc
 8 |         description: "The National Drug Code (NDC) is a numerical code maintained by the FDA that includes the labeler code, product code, and package code. The NDC is an 11-digit code."
 9 |         tests:
10 |           - not_null
11 |       - name: ndc_description
12 |         description: "Identifies the drug name, strength, and dosage form of the drug product."
13 |         tests:
14 |           - not_null
15 |       - name: nadac_per_unit
16 |         description: "The National Average Drug Acquisition Cost per unit. Staging table converts this to a numeric type."
17 |       - name: pricing_unit
18 |         description: "Indicates the pricing unit for the associated NDC ('ML', 'GM' or 'EA')."
19 |       - name: price_start_date  # NOTE(review): stg_nadac__nadac.sql selects effective_date, not price_start_date
20 |         description: "The effective date of the NADAC Per Unit cost. Staging table converts this to a date type."
21 |       - name: most_recent_price
22 |         description: "True if the price is the most recent available price."
23 |       - name: first_price
24 |         description: "True if the price is the first available price."
25 |       - name: dollar_change
26 |         description: "Change between this price and previous price in dollars."
27 |       - name: percent_change
28 |         description: "Change between this price and previous price in percentage."
29 |       - name: change_type
30 |         description: "1 if the price went up, 0 if the price went down."
31 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/nadac/stg_nadac__nadac.sql:
--------------------------------------------------------------------------------
 1 | -- stg_nadac__nadac.sql
 2 | --
 3 | -- De-duplicated rows of the nadac source with the per-unit price cast to
 4 | -- numeric and the date columns cast to date.
 5 | 
 6 | with
 7 | 
 8 | nadac as (
 9 | 
10 |     select distinct
11 |         ndc_description
12 |         , ndc
13 |         , cast(nadac_per_unit as numeric) as nadac_per_unit
14 |         , cast(effective_date as date) as effective_date
15 |         , pricing_unit
16 |         , pharmacy_type_indicator
17 |         , otc
18 |         , explanation_code
19 |         , classification_for_rate_setting
20 |         -- NOTE(review): passed through uncast, unlike nadac_per_unit - confirm this is intended
21 |         , corresponding_generic_drug_nadac_per_unit
22 |         , cast(corresponding_generic_drug_effective_date as date) as corresponding_generic_drug_effective_date
23 |         , cast(as_of_date as date) as as_of_date
24 |     from {{ source('nadac', 'nadac') }}
25 | 
26 | )
27 | 
28 | select * from nadac
29 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/orange_book/_orange_book__models.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/purple_book/_purple_book__models.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxclass/_rxclass__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_rxclass__rxclass
 5 |     description: All RxClass mappings to the lowest level RxClass code.
 6 |     columns:
 7 |       - name: rxcui
 8 |       - name: name
 9 |       - name: tty
10 |       - name: rela
11 |       - name: class_id
12 |       - name: class_name
13 |       - name: class_type
14 |       - name: rela_source
15 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxclass/_rxclass__sources.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | sources:
 4 |   - name: rxclass
 5 |     description: |
 6 |       This data source represents all of RxClass.
 7 | 
 8 |       Below are descriptions of a select few of the RxClass sources.
 9 | 
10 |       ## ATCPROD
11 | 
12 |       > A product-level mapping of RxNorm to ATC1-4 classes.
13 | 
14 |       RxClass has added RxNorm product-level mapping for ATC. The product-level mapping is now the default source mapping for the ATC classes in the RxClass browser.
15 | 
16 |       When extended to RxNorm products, an ingredient-level mapping to ATC can lead to inapplicable mappings. For example, through its ingredient, timolol, the RxNorm product 1923428 (timolol 2.5 MG/ML Ophthalmic Solution) is associated with both ophthalmologicals and cardiovascular system medications, while only the former is accurate. In contrast, ATCPROD only associates this product with the ophthalmologicals class Beta blocking agents (S01ED).
17 | 
18 |       To use the RxNorm product-level mapping for ATC in the RxClass API, the relaSource parameter should be specified as “ATCPROD”.
19 |       https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.xml?rxcui=1923428&relaSource=ATC
20 |       https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.xml?rxcui=1923428&relaSource=ATCPROD
21 |       The RxNorm product-level mapping for ATC was produced by the National Library of Medicine. While not all active RxNorm products are covered by the mapping, the mapping accounts for over 97% of the Medicare Part-D prescriptions from 2012-2020.
22 | 
23 |       The original ingredient-level mapping to ATC from the WHO Collaborating Centre for Drug Statistics Methodology remains available in RxClass (select ATC under Edit Drug Sources) and through the RxClass API (relaSource=ATC).
24 |     schema: sagerx_lake
25 |     tables:
26 |       - name: rxclass
27 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxclass/stg_rxclass__rxclass.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxclass__rxclass.sql
 2 | --
 3 | -- De-duplicated copy of every column of the rxclass source table.
 4 | 
 5 | with source_rows as (
 6 |     select * from {{ source('rxclass', 'rxclass') }}
 7 | )
 8 | 
 9 | select distinct
10 |     source_rows.*
11 | from source_rows


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/_rxnorm__sources.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | sources:
 4 |   - name: rxnorm
 5 |     schema: sagerx_lake
 6 |     tables:
 7 |       - name: rxnorm_rxnatomarchive
 8 |       - name: rxnorm_rxnconso
 9 |       - name: rxnorm_rxncui
10 |       - name: rxnorm_rxncuichanges
11 |       - name: rxnorm_rxndoc
12 |       - name: rxnorm_rxnrel
13 |       - name: rxnorm_rxnsab
14 |       - name: rxnorm_rxnsat
15 |       - name: rxnorm_rxnsty
16 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__all_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__all_ndcs.sql
 2 | --
 3 | -- RXNSAT attribute rows whose attribute name is 'NDC', restricted to a fixed
 4 | -- list of source vocabularies. The raw value is kept as `ndc` and also passed
 5 | -- through the ndc_to_11 macro.
 6 | 
 7 | select
 8 |     -- a space now separates the macro call from `as`, so the rendered SQL no
 9 |     -- longer depends on the macro output ending in whitespace
10 |     {{ ndc_to_11 ('rxnsat.atv') }} as ndc11
11 |     , rxnsat.atv as ndc
12 |     , rxnsat.rxcui
13 |     , rxnsat.sab
14 |     -- case (rather than the bare comparison) maps a null flag to false
15 |     , case when rxnsat.suppress = 'N' then true else false end as active
16 |     , case when rxnsat.cvf = '4096' then true else false end as prescribable
17 | -- declared dbt source instead of a hard-coded relation name
18 | from {{ source('rxnorm', 'rxnorm_rxnsat') }} rxnsat
19 | where rxnsat.atn = 'NDC'
20 |     and rxnsat.sab in ('ATC', 'CVX', 'DRUGBANK', 'MSH', 'MTHCMSFRF', 'MTHSPL', 'RXNORM', 'USP', 'VANDF')
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__atc_codes.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__atc_codes.sql
 2 | --
 3 | -- One row per level-5 ATC row, with the code and name of its level 4, 3, 2 and
 4 | -- 1 parents looked up by code prefix (5, 4, 3 and 1 leading characters).
 5 | 
 6 | with atc as (
 7 | 	-- ATC rows of RXNCONSO (term types not starting with 'RXN') paired with
 8 | 	-- their ATC_LEVEL attribute from RXNSAT. The attribute filter used to sit in
 9 | 	-- a WHERE clause after a left join, which discarded unmatched rows anyway;
10 | 	-- the inner join states that directly. ORDER BY clauses that had no effect
11 | 	-- on the result were removed.
12 | 	select distinct
13 | 		a.rxcui
14 | 		, a.code
15 | 		, b.atn
16 | 		, b.atv as atc_class_level
17 | 		, a.str as description
18 | 		, a.sab
19 | 		, a.tty
20 | 	from {{ source('rxnorm', 'rxnorm_rxnconso') }} a
21 | 	inner join {{ source('rxnorm', 'rxnorm_rxnsat') }} b
22 | 		on a.code = b.code
23 | 		and b.atn = 'ATC_LEVEL'
24 | 	where a.sab = 'ATC'
25 | 		and a.tty not like 'RXN%'
26 | )
27 | 
28 | , sagerx_atc as (
29 | 	-- Each parent level is the same `atc` set filtered to one level inside the
30 | 	-- join condition, so a missing parent leaves nulls instead of dropping the row.
31 | 	select
32 | 		atc_1.code as atc_1_code
33 | 		, atc_1.description as atc_1_name
34 | 		, atc_2.code as atc_2_code
35 | 		, atc_2.description as atc_2_name
36 | 		, atc_3.code as atc_3_code
37 | 		, atc_3.description as atc_3_name
38 | 		, atc_4.code as atc_4_code
39 | 		, atc_4.description as atc_4_name
40 | 		, atc_5.code as atc_5_code
41 | 		, atc_5.description as atc_5_name
42 | 		, atc_5.rxcui as ingredient_rxcui
43 | 		, atc_5.description as ingredient_name
44 | 		, atc_5.tty as ingredient_tty
45 | 	from atc atc_5
46 | 	left join atc atc_4
47 | 		on atc_4.atc_class_level = '4'
48 | 		and left(atc_5.code, 5) = atc_4.code
49 | 	left join atc atc_3
50 | 		on atc_3.atc_class_level = '3'
51 | 		and left(atc_4.code, 4) = atc_3.code
52 | 	left join atc atc_2
53 | 		on atc_2.atc_class_level = '2'
54 | 		and left(atc_3.code, 3) = atc_2.code
55 | 	left join atc atc_1
56 | 		on atc_1.atc_class_level = '1'
57 | 		and left(atc_2.code, 1) = atc_1.code
58 | 	where atc_5.atc_class_level = '5'
59 | )
60 | 
61 | select * 
62 | from sagerx_atc
63 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brand_product_component_links.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__brand_product_component_links.sql
 2 | --
 3 | -- Links every RXNORM product of term type SBD or BPCK to the SBD/SCD concepts
 4 | -- it reaches through a 'contains' relationship. A product with no such
 5 | -- component is linked to itself.
 6 | 
 7 | select distinct
 8 | 	product.rxcui as brand_product_rxcui
 9 | 	-- fall back to the product's own rxcui when no component row matched
10 | 	, coalesce(product_component.rxcui, product.rxcui) as brand_product_component_rxcui
11 | -- declared dbt sources instead of hard-coded relation names
12 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} product
13 | left join {{ source('rxnorm', 'rxnorm_rxnrel') }} rxnrel
14 | 	on rxnrel.rxcui2 = product.rxcui
15 | 	and rxnrel.rela = 'contains'
16 | left join {{ source('rxnorm', 'rxnorm_rxnconso') }} product_component
17 | 	on rxnrel.rxcui1 = product_component.rxcui
18 | 	and product_component.tty in ('SBD', 'SCD') -- NOTE: BPCKs can contain SBDs AND SCDs
19 | 	and product_component.sab = 'RXNORM'
20 | where product.tty in ('SBD', 'BPCK')
21 | 	and product.sab = 'RXNORM'
22 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brand_product_components.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__brand_product_components.sql
 2 | --
 3 | -- Component rows for RXNORM products of term type SBD or BPCK. For an SBD the
 4 | -- product's own rxcui/str/tty/flags are reported; otherwise those of the
 5 | -- SBD/SCD concept joined through 'contains'.
 6 | 
 7 | select distinct
 8 | 	case when product.tty = 'SBD' then product.rxcui else product_component.rxcui end as rxcui
 9 | 	, case when product.tty = 'SBD' then product.str else product_component.str end as name
10 | 	, case when product.tty = 'SBD' then product.tty else product_component.tty end as tty
11 | 	-- an SCD component is already the clinical component; otherwise follow 'tradename_of'
12 | 	, case when product_component.tty = 'SCD' then product_component.rxcui else rxnrel_scd.rxcui1 end as clinical_product_component_rxcui
13 | 	, rxnrel_bn.rxcui1 as brand_rxcui
14 | 	, case when 
15 |             case when product.tty = 'SBD'
16 |             then product.suppress
17 |             else product_component.suppress
18 |             end = 'N' 
19 |         then true 
20 |         else false
21 |         end as active
22 | 	, case when 
23 |             case when product.tty = 'SBD'
24 |             then product.cvf
25 |             else product_component.cvf 
26 |             end = '4096' 
27 |         then true
28 |         else false
29 |         end as prescribable
30 | -- declared dbt sources instead of hard-coded relation names
31 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} product
32 | left join {{ source('rxnorm', 'rxnorm_rxnrel') }} rxnrel
33 | 	on rxnrel.rxcui2 = product.rxcui
34 | 	and rxnrel.rela = 'contains'
35 | left join {{ source('rxnorm', 'rxnorm_rxnconso') }} product_component
36 | 	on rxnrel.rxcui1 = product_component.rxcui
37 | 	and product_component.tty in ('SBD', 'SCD') -- NOTE: BPCKs can contain SBDs AND SCDs
38 | 	and product_component.sab = 'RXNORM'
39 | -- both relationship lookups start from the component when one matched, else from the product
40 | left join {{ source('rxnorm', 'rxnorm_rxnrel') }} rxnrel_scd 
41 | 	on rxnrel_scd.rxcui2 = coalesce(product_component.rxcui, product.rxcui)
42 | 	and rxnrel_scd.rela = 'tradename_of' -- rxnrel_scd.rxcui1 = clinical_product_component_rxcui
43 | left join {{ source('rxnorm', 'rxnorm_rxnrel') }} rxnrel_bn 
44 | 	on rxnrel_bn.rxcui2 = coalesce(product_component.rxcui, product.rxcui)
45 | 	and rxnrel_bn.rela = 'has_ingredient' -- rxnrel_bn.rxcui1 = brand_rxcui
46 | where product.tty in ('SBD', 'BPCK')
47 | 	and product.sab = 'RXNORM'
48 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brand_products.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__brand_products.sql
 2 | --
 3 | -- RXNORM products of term type SBD or BPCK, with the SCD/GPCK concept each one
 4 | -- reaches through 'tradename_of' (null when there is none).
 5 | 
 6 | select
 7 | 	product.rxcui as rxcui
 8 | 	, product.str as name
 9 | 	, product.tty as tty
10 | 	, clinical_product.rxcui as clinical_product_rxcui
11 | 	-- case (rather than the bare comparison) maps a null flag to false
12 | 	, case when product.suppress = 'N' then true else false end as active
13 | 	, case when product.cvf = '4096' then true else false end as prescribable
14 | -- declared dbt sources instead of hard-coded relation names
15 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} product
16 | left join {{ source('rxnorm', 'rxnorm_rxnrel') }} rxnrel
17 | 	on rxnrel.rxcui2 = product.rxcui
18 | 	and rxnrel.rela = 'tradename_of'
19 | left join {{ source('rxnorm', 'rxnorm_rxnconso') }} clinical_product
20 | 	on rxnrel.rxcui1 = clinical_product.rxcui
21 | 	and clinical_product.tty in ('SCD', 'GPCK')
22 | 	and clinical_product.sab = 'RXNORM'
23 | where product.tty in('SBD', 'BPCK')
24 | 	and product.sab = 'RXNORM'
25 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__brands.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__brands.sql
 2 | --
 3 | -- RXNORM brand names (BN) reached from an SBD through 'has_ingredient', each
 4 | -- with at most one ingredient rxcui chosen by the ranking in `cte`.
 5 | 
 6 | with cte as (
 7 | 	-- Rank the candidate ingredients of every brand; in descending order 'MIN'
 8 | 	-- sorts ahead of 'IN'. NULLS LAST stops unmatched rows from the left join in
 9 | 	-- the second branch from taking rank 1 (descending order puts nulls first
10 | 	-- by default) and hiding a real ingredient. ingredient_rxcui breaks the
11 | 	-- remaining ties so the pick is repeatable.
12 | 	select
13 | 		sq.*
14 | 		, row_number() over(
15 | 			partition by rxcui
16 | 			order by ingredient_tty desc nulls last, ingredient_rxcui
17 | 		) as rn
18 | 	from (
19 | 
20 | 		-- branch 1: brand --tradename_of--> IN
21 | 		select
22 | 			brand.rxcui as rxcui
23 | 			, brand.str as name
24 | 			, brand.tty as tty
25 | 			, ingredient.rxcui as ingredient_rxcui
26 | 			, ingredient.str as ingredient_name
27 | 			, ingredient.tty as ingredient_tty
28 | 		from {{ source('rxnorm', 'rxnorm_rxnconso') }} brand
29 | 		inner join {{ source('rxnorm', 'rxnorm_rxnrel') }} rxnrel
30 | 			on rxnrel.rxcui2 = brand.rxcui
31 | 			and rxnrel.rela = 'tradename_of'
32 | 		inner join {{ source('rxnorm', 'rxnorm_rxnconso') }} ingredient
33 | 			on rxnrel.rxcui1 = ingredient.rxcui
34 | 			and ingredient.tty = 'IN'
35 | 			and ingredient.sab = 'RXNORM'
36 | 		where brand.tty = 'BN'
37 | 			and brand.sab = 'RXNORM'
38 | 
39 | 		union all
40 | 
41 | 		-- branch 2: brand --ingredient_of--> ... --tradename_of--> ... --has_ingredients--> MIN
42 | 		select
43 | 			brand.rxcui as rxcui
44 | 			, brand.str as name
45 | 			, brand.tty as tty
46 | 			, ingredient.rxcui as ingredient_rxcui
47 | 			, ingredient.str as ingredient_name
48 | 			, ingredient.tty as ingredient_tty
49 | 		from {{ source('rxnorm', 'rxnorm_rxnconso') }} brand
50 | 		inner join {{ source('rxnorm', 'rxnorm_rxnrel') }} sbd_rxnrel
51 | 			on sbd_rxnrel.rxcui2 = brand.rxcui
52 | 			and sbd_rxnrel.rela = 'ingredient_of'
53 | 		inner join {{ source('rxnorm', 'rxnorm_rxnrel') }} scd_rxnrel
54 | 			on scd_rxnrel.rxcui2 = sbd_rxnrel.rxcui1
55 | 			and scd_rxnrel.rela = 'tradename_of'
56 | 		inner join {{ source('rxnorm', 'rxnorm_rxnrel') }} ingredient_rxnrel
57 | 			on ingredient_rxnrel.rxcui2 = scd_rxnrel.rxcui1
58 | 			and ingredient_rxnrel.rela = 'has_ingredients'
59 | 		left join {{ source('rxnorm', 'rxnorm_rxnconso') }} ingredient
60 | 			on ingredient_rxnrel.rxcui1 = ingredient.rxcui
61 | 			and ingredient.tty = 'MIN'
62 | 			and ingredient.sab = 'RXNORM'
63 | 		where brand.tty = 'BN'
64 | 			and brand.sab = 'RXNORM'
65 | 	) sq
66 | )
67 | 
68 | select distinct
69 | 	brand.rxcui as rxcui
70 | 	, brand.str as name
71 | 	, brand.tty as tty
72 | 	, case when brand.suppress = 'N' then true else false end as active
73 | 	, case when brand.cvf = '4096' then true else false end as prescribable
74 | 	, cte.ingredient_rxcui as ingredient_rxcui
75 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} product
76 | inner join {{ source('rxnorm', 'rxnorm_rxnrel') }} rxnrel
77 | 	on rxnrel.rxcui2 = product.rxcui
78 | 	and rxnrel.rela = 'has_ingredient'
79 | inner join {{ source('rxnorm', 'rxnorm_rxnconso') }} brand
80 | 	on rxnrel.rxcui1 = brand.rxcui
81 | 	and brand.tty = 'BN'
82 | 	and brand.sab = 'RXNORM'
83 | -- only the top-ranked ingredient of each brand
84 | left join cte
85 | 	on cte.rxcui = brand.rxcui
86 | 	and cte.rn = 1
87 | where product.tty = 'SBD'
88 | 	and product.sab = 'RXNORM'
89 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__clinical_product_component_links.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__clinical_product_component_links.sql
 2 | --
 3 | -- Links every RXNORM clinical product (SCD or GPCK) to the SCD components it
 4 | -- reaches through a 'contains' relationship. A product with no matching SCD
 5 | -- component is linked to itself.
 6 | 
 7 | select distinct
 8 |     clinical_product.rxcui as clinical_product_rxcui
 9 |     , coalesce(component.rxcui, clinical_product.rxcui) as clinical_product_component_rxcui
10 | from sagerx_lake.rxnorm_rxnconso as clinical_product
11 | left join sagerx_lake.rxnorm_rxnrel as contains_rel
12 |     on contains_rel.rxcui2 = clinical_product.rxcui
13 |     and contains_rel.rela = 'contains'
14 | left join sagerx_lake.rxnorm_rxnconso as component
15 |     on component.rxcui = contains_rel.rxcui1
16 |     and component.tty = 'SCD'
17 |     and component.sab = 'RXNORM'
18 | where clinical_product.sab = 'RXNORM'
19 |     and clinical_product.tty in ('SCD', 'GPCK')
20 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__clinical_products.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__clinical_products.sql
 2 | --
 3 | -- RXNORM-sourced SCD and GPCK rows from RXNCONSO, with suppress / cvf turned
 4 | -- into boolean flags.
 5 | 
 6 | with rxnconso as (
 7 | 
 8 |     select * from sagerx_lake.rxnorm_rxnconso
 9 | 
10 | )
11 | 
12 | select
13 |     rxcui
14 |     , str as name
15 |     , tty
16 |     , coalesce(suppress = 'N', false) as active      -- true only when suppress is 'N'
17 |     , coalesce(cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
18 | from rxnconso
19 | where sab = 'RXNORM'
20 |     and tty in ('SCD', 'GPCK')
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__dose_form_group_links.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__dose_form_group_links.sql
 2 | --
 3 | -- Distinct pairs of a dose form (DF) rxcui and the rxcui on the other side of
 4 | -- an RXNORM 'isa' relationship, exposed as dose_form_group_rxcui.
 5 | 
 6 | select distinct
 7 |     df.rxcui as dose_form_rxcui
 8 |     , isa_rel.rxcui1 as dose_form_group_rxcui
 9 | from sagerx_lake.rxnorm_rxnconso as df
10 | inner join sagerx_lake.rxnorm_rxnrel as isa_rel
11 |     on isa_rel.rxcui2 = df.rxcui
12 | where df.sab = 'RXNORM'
13 |     and df.tty = 'DF'
14 |     and isa_rel.sab = 'RXNORM'
15 |     and isa_rel.rela = 'isa'
16 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__dose_form_groups.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__dose_form_groups.sql
 2 | --
 3 | -- RXNORM-sourced dose form group (DFG) rows from RXNCONSO, with suppress / cvf
 4 | -- turned into boolean flags.
 5 | 
 6 | with rxnconso as (
 7 | 
 8 |     select * from sagerx_lake.rxnorm_rxnconso
 9 | 
10 | )
11 | 
12 | select
13 |     rxcui
14 |     , str as name
15 |     , tty
16 |     , coalesce(suppress = 'N', false) as active      -- true only when suppress is 'N'
17 |     , coalesce(cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
18 | from rxnconso
19 | where sab = 'RXNORM'
20 |     and tty = 'DFG'
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__dose_forms.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__dose_forms.sql
 2 | --
 3 | -- RXNORM-sourced dose form (DF) rows from RXNCONSO, with suppress / cvf turned
 4 | -- into boolean flags.
 5 | 
 6 | with rxnconso as (
 7 | 
 8 |     select * from sagerx_lake.rxnorm_rxnconso
 9 | 
10 | )
11 | 
12 | select
13 |     rxcui
14 |     , str as name
15 |     , tty
16 |     , coalesce(suppress = 'N', false) as active      -- true only when suppress is 'N'
17 |     , coalesce(cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
18 | from rxnconso
19 | where sab = 'RXNORM'
20 |     and tty = 'DF'
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__hcpcs_codes.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__hcpcs_codes.sql
 2 | --
 3 | -- Distinct product-level concepts (GPCK, BPCK, SCD, SBD) paired with the value
 4 | -- of their 'DHJC' attribute, limited to values that start with 'J'.
 5 | 
 6 | select distinct
 7 |     conso.rxcui
 8 |     , sat.atv as hcpcs_code
 9 |     , conso.tty
10 |     , conso.str as drug_name
11 | from sagerx_lake.rxnorm_rxnsat as sat
12 | inner join sagerx_lake.rxnorm_rxnconso as conso
13 |     on conso.rxcui = sat.rxcui
14 | where sat.atn = 'DHJC'
15 |     and sat.atv like 'J%'
16 |     and conso.tty in ('GPCK', 'BPCK', 'SCD', 'SBD')
17 | order by sat.atv
18 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_component_links.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__ingredient_component_links.sql
 2 | --
 3 | -- Links every RXNORM ingredient (IN or MIN) to the IN components it reaches
 4 | -- through a 'has_part' relationship. An ingredient with no such component is
 5 | -- linked to itself.
 6 | 
 7 | with has_part_components as (
 8 | 
 9 |     select
10 |         has_part.rxcui2 as ingredient_rxcui
11 |         , component.rxcui as ingredient_component_rxcui
12 |     from sagerx_lake.rxnorm_rxnrel as has_part
13 |     inner join sagerx_lake.rxnorm_rxnconso as component
14 |         on component.rxcui = has_part.rxcui1
15 |         and component.tty = 'IN'
16 |         and component.sab = 'RXNORM'
17 |     where has_part.rela = 'has_part'
18 | 
19 | )
20 | 
21 | select distinct
22 |     ingredient.rxcui as ingredient_rxcui
23 |     , coalesce(parts.ingredient_component_rxcui, ingredient.rxcui) as ingredient_component_rxcui
24 | from sagerx_lake.rxnorm_rxnconso as ingredient
25 | left join has_part_components as parts
26 |     on parts.ingredient_rxcui = ingredient.rxcui
27 | where ingredient.sab = 'RXNORM'
28 |     and ingredient.tty in ('IN', 'MIN')
29 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_components.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__ingredient_components.sql
 2 | --
 3 | -- For every RXNORM ingredient (IN or MIN), returns the IN components reached
 4 | -- through a 'has_part' relationship; an ingredient with no such component is
 5 | -- returned as its own component.
 6 | 
 7 | with has_part_components as (
 8 | 
 9 |     select
10 |         has_part.rxcui2 as ingredient_rxcui
11 |         , component.rxcui
12 |         , component.str as name
13 |         , component.tty
14 |         , component.suppress
15 |         , component.cvf
16 |     from sagerx_lake.rxnorm_rxnrel as has_part
17 |     inner join sagerx_lake.rxnorm_rxnconso as component
18 |         on component.rxcui = has_part.rxcui1
19 |         and component.tty = 'IN'
20 |         and component.sab = 'RXNORM'
21 |     where has_part.rela = 'has_part'
22 | 
23 | )
24 | 
25 | select distinct
26 |     coalesce(parts.rxcui, ingredient.rxcui) as rxcui
27 |     , coalesce(parts.name, ingredient.str) as name
28 |     , coalesce(parts.tty, ingredient.tty) as tty
29 |     -- suppress / cvf come from the component whenever one was matched
30 |     , coalesce(
31 |         (case when parts.rxcui is null then ingredient.suppress else parts.suppress end) = 'N'
32 |         , false
33 |     ) as active
34 |     , coalesce(
35 |         (case when parts.rxcui is null then ingredient.cvf else parts.cvf end) = '4096'
36 |         , false
37 |     ) as prescribable
38 | from sagerx_lake.rxnorm_rxnconso as ingredient
39 | left join has_part_components as parts
40 |     on parts.ingredient_rxcui = ingredient.rxcui
41 | where ingredient.sab = 'RXNORM'
42 |     and ingredient.tty in ('IN', 'MIN')
43 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_strength_links.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__ingredient_strength_links.sql
 2 | --
 3 | -- Distinct triples tying an ingredient strength (SCDC) to the ingredient (IN)
 4 | -- it reaches via 'has_ingredient' and to the clinical product component (SCD)
 5 | -- it reaches via 'constitutes'.
 6 | 
 7 | select distinct
 8 |     scd.rxcui as clinical_product_component_rxcui
 9 |     , ingredient.rxcui as ingredient_component_rxcui
10 |     , scdc.rxcui as ingredient_strength_rxcui
11 | from sagerx_lake.rxnorm_rxnconso as scdc
12 | inner join sagerx_lake.rxnorm_rxnrel as to_ingredient
13 |     on to_ingredient.rxcui2 = scdc.rxcui
14 | inner join sagerx_lake.rxnorm_rxnconso as ingredient
15 |     on ingredient.rxcui = to_ingredient.rxcui1
16 | inner join sagerx_lake.rxnorm_rxnrel as to_product
17 |     on to_product.rxcui2 = scdc.rxcui
18 | inner join sagerx_lake.rxnorm_rxnconso as scd
19 |     on scd.rxcui = to_product.rxcui1
20 | where scdc.tty = 'SCDC'
21 |     and scdc.sab = 'RXNORM'
22 |     and to_ingredient.rela = 'has_ingredient'
23 |     and ingredient.tty = 'IN'
24 |     and ingredient.sab = 'RXNORM'
25 |     and to_product.rela = 'constitutes'
26 |     and scd.tty = 'SCD'
27 |     and scd.sab = 'RXNORM'
28 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredient_strengths.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__ingredient_strengths.sql
 2 | --
 3 | -- RXNORM-sourced ingredient strength (SCDC) rows, each left-joined to five
 4 | -- RXNSAT attributes (numerator/denominator value and unit, strength text)
 5 | -- for the same rxcui.
 6 | 
 7 | with strength_attributes as (
 8 | 
 9 |     select
10 |         rxcui
11 |         , atn
12 |         , atv
13 |     from sagerx_lake.rxnorm_rxnsat
14 | 
15 | )
16 | 
17 | select
18 |     scdc.rxcui as rxcui
19 |     , scdc.str as name
20 |     , num_value.atv as numerator_value
21 |     , num_unit.atv as numerator_unit
22 |     , denom_value.atv as denominator_value
23 |     , denom_unit.atv as denominator_unit
24 |     , strength_text.atv as text
25 |     , coalesce(scdc.suppress = 'N', false) as active      -- true only when suppress is 'N'
26 |     , coalesce(scdc.cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
27 | from sagerx_lake.rxnorm_rxnconso as scdc
28 | left join strength_attributes as num_value
29 |     on num_value.rxcui = scdc.rxcui
30 |     and num_value.atn = 'RXN_BOSS_STRENGTH_NUM_VALUE'
31 | left join strength_attributes as num_unit
32 |     on num_unit.rxcui = scdc.rxcui
33 |     and num_unit.atn = 'RXN_BOSS_STRENGTH_NUM_UNIT'
34 | left join strength_attributes as denom_value
35 |     on denom_value.rxcui = scdc.rxcui
36 |     and denom_value.atn = 'RXN_BOSS_STRENGTH_DENOM_VALUE'
37 | left join strength_attributes as denom_unit
38 |     on denom_unit.rxcui = scdc.rxcui
39 |     and denom_unit.atn = 'RXN_BOSS_STRENGTH_DENOM_UNIT'
40 | left join strength_attributes as strength_text
41 |     on strength_text.rxcui = scdc.rxcui
42 |     and strength_text.atn = 'RXN_STRENGTH'
43 | where scdc.sab = 'RXNORM'
44 |     and scdc.tty = 'SCDC'
45 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ingredients.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__ingredients.sql
 2 | --
 3 | -- RXNORM-sourced ingredient rows (IN and MIN) from RXNCONSO, with suppress /
 4 | -- cvf turned into boolean flags.
 5 | 
 6 | with rxnconso as (
 7 | 
 8 |     select * from sagerx_lake.rxnorm_rxnconso
 9 | 
10 | )
11 | 
12 | select
13 |     rxcui
14 |     , str as name
15 |     , tty
16 |     , coalesce(suppress = 'N', false) as active      -- true only when suppress is 'N'
17 |     , coalesce(cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
18 | from rxnconso
19 | where sab = 'RXNORM'
20 |     and tty in ('IN', 'MIN')
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__mthspl_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__mthspl_ndcs.sql
 2 | --
 3 | -- NDC attribute rows asserted by MTHSPL, with the NDC also passed through the
 4 | -- ndc_to_11 macro and suppress / cvf turned into boolean flags.
 5 | 
 6 | with mthspl_ndc_attributes as (
 7 | 
 8 |     select *
 9 |     from sagerx_lake.rxnorm_rxnsat
10 |     where sab = 'MTHSPL'
11 |         and atn = 'NDC'
12 | 
13 | )
14 | 
15 | select
16 |     {{ ndc_to_11('rxnsat.atv') }} as ndc11
17 |     , rxnsat.atv as ndc
18 |     , rxnsat.rxcui
19 |     , coalesce(rxnsat.suppress = 'N', false) as active      -- true only when suppress is 'N'
20 |     , coalesce(rxnsat.cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
21 | from mthspl_ndc_attributes as rxnsat
22 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__mthspl_products.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__mthspl_products.sql
 2 | --
 3 | -- MTHSPL-sourced 'DP' rows from RXNCONSO. The code column is exposed as ndc,
 4 | -- and suppress / cvf are turned into boolean flags.
 5 | 
 6 | with rxnconso as (
 7 | 
 8 |     select * from sagerx_lake.rxnorm_rxnconso
 9 | 
10 | )
11 | 
12 | select
13 |     rxcui
14 |     , str as name
15 |     , tty
16 |     , rxaui
17 |     , code as ndc
18 |     , coalesce(suppress = 'N', false) as active      -- true only when suppress is 'N'
19 |     , coalesce(cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
20 | from rxnconso
21 | where sab = 'MTHSPL'
22 |     and tty = 'DP'
23 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__mthspl_substances.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__mthspl_substances.sql
 2 | --
 3 | -- MTHSPL-sourced 'SU' rows from RXNCONSO. The code column is exposed as unii,
 4 | -- and suppress / cvf are turned into boolean flags.
 5 | 
 6 | with rxnconso as (
 7 | 
 8 |     select * from sagerx_lake.rxnorm_rxnconso
 9 | 
10 | )
11 | 
12 | select
13 |     rxcui
14 |     , str as name
15 |     , tty
16 |     , rxaui
17 |     , code as unii
18 |     , coalesce(suppress = 'N', false) as active      -- true only when suppress is 'N'
19 |     , coalesce(cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
20 | from rxnconso
21 | where sab = 'MTHSPL'
22 |     and tty = 'SU'
23 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__ndcs.sql
 2 | --
 3 | -- NDC attribute rows joined (on rxaui) to their RXNORM product atom. For
 4 | -- branded products (SBD / BPCK) the clinical product is looked up through a
 5 | -- 'tradename_of' relationship and the attribute's own rxcui is reported as
 6 | -- the brand product; for SCD / GPCK the attribute's rxcui is the clinical
 7 | -- product and brand_product_rxcui is null.
 8 | 
 9 | select
10 |     ndc_attr.atv as ndc
11 |     , case
12 |         when product.tty in ('BPCK', 'SBD') then clinical_product.rxcui
13 |         else ndc_attr.rxcui
14 |         end as clinical_product_rxcui
15 |     , case
16 |         when product.tty in ('BPCK', 'SBD') then ndc_attr.rxcui
17 |         end as brand_product_rxcui
18 |     , coalesce(ndc_attr.suppress = 'N', false) as active      -- true only when suppress is 'N'
19 |     , coalesce(ndc_attr.cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
20 | from sagerx_lake.rxnorm_rxnsat as ndc_attr
21 | inner join sagerx_lake.rxnorm_rxnconso as product
22 |     on product.rxaui = ndc_attr.rxaui
23 | -- the tradename lookup is only attempted for branded products
24 | left join sagerx_lake.rxnorm_rxnrel as tradename_rel
25 |     on tradename_rel.rxcui2 = ndc_attr.rxcui
26 |     and tradename_rel.rela = 'tradename_of'
27 |     and product.tty in ('BPCK', 'SBD')
28 | left join sagerx_lake.rxnorm_rxnconso as clinical_product
29 |     on clinical_product.rxcui = tradename_rel.rxcui1
30 |     and clinical_product.tty in ('SCD', 'GPCK')
31 |     and clinical_product.sab = 'RXNORM'
32 | where ndc_attr.atn = 'NDC'
33 |     and ndc_attr.sab in ('ATC', 'CVX', 'DRUGBANK', 'MSH', 'MTHCMSFRF', 'MTHSPL', 'RXNORM', 'USP', 'VANDF')
34 |     and product.sab = 'RXNORM'
35 |     and product.tty in ('SCD', 'SBD', 'GPCK', 'BPCK')
36 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__precise_ingredient_links.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__precise_ingredient_links.sql
 2 | --
 3 | -- Distinct pairs of an ingredient strength (SCDC) and the precise ingredient
 4 | -- (PIN) related to it through 'precise_ingredient_of'.
 5 | 
 6 | select distinct
 7 |     scdc.rxcui as ingredient_strength_rxcui
 8 |     , pin.rxcui as precise_ingredient_rxcui
 9 | from sagerx_lake.rxnorm_rxnconso as pin
10 | inner join sagerx_lake.rxnorm_rxnrel as pin_rel
11 |     on pin_rel.rxcui2 = pin.rxcui
12 | inner join sagerx_lake.rxnorm_rxnconso as scdc
13 |     on scdc.rxcui = pin_rel.rxcui1
14 | where pin.tty = 'PIN'
15 |     and pin.sab = 'RXNORM'
16 |     and pin_rel.rela = 'precise_ingredient_of'
17 |     and scdc.tty = 'SCDC'
18 |     and scdc.sab = 'RXNORM'
19 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__precise_ingredients.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__precise_ingredients.sql
 2 | --
 3 | -- RXNORM-sourced precise ingredient (PIN) rows from RXNCONSO, with suppress /
 4 | -- cvf turned into boolean flags.
 5 | 
 6 | with rxnconso as (
 7 | 
 8 |     select * from sagerx_lake.rxnorm_rxnconso
 9 | 
10 | )
11 | 
12 | select
13 |     rxcui
14 |     , str as name
15 |     , tty
16 |     , coalesce(suppress = 'N', false) as active      -- true only when suppress is 'N'
17 |     , coalesce(cvf = '4096', false) as prescribable  -- true only when cvf is '4096'
18 | from rxnconso
19 | where sab = 'RXNORM'
20 |     and tty = 'PIN'
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__product_rxcuis.sql:
--------------------------------------------------------------------------------
1 | -- stg_rxnorm__product_rxcuis.sql
2 | -- All columns of the RXNORM-sourced product rows (SCD, SBD, GPCK, BPCK).
3 | 
4 | select rxnconso.*
5 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} as rxnconso
6 | where rxnconso.tty in ('SCD', 'SBD', 'GPCK', 'BPCK')
7 |     and rxnconso.sab = 'RXNORM'
8 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm/stg_rxnorm__products.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm__products.sql
 2 | --
 3 | -- RXNORM-sourced product rows (SCD, GPCK, SBD, BPCK). When the product's rxcui
 4 | -- is found in stg_rxnorm__brand_products, clinical_product_rxcui is taken from
 5 | -- that model; otherwise the product's own rxcui is used.
 6 | 
 7 | select
 8 |     product.rxcui as rxcui
 9 |     , product.str as name
10 |     , product.tty as tty
11 |     , case
12 |         when brand_product.rxcui is not null then brand_product.clinical_product_rxcui
13 |         else product.rxcui
14 |         end as clinical_product_rxcui
15 |     -- true only when suppress is 'N'
16 |     , coalesce(product.suppress = 'N', false) as active
17 |     -- true only when cvf is '4096'
18 |     , coalesce(product.cvf = '4096', false) as prescribable
19 | from {{ source('rxnorm', 'rxnorm_rxnconso') }} as product
20 | left join {{ ref('stg_rxnorm__brand_products') }} as brand_product
21 |     on product.rxcui = brand_product.rxcui
22 | where product.tty in ('SCD', 'GPCK', 'SBD', 'BPCK')
23 |     and product.sab = 'RXNORM'
24 | 
25 | {#
26 |     Earlier draft of this model, kept for reference only.
27 | 
28 |     It lives in a Jinja comment rather than a SQL block comment because dbt
29 |     renders Jinja that appears inside SQL comments, so the ref() calls below
30 |     would otherwise still register dependencies for this model (including
31 |     one on stg_rxnorm__clinical_products, which the live query never reads).
32 | 
33 | with
34 | 
35 | rcp as (
36 | 
37 |     select * from {{ ref('stg_rxnorm__clinical_products') }}
38 | 
39 | ),
40 | 
41 | rbp as (
42 | 
43 |     select * from {{ ref('stg_rxnorm__brand_products') }}
44 | 
45 | )
46 | 
47 | select distinct
48 |     coalesce(rbp.rxcui, rcp.rxcui, null) as product_rxcui
49 |     , coalesce(rbp.name, rcp.name, null) as product_name
50 |     , coalesce(rbp.tty, rcp.tty, null) as product_tty
51 |     , rcp.rxcui as clinical_product_rxcui
52 |     , rcp.name as clinical_product_name
53 |     , rcp.tty as clinical_product_tty
54 | from rcp
55 | left join rbp
56 |     on rbp.clinical_product_rxcui = rcp.rxcui
57 | #}
58 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm_historical/_rxnorm_historical__sources.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | sources:
4 |   - name: rxnorm_historical
5 |     schema: sagerx_lake
6 |     tables:
7 |       - name: rxnorm_historical
8 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm_historical/stg_rxnorm_historical__most_recent_ndcs.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxnorm_historical__most_recent_ndcs.sql
 2 | --
 3 | -- Keeps, for each NDC, the rows of stg_rxnorm_historical__ndcs that carry its
 4 | -- most recent end_date, and only when exactly one rxcui is recorded for that
 5 | -- (ndc, end_date) pair.
 6 | 
 7 | with
 8 | 
 9 | rxnorm_historical_ndcs as (
10 | 
11 |     select * from {{ ref('stg_rxnorm_historical__ndcs') }}
12 | 
13 | )
14 | 
15 | /*
16 | NOTE: we do this grouping and ranking to avoid NDCs that
17 | relate to RXCUIs that have been remapped to multiple RXCUIs
18 | - see issue #265 for more details
19 | */
20 | , grouped_and_ranked_rxnorm_historical_ndcs as (
21 | 
22 |     -- One row per (ndc, end_date). No ORDER BY here: row order inside a CTE
23 |     -- does not determine the final result, so sorting would be wasted work.
24 |     select
25 |         ndc
26 |         , end_date
27 |         -- 1 marks the most recent end_date for the ndc
28 |         , row_number() over (partition by ndc order by end_date desc) as end_date_line
29 |         -- number of non-null rxcui values sharing this ndc and end_date
30 |         , count(rxcui) as rxcui_count
31 |     from rxnorm_historical_ndcs
32 |     group by ndc, end_date
33 | 
34 | )
35 | 
36 | select
37 |     rxnorm_historical_ndcs.*
38 | from grouped_and_ranked_rxnorm_historical_ndcs as ranked
39 | inner join rxnorm_historical_ndcs
40 |     on rxnorm_historical_ndcs.ndc = ranked.ndc
41 |     and rxnorm_historical_ndcs.end_date = ranked.end_date
42 | where ranked.rxcui_count = 1 -- only NDCs that are associated with one RXCUI per end_date
43 |     and ranked.end_date_line = 1 -- only NDCs that are most recently associated with an RXCUI
44 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxnorm_historical/stg_rxnorm_historical__ndcs.sql:
--------------------------------------------------------------------------------
1 | -- stg_rxnorm_historical__ndcs.sql
2 | -- Pass-through of every column in the rxnorm_historical source table.
3 | 
4 | with rxnorm_historical as (
5 |     select * from {{ source('rxnorm_historical', 'rxnorm_historical') }}
6 | )
7 | 
8 | select * from rxnorm_historical
9 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxterms/_rxterms__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_rxterms__names
 5 |     description: Table of drug names and commonly used synonyms or abbreviations for the drugs. Only terms that are neither suppressed nor retired are included.
 6 |     columns:
 7 |       - name: name
 8 |         description: Drug name (either generic or brand name) and intended route.
 9 |       - name: synonyms
10 |         description: Commonly used synonyms or abbreviations for the drug.
11 | 
12 |   - name: stg_rxterms__strengths
13 |     description: Table of drug strengths and their corresponding rxcuis. Only terms that are neither suppressed nor retired are included.
14 |     columns:
15 |       - name: rxcui
16 |         description: The RxNorm concept unique identifier for the drug.
17 |       - name: name
18 |         description: Drug name (either generic or brand name) and intended route.
19 |       - name: strength
20 |         description: Strength information parsed from the RxNorm full name. Concatenated strength and dose form values.
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxterms/stg_rxterms__names.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxterms__names.sql
 2 | --
 3 | -- Distinct display names and their synonyms, taken from rxterms rows where
 4 | -- both suppress_for and is_retired are null.
 5 | 
 6 | with unsuppressed_rxterms as (
 7 | 
 8 |     select
 9 |         display_name
10 |         , display_name_synonym
11 |     from {{ source('rxterms', 'rxterms') }}
12 |     where suppress_for is null
13 |         and is_retired is null
14 | 
15 | )
16 | 
17 | select distinct
18 |     display_name as name
19 |     , display_name_synonym as synonyms
20 | from unsuppressed_rxterms
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/rxterms/stg_rxterms__strengths.sql:
--------------------------------------------------------------------------------
 1 | -- stg_rxterms__strengths.sql
 2 | --
 3 | -- Distinct rxcui / display name / strength rows, taken from rxterms rows
 4 | -- where both suppress_for and is_retired are null. The strength column is
 5 | -- the source strength and new_dose_form joined by a single space.
 6 | 
 7 | with unsuppressed_rxterms as (
 8 | 
 9 |     select
10 |         rxcui
11 |         , display_name
12 |         , strength
13 |         , new_dose_form
14 |     from {{ source('rxterms', 'rxterms') }}
15 |     where suppress_for is null
16 |         and is_retired is null
17 | 
18 | )
19 | 
20 | select distinct
21 |     rxcui
22 |     , display_name as name
23 |     , concat(strength, ' ', new_dose_form) as strength
24 | from unsuppressed_rxterms
25 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/umls/_stg_umls__models.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | models:
 4 |   - name: stg_umls__crosswalk_codes
 5 |     description: |
 6 |       UMLS crosswalk from MeSH to ICD9, ICD10, and SNOMED.
 7 | 
 8 |       TODO: Make this more generic - not just MeSH but other
 9 |       potential "from_sources".
10 |     columns:
11 |       - name: from_source
12 |         description: The source of the original code.
13 |       - name: from_code
14 |         description: The original code to which we want to map other codes.
15 |       - name: to_source
16 |         description: The destination source we are mapping to.
17 |       - name: to_code
18 |         description: The synonymous code we are mapping to.
19 |       - name: to_name
20 |         description: The name of the concept we are mapping to.
21 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/umls/_stg_umls__sources.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | sources:
 4 |   - name: umls
 5 |     schema: sagerx_lake
 6 |     tables:
 7 |       - name: umls_crosswalk
 8 |         description: |
 9 |           Crosswalk between select vocabularies in UMLS.
10 | 
11 |           https://documentation.uts.nlm.nih.gov/rest/source-asserted-identifiers/crosswalk/
12 | 
13 |           Currently available:
14 |             - MSH -> ICD9CM
15 |             - MSH -> ICD10CM
16 |             - MSH -> SNOMEDCT_US
17 | 
18 |           A common use case of the CUIs in the UMLS is as a sort of
19 |           ‘bridge of synonymy’ between code sets. For a given 
20 |           source-asserted code, the crosswalk endpoint will return 
21 |           codes from other sources that have UMLS-asserted synonymy.
22 |           It is important to note that the synonymy asserted by the 
23 |           UMLS in the MRCONSO.RRF files (and the APIs derived from 
24 |           them) has not been rigorously tested and maintained in 
25 |           actual clinical care.
26 |           
27 |           With that disclaimer, users often have questions such as 
28 |           ‘I have a code from vocabulary x, what is the equivalent 
29 |           code from vocabulary y according to UMLS synonymy?’.
30 |           Although UMLS CUIs can be used as a starting point, 
31 |           results should be carefully reviewed for relevancy
32 |           in your use case.
33 | 


--------------------------------------------------------------------------------
/dbt/sagerx/models/staging/umls/stg_umls__crosswalk_codes.sql:
--------------------------------------------------------------------------------
 1 | -- stg_umls__crosswalk_codes.sql
 2 | --
 3 | -- Non-obsolete rows of the UMLS crosswalk source, renamed into a generic
 4 | -- from_* / to_* shape. The from_source is currently the constant 'MSH'.
 5 | 
 6 | with umls_crosswalk as (
 7 | 
 8 |     select * from {{ source('umls', 'umls_crosswalk') }}
 9 | 
10 | )
11 | 
12 | select
13 |     -- TODO: make DAG store the source name (MSH)
14 |     -- so this is more general than just MeSH
15 |     'MSH' as from_source
16 |     , mesh_code as from_code
17 |     , root_source as to_source
18 |     , ui as to_code
19 |     , name as to_name
20 | from umls_crosswalk
21 | where obsolete = false
22 | 


--------------------------------------------------------------------------------
/dbt/sagerx/seeds/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/seeds/.gitkeep


--------------------------------------------------------------------------------
/dbt/sagerx/seeds/_seeds__models.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | seeds:
4 |   - name: usp_preservatives
5 |     description: |
6 |       A list of CAS RN identifiers and USP product names obtained manually from
7 |       searching the [USP catalog](https://store.usp.org/preservatives/category/USP-1213)
8 |       for products in the "Preservatives" category.      
9 | 


--------------------------------------------------------------------------------
/dbt/sagerx/seeds/usp_preservatives.csv:
--------------------------------------------------------------------------------
 1 | cas_rn,usp_product_name
 2 | 17927-65-0,Aluminum Sulfate (2 g)
 3 | 60-00-4,Edetic Acid (200 mg)
 4 | 79-09-4,Propionic Acid (1.5 mL/ampule; 3 ampules)
 5 | 6001-64-5,Chlorobutanol (200 mg)
 6 | 59-51-8,Racemethionine (200 mg)
 7 | 128-37-0,Butylated Hydroxytoluene (500 mg)
 8 | 5793-89-5,Calcium Saccharate (200 mg)
 9 | 121-00-6,3-tert-Butyl-4-hydroxyanisole (200 mg)
10 | 137-40-6,Sodium Propionate (200 mg)
11 | 89-65-6,Erythorbic Acid (50 mg)
12 | 122-99-6,Phenoxyethanol (500 mg) (2-Phenoxyethanol)
13 | 94-13-3,Propylparaben (200 mg)
14 | 8001-54-5,Benzalkonium Chloride (5 mL of approx. 4% aqueous solution)
15 | 7681-57-4,Sodium Metabisulfite (2 X 500 mg)
16 | 110-44-1,Sorbic Acid (1 g)
17 | 100-51-6,Benzyl Alcohol (500 mg/ampule)
18 | 99-76-3,Methylparaben (125 mg)
19 | 590-00-1,Potassium Sorbate (1 g)
20 | 24634-61-5,Potassium Sorbate (1 g)
21 | 532-32-1,Sodium Benzoate (1 g)
22 | 88-32-4,2-tert-Butyl-4-hydroxyanisole (200 mg)
23 | 120-47-8,Ethylparaben (200 mg)
24 | 90-64-2,Mandelic Acid (500 mg)
25 | 121-79-9,Propyl Gallate (200 mg)
26 | 4075-81-4,Calcium Propionate (100 mg)
27 | 94-26-8,Butylparaben (200 mg)
28 | 39236-46-9,Imidurea (200 mg)
29 | 520-45-6,Dehydroacetic Acid (200 mg)
30 | 57-09-0,Cetrimonium Bromide (1 g)
31 | 


--------------------------------------------------------------------------------
/dbt/sagerx/snapshots/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/snapshots/.gitkeep


--------------------------------------------------------------------------------
/dbt/sagerx/tests/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/dbt/sagerx/tests/.gitkeep


--------------------------------------------------------------------------------
/docs/images/sagerx_airflow_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/docs/images/sagerx_airflow_example.png


--------------------------------------------------------------------------------
/docs/images/sagerx_postgres_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderxio/sagerx/8f64ff1bc743150661fb466a60d18318fca047a5/docs/images/sagerx_postgres_example.png


--------------------------------------------------------------------------------
/docs/style_guide.md:
--------------------------------------------------------------------------------
 1 | # SageRx Style Guide
 2 | 
 3 | ## Purpose
 4 | 
 5 | This guide will help you understand how we structure this project, such as table and schema names.
 6 | 
 7 | ## Table Names
 8 | 
 9 | These also correspond to the underlying file names that create the tables. File names must be unique and correspond to the name of the model when selected and created in the warehouse.
10 | 
11 | We recommend putting as much clear information into the file name as possible, including a prefix for the layer the model exists in, important grouping information, and specific information about the entity or transformation in the model.
12 | 
13 | **Marts**:
14 | 
15 | - Name format: [concept]s.sql
16 | - Concept correctly captures the content of the table; this is important because these tables are user-facing
17 | - Name should be plural
18 | 
19 | **Intermediates**:
20 | 
21 | - Name format: int\_[entity]s\_[verb]s
22 | - Verbs should capture the business logic or transformations conducted
23 | - Name should be plural
24 | - Staging table references should be in a CTE
25 | 
26 | **Staging**:
27 | 
28 | - Name format: stg\_[source]\_\_[entity]s
29 | - Entity captures the data values expected
30 | - Name should be plural
31 | - Staging models are the only place we’ll reference source tables, and our staging models should have a 1-to-1 relationship to our source tables
32 | - Source table references should be in a CTE
33 | 
34 | **Sources**:
35 | 
36 | - Name format: [source]\_[content]
37 | - Content captures the raw data that is imported
38 | - Name should be singular
39 | 
40 | ## Schema Names
41 | 
42 | **sagerx_lake**
43 | 
44 | - Contains raw data from data sources and seed tables. Users can also access these tables to manipulate the data for their use cases.
45 | 
46 | **sagerx_dev**
47 | 
48 | - Contains tables in development live and in-progress data can be stored.
49 | 
50 | **sagerx**
51 | 
52 | - Contains user-facing tables live, these are expected to be "production" ready data.
53 | 
54 | ## DAG Philosophy
55 | 
56 | We use Airflow DAGs to download data and load it into our database. Because every DAG follows this same pattern, we have been able to create layers of abstraction.
57 | 
58 | **airflow operator**
59 | 
60 | - Creates a DAG with standard parameters.
61 | 
62 | **sagerx**
63 | 
64 | - Project specific functions on how to interact with the project and its data.
65 | - Add common ways to interact with the project or the data here.
66 | 
67 | **common_dag_tasks**
68 | 
69 | - Common operations performed by Airflow DAGs.
70 | - Useful in defining how we have standardized the way that DAGs run.
71 | - Add tasks here that abstract away common airflow operations.
72 | 
73 | **user_macros**
74 | 
75 | - Common functions used to manipulate data.
76 | - Add common ways to process data here.
77 | 


--------------------------------------------------------------------------------
/pgadmin/servers.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "Servers": {
 3 |     "1": {
 4 |       "Name": "sagerx",
 5 |       "Group": "Servers",
 6 |       "Host": "postgres",
 7 |       "Port": 5432,
 8 |       "MaintenanceDB": "postgres",
 9 |       "Username": "sagerx",
10 |       "SSLMode": "prefer",
11 |       "Comment": "Server with airflow and sagerx databases on it"
12 |     }
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/postgres/0_pg_stat_statement.sh:
--------------------------------------------------------------------------------
1 | sed -i "s/#shared_preload_libraries = ''/shared_preload_libraries = 'pg_stat_statements'\npg_stat_statements.max=10000\npg_stat_statements.track=all/g" /var/lib/postgresql/data/postgresql.conf
2 | 
3 | echo "Enabled pg_stat_statements"


--------------------------------------------------------------------------------
/postgres/1_airflow.sql:
--------------------------------------------------------------------------------
 1 | -- Create the role and the database that hold Airflow's metadata.
 2 | -- NOTE(review): the password is hard-coded here; presumably acceptable for local development only.
 3 | CREATE USER airflow WITH ENCRYPTED PASSWORD 'airflow';
 4 | -- Make airflow the database owner: on PostgreSQL 15+ database-level privileges alone no
 5 | -- longer allow creating tables in the public schema, but the database owner still can.
 6 | CREATE DATABASE airflow OWNER airflow;
 7 | GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow;
 8 | 
 9 | -- Foreign data wrapper that lets the sagerx role reach the airflow database from this one.
10 | CREATE EXTENSION IF NOT EXISTS postgres_fdw;
11 | CREATE SERVER airflow_fdw FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'postgres', port '5432', dbname 'airflow');
12 | -- Connections opened through airflow_fdw on behalf of sagerx authenticate as the airflow role.
13 | CREATE USER MAPPING FOR sagerx SERVER airflow_fdw OPTIONS (user 'airflow', password 'airflow');
14 | GRANT USAGE ON FOREIGN SERVER airflow_fdw TO sagerx;


--------------------------------------------------------------------------------
/postgres/2_sagerx_setup.sql:
--------------------------------------------------------------------------------
 1 | -- SageRx schemas: raw lake, development area, and user-facing output
 2 | CREATE SCHEMA sagerx_lake;
 3 | CREATE SCHEMA sagerx_dev;
 4 | CREATE SCHEMA sagerx;
 5 | 
 6 | -- Query monitoring: register the pg_stat_statements extension in this database
 7 | CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
 8 | 
 9 | -- Bookkeeping table; presumably one row per table describing whether it holds data
10 | -- and how it is materialized (confirm against the code that populates it)
11 | CREATE TABLE sagerx.data_availability
12 | (
13 |     schema_name  TEXT,
14 |     table_name   TEXT,
15 |     has_data     BOOLEAN,
16 |     materialized TEXT
17 | );


--------------------------------------------------------------------------------