├── .editorconfig ├── .gitignore ├── README.md ├── benchmarks ├── .gitkeep └── trino │ ├── clickbench │ ├── .gitignore │ ├── hydrate.sh │ ├── queries │ │ ├── q01.sql │ │ ├── q02.sql │ │ ├── q03.sql │ │ ├── q04.sql │ │ ├── q05.sql │ │ ├── q06.sql │ │ ├── q07.sql │ │ ├── q08.sql │ │ ├── q09.sql │ │ ├── q10.sql │ │ ├── q11.sql │ │ ├── q12.sql │ │ ├── q13.sql │ │ ├── q14.sql │ │ ├── q15.sql │ │ ├── q16.sql │ │ ├── q17.sql │ │ ├── q18.sql │ │ ├── q19.sql │ │ ├── q20.sql │ │ ├── q21.sql │ │ ├── q22.sql │ │ ├── q23.sql │ │ ├── q24.sql │ │ ├── q25.sql │ │ ├── q26.sql │ │ ├── q27.sql │ │ ├── q28.sql │ │ ├── q29.sql │ │ ├── q30.sql │ │ ├── q31.sql │ │ ├── q32.sql │ │ ├── q33.sql │ │ ├── q34.sql │ │ ├── q35.sql │ │ ├── q36.sql │ │ ├── q37.sql │ │ ├── q38.sql │ │ ├── q39.sql │ │ ├── q40.sql │ │ ├── q41.sql │ │ ├── q42.sql │ │ └── q43.sql │ ├── sources │ │ ├── hits.sql │ │ └── sources.sql │ └── workspace.sdf.yml │ ├── imdb │ ├── .gitignore │ ├── hydrate.sh │ ├── queries │ │ ├── _01a.sql │ │ ├── _01b.sql │ │ ├── _01c.sql │ │ ├── _01d.sql │ │ ├── _02a.sql │ │ ├── _02b.sql │ │ ├── _02c.sql │ │ ├── _02d.sql │ │ ├── _03a.sql │ │ ├── _03b.sql │ │ ├── _03c.sql │ │ ├── _04a.sql │ │ ├── _04b.sql │ │ ├── _04c.sql │ │ ├── _05a.sql │ │ ├── _05b.sql │ │ ├── _05c.sql │ │ ├── _06a.sql │ │ ├── _06b.sql │ │ ├── _06c.sql │ │ ├── _06d.sql │ │ ├── _06e.sql │ │ ├── _06f.sql │ │ ├── _07a.sql │ │ ├── _07b.sql │ │ ├── _07c.sql │ │ ├── _08a.sql │ │ ├── _08b.sql │ │ ├── _08c.sql │ │ ├── _08d.sql │ │ ├── _09a.sql │ │ ├── _09b.sql │ │ ├── _09c.sql │ │ ├── _09d.sql │ │ ├── _10a.sql │ │ ├── _10b.sql │ │ ├── _10c.sql │ │ ├── _11a.sql │ │ ├── _11b.sql │ │ ├── _11c.sql │ │ ├── _11d.sql │ │ ├── _12a.sql │ │ ├── _12b.sql │ │ ├── _12c.sql │ │ ├── _13a.sql │ │ ├── _13b.sql │ │ ├── _13c.sql │ │ ├── _13d.sql │ │ ├── _14a.sql │ │ ├── _14b.sql │ │ ├── _14c.sql │ │ ├── _15a.sql │ │ ├── _15b.sql │ │ ├── _15c.sql │ │ ├── _15d.sql │ │ ├── _16a.sql │ │ ├── _16b.sql │ │ ├── _16c.sql │ │ ├── _16d.sql │ │ ├── _17a.sql │ │ ├── 
_17b.sql │ │ ├── _17c.sql │ │ ├── _17d.sql │ │ ├── _17e.sql │ │ ├── _17f.sql │ │ ├── _18a.sql │ │ ├── _18b.sql │ │ ├── _18c.sql │ │ ├── _19a.sql │ │ ├── _19b.sql │ │ ├── _19c.sql │ │ ├── _19d.sql │ │ ├── _20a.sql │ │ ├── _20b.sql │ │ ├── _20c.sql │ │ ├── _21a.sql │ │ ├── _21b.sql │ │ ├── _21c.sql │ │ ├── _22a.sql │ │ ├── _22b.sql │ │ ├── _22c.sql │ │ ├── _22d.sql │ │ ├── _23a.sql │ │ ├── _23b.sql │ │ ├── _23c.sql │ │ ├── _24a.sql │ │ ├── _24b.sql │ │ ├── _25a.sql │ │ ├── _25b.sql │ │ ├── _25c.sql │ │ ├── _26a.sql │ │ ├── _26b.sql │ │ ├── _26c.sql │ │ ├── _27a.sql │ │ ├── _27b.sql │ │ ├── _27c.sql │ │ ├── _28a.sql │ │ ├── _28b.sql │ │ ├── _28c.sql │ │ ├── _29a.sql │ │ ├── _29b.sql │ │ ├── _29c.sql │ │ ├── _30a.sql │ │ ├── _30b.sql │ │ ├── _30c.sql │ │ ├── _31a.sql │ │ ├── _31b.sql │ │ ├── _31c.sql │ │ ├── _32a.sql │ │ ├── _32b.sql │ │ ├── _33a.sql │ │ ├── _33b.sql │ │ └── _33c.sql │ ├── sources │ │ └── sources.sql │ └── workspace.sdf.yml │ └── tpch │ ├── .gitignore │ ├── hydrate.sh │ ├── queries │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q13.sql │ ├── q14.sql │ ├── q15.sql │ ├── q16.sql │ ├── q17.sql │ ├── q18.sql │ ├── q19.sql │ ├── q2.sql │ ├── q20.sql │ ├── q21.sql │ ├── q22.sql │ ├── q3.sql │ ├── q4.sql │ ├── q5.sql │ ├── q6.sql │ ├── q7.sql │ ├── q8.sql │ ├── q9.sql │ └── revenue0.sql │ ├── sources │ ├── customer.sql │ ├── lineitem.sql │ ├── nation.sql │ ├── orders.sql │ ├── part.sql │ ├── partsupp.sql │ ├── region.sql │ └── supplier.sql │ └── workspace.sdf.yml ├── docs ├── assets │ ├── favicon.png │ └── logo │ │ ├── dark.svg │ │ └── light.svg ├── cloud │ ├── authentication.mdx │ ├── billing.mdx │ ├── credentials.mdx │ ├── deployment.mdx │ ├── environments.mdx │ ├── github_integration.mdx │ ├── introduction.mdx │ ├── organization_roles.mdx │ ├── reporting.mdx │ ├── sso.mdx │ ├── troubleshooting.mdx │ └── workspace_credentials.mdx ├── database │ ├── benchmarks.mdx │ ├── file-formats.mdx │ ├── introduction.mdx │ ├── orchestration.mdx │ ├── 
partitioning.mdx │ └── supported-functions │ │ ├── overview.mdx │ │ └── trino │ │ ├── aggregate-functions.mdx │ │ ├── array-functions.mdx │ │ ├── binary-functions.mdx │ │ ├── comparison-functions.mdx │ │ ├── conditional-functions.mdx │ │ ├── datetime-functions.mdx │ │ ├── math-functions.mdx │ │ ├── regexp-functions.mdx │ │ └── string-functions.mdx ├── guide │ ├── advanced │ │ ├── custom_libs.mdx │ │ ├── custom_scripts.mdx │ │ ├── index.mdx │ │ ├── local_compilation.mdx │ │ ├── logging.mdx │ │ ├── telemetry.mdx │ │ └── udf.mdx │ ├── basics │ │ ├── build_and_deployment.mdx │ │ ├── classifiers.mdx │ │ └── lineage_metadata.mdx │ ├── data-quality │ │ ├── checks.mdx │ │ ├── overview.mdx │ │ ├── reports.mdx │ │ ├── stats.mdx │ │ └── tests.mdx │ ├── macro-processing │ │ ├── intro-to-jinja.mdx │ │ ├── jinja-variables.mdx │ │ ├── jinja.mdx │ │ ├── overview.mdx │ │ └── sdf-variables.mdx │ ├── setup │ │ ├── environments.mdx │ │ ├── integrations.mdx │ │ ├── io.mdx │ │ ├── materialization.mdx │ │ └── workspaces.mdx │ └── transformation │ │ └── authentication.mdx ├── integrations │ ├── aws │ │ ├── S3 │ │ │ ├── getting-started.mdx │ │ │ └── s3-example.mdx │ │ └── redshift │ │ │ └── getting-started.mdx │ ├── bigquery │ │ ├── basic-materialization.mdx │ │ ├── getting-started.mdx │ │ ├── incremental-materialization.mdx │ │ ├── seeds.mdx │ │ └── snapshots.mdx │ ├── cicd │ │ └── ci_cd.mdx │ ├── dagster │ │ └── getting-started.mdx │ ├── databricks │ │ └── databricks.mdx │ ├── dbt │ │ ├── integrating.mdx │ │ └── migrating.mdx │ ├── openai │ │ └── ai-classification.mdx │ ├── overview.mdx │ └── snowflake │ │ ├── advanced.mdx │ │ ├── basic-materialization.mdx │ │ ├── getting-started.mdx │ │ ├── incremental-materialization.mdx │ │ ├── seeds.mdx │ │ └── snapshots.mdx ├── introduction │ ├── features.mdx │ ├── getting-started.mdx │ ├── open-source.mdx │ └── welcome.mdx ├── linter │ ├── dbt_projects.mdx │ ├── format.mdx │ ├── macros.mdx │ ├── overview.mdx │ └── reference.mdx ├── mint.json ├── 
reference │ ├── bigquery │ │ ├── aggregate_functions.mdx │ │ ├── approximate_aggregate_functions.mdx │ │ ├── array_functions.mdx │ │ ├── bit_functions.mdx │ │ ├── date_functions.mdx │ │ ├── datetime_functions.mdx │ │ ├── debugging_functions.mdx │ │ ├── geography_functions.mdx │ │ ├── hash_functions.mdx │ │ ├── interval_functions.mdx │ │ ├── json_functions.mdx │ │ ├── math_functions.mdx │ │ ├── other_expressions.mdx │ │ ├── search_functions.mdx │ │ ├── security_functions.mdx │ │ ├── statistical_aggregate_functions.mdx │ │ ├── string_functions.mdx │ │ ├── temporal_functions.mdx │ │ ├── time_functions.mdx │ │ ├── timestamp_functions.mdx │ │ └── utility_functions.mdx │ ├── caching.mdx │ ├── error-codes.mdx │ ├── redshift │ │ ├── aggregate_functions.mdx │ │ ├── math_functions.mdx │ │ ├── other_expressions.mdx │ │ ├── string_functions.mdx │ │ └── temporal_functions.mdx │ ├── sdf-cli.mdx │ ├── sdf-information-schema.mdx │ ├── sdf-yml.mdx │ ├── snowflake │ │ ├── account_usage_table_functions.mdx │ │ ├── aggregate_functions.mdx │ │ ├── bitwise_expression_functions.mdx │ │ ├── conditional_expression_functions.mdx │ │ ├── context_functions.mdx │ │ ├── conversion_functions.mdx │ │ ├── data_generation_functions.mdx │ │ ├── date_and_time_functions.mdx │ │ ├── encryption_functions.mdx │ │ ├── geospatial_functions.mdx │ │ ├── hash_functions.mdx │ │ ├── information_schema.mdx │ │ ├── metadata_functions.mdx │ │ ├── numeric_functions.mdx │ │ ├── semi-structured_and_structured_data_functions.mdx │ │ ├── string_and_binary_functions.mdx │ │ ├── string_functions.mdx │ │ ├── system_functions.mdx │ │ ├── table_functions.mdx │ │ ├── vector_similarity_functions.mdx │ │ └── window_functions.mdx │ ├── support.mdx │ └── trino │ │ ├── aggregate_functions.mdx │ │ ├── array_functions.mdx │ │ ├── binary_functions.mdx │ │ ├── bitwise_functions.mdx │ │ ├── color_functions.mdx │ │ ├── comparison_functions.mdx │ │ ├── conditional_functions.mdx │ │ ├── conversion_functions.mdx │ │ ├── 
datetime_functions.mdx │ │ ├── geospatial_functions.mdx │ │ ├── hyperloglog_functions.mdx │ │ ├── json_functions.mdx │ │ ├── lambda_functions.mdx │ │ ├── map_functions.mdx │ │ ├── math_functions.mdx │ │ ├── ml_functions.mdx │ │ ├── mongodb_functions.mdx │ │ ├── qdigest_functions.mdx │ │ ├── regexp_functions.mdx │ │ ├── sdf_execution_support │ │ └── all_functions.mdx │ │ ├── session_functions.mdx │ │ ├── setdigest_functions.mdx │ │ ├── string_functions.mdx │ │ ├── t-digest_functions.mdx │ │ ├── teradata_functions.mdx │ │ ├── url_functions.mdx │ │ ├── uuid_functions.mdx │ │ └── window_functions.mdx ├── releases │ ├── latest.mdx │ └── migrations │ │ ├── 11-12.mdx │ │ └── 12-13.mdx ├── snippets │ └── preview-warning.mdx ├── tutorials │ ├── creating-a-model.mdx │ ├── debugging.mdx │ ├── deprecating-a-model.mdx │ ├── enriching-your-warehouse.mdx │ ├── ensuring-data-quality.mdx │ ├── learn-more.mdx │ ├── script_test.mdx │ └── tutorials-intro.mdx └── use-case │ └── data_deletion.mdx ├── examples ├── .gitkeep ├── bigquery_incremental │ ├── models │ │ ├── last_hn_timestamp.sql │ │ └── popular_articles.sql │ └── workspace.sdf.yml ├── bigquery_starter │ ├── .gitignore │ ├── models │ │ ├── aggregate_orders.sql │ │ └── customers_over_100.sql │ ├── run_me_in_bq.sql │ └── workspace.sdf.yml ├── cybersyn_tech_innovation │ ├── .gitignore │ ├── models │ │ └── sdf_snowflake │ │ │ └── cybersyn_tech_innovation │ │ │ ├── all_nvidia_patents.sql │ │ │ ├── funder_aggregates.sql │ │ │ └── most_starred_repos.sql │ └── workspace.sdf.yml ├── github_analysis │ ├── .gitignore │ ├── checks │ │ └── no_timezone_comparison.sql │ ├── classification │ │ └── taxonomy.sdf.yml │ ├── metadata │ │ ├── sdf_snowflake │ │ │ └── stg │ │ │ │ ├── repo_event_aggregates.sdf.yml │ │ │ │ ├── repo_stars.sdf.yml │ │ │ │ └── star_growth.sdf.yml │ │ └── tech__innovation_essentials │ │ │ └── cybersyn │ │ │ ├── github_repos.sdf.yml │ │ │ └── github_stars.sdf.yml │ ├── models │ │ └── sdf_snowflake │ │ │ ├── analysis │ │ │ 
├── activity_surges.sql │ │ │ ├── engagement_summary_by_repo.sql │ │ │ ├── event_dist_across_repo.sql │ │ │ ├── growth_repos.sql │ │ │ └── star_growth_by_repo.sql │ │ │ ├── dim │ │ │ ├── date.sql │ │ │ ├── event_type.sql │ │ │ └── repos.sql │ │ │ ├── fct │ │ │ ├── event_activity.sql │ │ │ ├── repo_activity.sql │ │ │ └── repo_engagement.sql │ │ │ └── stg │ │ │ ├── latest_repo_events.sql │ │ │ ├── repo_event_aggregates.sql │ │ │ ├── repo_names.sql │ │ │ ├── repo_stars.sql │ │ │ ├── star_dates.sql │ │ │ └── star_growth.sql │ └── workspace.sdf.yml ├── hello │ ├── .gitignore │ ├── models │ │ └── main.sql │ └── workspace.sdf.yml ├── hello_from_dbt │ ├── .gitignore │ ├── models │ │ └── main.sql │ ├── seeds │ │ └── .gitkeep │ └── workspace.sdf.yml ├── hello_with_pii │ ├── .gitignore │ ├── checks │ │ └── code_check.sql │ ├── models │ │ └── main.sql │ └── workspace.sdf.yml ├── hello_world_s3 │ ├── local │ │ ├── pop.csv │ │ ├── popdata.sql │ │ └── world_metrics.sql │ ├── remote │ │ ├── q1.sql │ │ └── un_pop_data.sql │ └── workspace.sdf.yml ├── jaffle_shop │ ├── .gitignore │ ├── models │ │ ├── analytics │ │ │ ├── customers.sql │ │ │ ├── meta.sdf.yml │ │ │ └── orders.sql │ │ ├── raw │ │ │ └── seeds.sdf.yml │ │ └── staging │ │ │ ├── stg_customers.sql │ │ │ ├── stg_orders.sql │ │ │ ├── stg_payments.sql │ │ │ └── tests.sdf.yml │ ├── seeds │ │ ├── .gitkeep │ │ ├── raw_customers.csv │ │ ├── raw_orders.csv │ │ └── raw_payments.csv │ └── workspace.sdf.yml ├── lineage │ ├── checks │ │ └── check_sink_phone_is_pii.sql │ ├── models │ │ ├── knis.sql │ │ ├── middle.sql │ │ ├── sink.sql │ │ └── source.sql │ └── workspace.sdf.yml ├── linter │ ├── .gitignore │ ├── ddls │ │ └── my_table.sdf.yml │ ├── models │ │ └── main.sql │ ├── seeds │ │ └── my_table.csv │ └── workspace.sdf.yml ├── moms_flower_shop │ ├── .gitignore │ ├── checks │ │ └── README.txt │ ├── classifications │ │ ├── column_classifiers.sdf.yml │ │ └── table_classifiers.sdf.yml │ ├── metadata │ │ ├── analytics │ │ │ ├── 
agg_installs_and_campaigns.sdf.yml │ │ │ └── dim_marketing_campaigns.sdf.yml │ │ ├── raw │ │ │ ├── raw_addresses.sdf.yml │ │ │ ├── raw_customers.sdf.yml │ │ │ ├── raw_inapp_events.sdf.yml │ │ │ └── raw_marketing_campaign_events.sdf.yml │ │ └── staging │ │ │ └── app_installs.sdf.yml │ ├── models │ │ ├── analytics │ │ │ └── agg_installs_and_campaigns.sql │ │ ├── raw │ │ │ ├── raw_addresses.sql │ │ │ ├── raw_customers.sql │ │ │ ├── raw_inapp_events.sql │ │ │ └── raw_marketing_campaign_events.sql │ │ └── staging │ │ │ ├── app_installs.sql │ │ │ ├── app_installs_v2.sql │ │ │ ├── customers.sql │ │ │ ├── inapp_events.sql │ │ │ ├── marketing_campaigns.sql │ │ │ └── stg_installs_per_campaign.sql │ ├── reports │ │ └── README.txt │ ├── seeds │ │ ├── csv │ │ │ └── README.txt │ │ └── parquet │ │ │ ├── addresses.parquet │ │ │ ├── customers.parquet │ │ │ ├── inapp_events.parquet │ │ │ └── marketing_campaign_events.parquet │ └── workspace.sdf.yml ├── moms_flower_shop_completed │ ├── .gitignore │ ├── checks │ │ ├── README.txt │ │ └── mixed_event_ids.sql │ ├── classifications │ │ ├── column_classifiers.sdf.yml │ │ └── table_classifiers.sdf.yml │ ├── metadata │ │ ├── analytics │ │ │ ├── agg_installs_and_campaigns.sdf.yml │ │ │ └── dim_marketing_campaigns.sdf.yml │ │ ├── raw │ │ │ ├── raw_addresses.sdf.yml │ │ │ ├── raw_customers.sdf.yml │ │ │ ├── raw_inapp_events.sdf.yml │ │ │ └── raw_marketing_campaign_events.sdf.yml │ │ └── staging │ │ │ ├── app_installs.sdf.yml │ │ │ └── inapp_events.sdf.yml │ ├── models │ │ ├── analytics │ │ │ ├── agg_installs_and_campaigns.sql │ │ │ └── dim_marketing_campaigns.sql │ │ ├── raw │ │ │ ├── raw_addresses.sql │ │ │ ├── raw_customers.sql │ │ │ ├── raw_inapp_events.sql │ │ │ └── raw_marketing_campaign_events.sql │ │ └── staging │ │ │ ├── app_installs.sql │ │ │ ├── app_installs_v2.sql │ │ │ ├── customers.sql │ │ │ ├── inapp_events.sql │ │ │ ├── marketing_campaigns.sql │ │ │ └── stg_installs_per_campaign.sql │ ├── reports │ │ ├── README.txt │ │ └── 
deprecated_table_reference.sql │ ├── seeds │ │ ├── csv │ │ │ └── README.txt │ │ └── parquet │ │ │ ├── addresses.parquet │ │ │ ├── customers.parquet │ │ │ ├── inapp_events.parquet │ │ │ └── marketing_campaign_events.parquet │ └── workspace.sdf.yml ├── pii_saas_platform │ ├── .gitignore │ ├── checks │ │ └── no_pii_in_external.sql │ ├── classification │ │ ├── taxonomy.sdf.yml │ │ └── users.sdf.yml │ ├── ddls │ │ └── payment │ │ │ └── public │ │ │ ├── invoices.sql │ │ │ ├── organizations.sql │ │ │ └── users.sql │ ├── models │ │ ├── external │ │ │ ├── invoice_stats.sql │ │ │ └── org_invoice_stats.sql │ │ └── internal │ │ │ ├── avg_invoice_amt.sql │ │ │ ├── invoice_payment_delay.sql │ │ │ ├── mau_per_org.sql │ │ │ ├── most_frequent_payer.sql │ │ │ ├── total_revenue_per_org.sql │ │ │ ├── users_per_domain.sql │ │ │ └── users_per_org.sql │ ├── reports │ │ └── tables_with_pii.sql │ └── workspace.sdf.yml ├── seeds │ ├── models │ │ └── french_customers.sql │ ├── seeds │ │ └── raw_customers.csv │ └── workspace.sdf.yml ├── snapshots │ ├── models │ │ ├── test1 │ │ │ ├── a1.sdf.yml │ │ │ ├── a1.sql │ │ │ ├── a2.sdf.yml │ │ │ ├── a2.sql │ │ │ ├── b.sdf.yml │ │ │ └── b.sql │ │ ├── test2 │ │ │ ├── a1.sdf.yml │ │ │ ├── a1.sql │ │ │ ├── a2.sdf.yml │ │ │ ├── a2.sql │ │ │ ├── b.sdf.yml │ │ │ └── b.sql │ │ └── test3 │ │ │ ├── a1.sdf.yml │ │ │ ├── a1.sql │ │ │ ├── a2.sdf.yml │ │ │ ├── a2.sql │ │ │ ├── b.sdf.yml │ │ │ └── b.sql │ └── workspace.sdf.yml └── tests │ ├── .gitignore │ ├── models │ └── raw_inapp_events.sql │ ├── seeds │ └── inapp_events.csv │ ├── src_metadata │ └── raw_inapp_events.sdf.yml │ └── workspace.sdf.yml └── schemas ├── .gitkeep └── sdf-definition-schema.json /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | insert_final_newline = true 3 | -------------------------------------------------------------------------------- /benchmarks/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/benchmarks/.gitkeep -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/.gitignore: -------------------------------------------------------------------------------- 1 | sdftarget 2 | hits.parquet -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/hydrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | # Downloads the ~1GB ClickBench dataset (hits.parquet) in parquet format into this script's directory. 5 | 6 | # Use dirname so this also works when invoked as `bash hydrate.sh` (a path with no slash). 7 | cd "$(dirname "${BASH_SOURCE[0]}")" 8 | # --fail: exit non-zero on an HTTP error instead of saving the error page as hits.parquet. 9 | curl --fail -LO https://cdn.sdf.com/data/clickbench/hits.parquet -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q01.sql: -------------------------------------------------------------------------------- 1 | SELECT COUNT(*) FROM hits; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q02.sql: -------------------------------------------------------------------------------- 1 | SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q03.sql: -------------------------------------------------------------------------------- 1 | SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q04.sql: -------------------------------------------------------------------------------- 1 | SELECT AVG("UserID") FROM hits; 2 | --------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q05.sql: -------------------------------------------------------------------------------- 1 | SELECT COUNT(DISTINCT "UserID") FROM hits; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q06.sql: -------------------------------------------------------------------------------- 1 | SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q07.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN("EventDate"), MAX("EventDate") FROM hits; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q08.sql: -------------------------------------------------------------------------------- 1 | SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q09.sql: -------------------------------------------------------------------------------- 1 | SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q10.sql: -------------------------------------------------------------------------------- 1 | SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q11.sql: -------------------------------------------------------------------------------- 1 | SELECT "MobilePhoneModel", COUNT(DISTINCT 
"UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q12.sql: -------------------------------------------------------------------------------- 1 | SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q13.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q14.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q15.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q16.sql: -------------------------------------------------------------------------------- 1 | SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- 
/benchmarks/trino/clickbench/queries/q17.sql: -------------------------------------------------------------------------------- 1 | SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q18.sql: -------------------------------------------------------------------------------- 1 | SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q19.sql: -------------------------------------------------------------------------------- 1 | SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q20.sql: -------------------------------------------------------------------------------- 1 | SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q21.sql: -------------------------------------------------------------------------------- 1 | SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q22.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- 
/benchmarks/trino/clickbench/queries/q23.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q24.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q25.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q26.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q27.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime"), "SearchPhrase" LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q28.sql: -------------------------------------------------------------------------------- 1 | SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING 
COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q29.sql: -------------------------------------------------------------------------------- 1 | SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q31.sql: -------------------------------------------------------------------------------- 1 | SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q32.sql: -------------------------------------------------------------------------------- 1 | SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q33.sql: -------------------------------------------------------------------------------- 1 | SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q34.sql: -------------------------------------------------------------------------------- 1 | SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; 2 | 
-------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q35.sql: -------------------------------------------------------------------------------- 1 | SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q36.sql: -------------------------------------------------------------------------------- 1 | SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q37.sql: -------------------------------------------------------------------------------- 1 | SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q38.sql: -------------------------------------------------------------------------------- 1 | SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q39.sql: -------------------------------------------------------------------------------- 1 | SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE 
"CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC OFFSET 1000 LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q40.sql: -------------------------------------------------------------------------------- 1 | SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC OFFSET 1000 LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q41.sql: -------------------------------------------------------------------------------- 1 | SELECT "URLHash", CAST(CAST("EventDate" AS INT) AS DATE), COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", CAST(CAST("EventDate" AS INT) AS DATE) ORDER BY PageViews DESC OFFSET 100 LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q42.sql: -------------------------------------------------------------------------------- 1 | SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) 
AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC OFFSET 1000 LIMIT 100; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/queries/q43.sql: -------------------------------------------------------------------------------- 1 | SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-14' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) OFFSET 1000 LIMIT 10; 2 | -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/sources/sources.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE hit_source with (format='PARQUET', LOCATION='hits.parquet'); -------------------------------------------------------------------------------- /benchmarks/trino/clickbench/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | edition: "1.3" 3 | name: "clickbench" 4 | defaults: 5 | dialect: trino 6 | description: > 7 | The ClickBench benchmark is designed to evaluate the performance of database systems using a dataset and queries derived from the real-world use cases of ClickHouse, 8 | a leading analytical database. This benchmark aims to measure how well different database systems handle large-scale analytical workloads. 9 | 10 | To run the benchmark: 11 | 1. Run the included hydrate.sh script which downloads relevant data 12 | 2. 
To execute all queries: `sdf run --no-cache` 13 | includes: 14 | - path: sources 15 | - path: queries 16 | - path: hits.parquet 17 | type: resource 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/.gitignore: -------------------------------------------------------------------------------- 1 | /imdb_data.zip 2 | /imdb_data 3 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/hydrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | # Downloads a ~1.2GB IMDB dataset in zipped format. 5 | 6 | cd "${BASH_SOURCE%/*}" 7 | curl -LO https://cdn.sdf.com/data/imdb/imdb_data.zip 8 | unzip imdb_data.zip 9 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_01a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, 2 | MIN(t.title) AS movie_title, 3 | MIN(t.production_year) AS movie_year 4 | FROM company_type AS ct, 5 | info_type AS it, 6 | movie_companies AS mc, 7 | movie_info_idx AS mi_idx, 8 | title AS t 9 | WHERE ct.kind = 'production companies' 10 | AND it.info = 'top 250 rank' 11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' 12 | AND (mc.note LIKE '%(co-production)%' 13 | OR mc.note LIKE '%(presents)%') 14 | AND ct.id = mc.company_type_id 15 | AND t.id = mc.movie_id 16 | AND t.id = mi_idx.movie_id 17 | AND mc.movie_id = mi_idx.movie_id 18 | AND it.id = mi_idx.info_type_id; 19 | 20 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_01b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, 2 | MIN(t.title) AS movie_title, 3 | MIN(t.production_year) AS movie_year 4 | FROM company_type 
AS ct, 5 | info_type AS it, 6 | movie_companies AS mc, 7 | movie_info_idx AS mi_idx, 8 | title AS t 9 | WHERE ct.kind = 'production companies' 10 | AND it.info = 'bottom 10 rank' 11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' 12 | AND t.production_year BETWEEN 2005 AND 2010 13 | AND ct.id = mc.company_type_id 14 | AND t.id = mc.movie_id 15 | AND t.id = mi_idx.movie_id 16 | AND mc.movie_id = mi_idx.movie_id 17 | AND it.id = mi_idx.info_type_id; 18 | 19 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_01c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, 2 | MIN(t.title) AS movie_title, 3 | MIN(t.production_year) AS movie_year 4 | FROM company_type AS ct, 5 | info_type AS it, 6 | movie_companies AS mc, 7 | movie_info_idx AS mi_idx, 8 | title AS t 9 | WHERE ct.kind = 'production companies' 10 | AND it.info = 'top 250 rank' 11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' 12 | AND (mc.note LIKE '%(co-production)%') 13 | AND t.production_year >2010 14 | AND ct.id = mc.company_type_id 15 | AND t.id = mc.movie_id 16 | AND t.id = mi_idx.movie_id 17 | AND mc.movie_id = mi_idx.movie_id 18 | AND it.id = mi_idx.info_type_id; 19 | 20 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_01d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, 2 | MIN(t.title) AS movie_title, 3 | MIN(t.production_year) AS movie_year 4 | FROM company_type AS ct, 5 | info_type AS it, 6 | movie_companies AS mc, 7 | movie_info_idx AS mi_idx, 8 | title AS t 9 | WHERE ct.kind = 'production companies' 10 | AND it.info = 'bottom 10 rank' 11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' 12 | AND t.production_year >2000 13 | AND ct.id = mc.company_type_id 14 | AND t.id = 
mc.movie_id 15 | AND t.id = mi_idx.movie_id 16 | AND mc.movie_id = mi_idx.movie_id 17 | AND it.id = mi_idx.info_type_id; 18 | 19 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_02a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title 2 | FROM company_name AS cn, 3 | keyword AS k, 4 | movie_companies AS mc, 5 | movie_keyword AS mk, 6 | title AS t 7 | WHERE cn.country_code ='[de]' 8 | AND k.keyword ='character-name-in-title' 9 | AND cn.id = mc.company_id 10 | AND mc.movie_id = t.id 11 | AND t.id = mk.movie_id 12 | AND mk.keyword_id = k.id 13 | AND mc.movie_id = mk.movie_id; 14 | 15 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_02b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title 2 | FROM company_name AS cn, 3 | keyword AS k, 4 | movie_companies AS mc, 5 | movie_keyword AS mk, 6 | title AS t 7 | WHERE cn.country_code ='[nl]' 8 | AND k.keyword ='character-name-in-title' 9 | AND cn.id = mc.company_id 10 | AND mc.movie_id = t.id 11 | AND t.id = mk.movie_id 12 | AND mk.keyword_id = k.id 13 | AND mc.movie_id = mk.movie_id; 14 | 15 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_02c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title 2 | FROM company_name AS cn, 3 | keyword AS k, 4 | movie_companies AS mc, 5 | movie_keyword AS mk, 6 | title AS t 7 | WHERE cn.country_code ='[sm]' 8 | AND k.keyword ='character-name-in-title' 9 | AND cn.id = mc.company_id 10 | AND mc.movie_id = t.id 11 | AND t.id = mk.movie_id 12 | AND mk.keyword_id = k.id 13 | AND mc.movie_id = mk.movie_id; 14 | 15 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_02d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title 2 | FROM company_name AS cn, 3 | keyword AS k, 4 | movie_companies AS mc, 5 | movie_keyword AS mk, 6 | title AS t 7 | WHERE cn.country_code ='[us]' 8 | AND k.keyword ='character-name-in-title' 9 | AND cn.id = mc.company_id 10 | AND mc.movie_id = t.id 11 | AND t.id = mk.movie_id 12 | AND mk.keyword_id = k.id 13 | AND mc.movie_id = mk.movie_id; 14 | 15 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_03a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title 2 | FROM keyword AS k, 3 | movie_info AS mi, 4 | movie_keyword AS mk, 5 | title AS t 6 | WHERE k.keyword LIKE '%sequel%' 7 | AND mi.info IN ('Sweden', 8 | 'Norway', 9 | 'Germany', 10 | 'Denmark', 11 | 'Swedish', 12 | 'Denish', 13 | 'Norwegian', 14 | 'German') 15 | AND t.production_year > 2005 16 | AND t.id = mi.movie_id 17 | AND t.id = mk.movie_id 18 | AND mk.movie_id = mi.movie_id 19 | AND k.id = mk.keyword_id; 20 | 21 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_03b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title 2 | FROM keyword AS k, 3 | movie_info AS mi, 4 | movie_keyword AS mk, 5 | title AS t 6 | WHERE k.keyword LIKE '%sequel%' 7 | AND mi.info IN ('Bulgaria') 8 | AND t.production_year > 2010 9 | AND t.id = mi.movie_id 10 | AND t.id = mk.movie_id 11 | AND mk.movie_id = mi.movie_id 12 | AND k.id = mk.keyword_id; 13 | 14 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_03c.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title 2 | FROM keyword AS k, 3 | movie_info AS mi, 4 | movie_keyword AS mk, 5 | title AS t 6 | WHERE k.keyword LIKE '%sequel%' 7 | AND mi.info IN ('Sweden', 8 | 'Norway', 9 | 'Germany', 10 | 'Denmark', 11 | 'Swedish', 12 | 'Denish', 13 | 'Norwegian', 14 | 'German', 15 | 'USA', 16 | 'American') 17 | AND t.production_year > 1990 18 | AND t.id = mi.movie_id 19 | AND t.id = mk.movie_id 20 | AND mk.movie_id = mi.movie_id 21 | AND k.id = mk.keyword_id; 22 | 23 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_04a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, 2 | MIN(t.title) AS movie_title 3 | FROM info_type AS it, 4 | keyword AS k, 5 | movie_info_idx AS mi_idx, 6 | movie_keyword AS mk, 7 | title AS t 8 | WHERE it.info ='rating' 9 | AND k.keyword LIKE '%sequel%' 10 | AND mi_idx.info > '5.0' 11 | AND t.production_year > 2005 12 | AND t.id = mi_idx.movie_id 13 | AND t.id = mk.movie_id 14 | AND mk.movie_id = mi_idx.movie_id 15 | AND k.id = mk.keyword_id 16 | AND it.id = mi_idx.info_type_id; 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_04b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, 2 | MIN(t.title) AS movie_title 3 | FROM info_type AS it, 4 | keyword AS k, 5 | movie_info_idx AS mi_idx, 6 | movie_keyword AS mk, 7 | title AS t 8 | WHERE it.info ='rating' 9 | AND k.keyword LIKE '%sequel%' 10 | AND mi_idx.info > '9.0' 11 | AND t.production_year > 2010 12 | AND t.id = mi_idx.movie_id 13 | AND t.id = mk.movie_id 14 | AND mk.movie_id = mi_idx.movie_id 15 | AND k.id = mk.keyword_id 16 | AND it.id = mi_idx.info_type_id; 17 | 18 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_04c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, 2 | MIN(t.title) AS movie_title 3 | FROM info_type AS it, 4 | keyword AS k, 5 | movie_info_idx AS mi_idx, 6 | movie_keyword AS mk, 7 | title AS t 8 | WHERE it.info ='rating' 9 | AND k.keyword LIKE '%sequel%' 10 | AND mi_idx.info > '2.0' 11 | AND t.production_year > 1990 12 | AND t.id = mi_idx.movie_id 13 | AND t.id = mk.movie_id 14 | AND mk.movie_id = mi_idx.movie_id 15 | AND k.id = mk.keyword_id 16 | AND it.id = mi_idx.info_type_id; 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_05a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS typical_european_movie 2 | FROM company_type AS ct, 3 | info_type AS it, 4 | movie_companies AS mc, 5 | movie_info AS mi, 6 | title AS t 7 | WHERE ct.kind = 'production companies' 8 | AND mc.note LIKE '%(theatrical)%' 9 | AND mc.note LIKE '%(France)%' 10 | AND mi.info IN ('Sweden', 11 | 'Norway', 12 | 'Germany', 13 | 'Denmark', 14 | 'Swedish', 15 | 'Denish', 16 | 'Norwegian', 17 | 'German') 18 | AND t.production_year > 2005 19 | AND t.id = mi.movie_id 20 | AND t.id = mc.movie_id 21 | AND mc.movie_id = mi.movie_id 22 | AND ct.id = mc.company_type_id 23 | AND it.id = mi.info_type_id; 24 | 25 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_05b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_vhs_movie 2 | FROM company_type AS ct, 3 | info_type AS it, 4 | movie_companies AS mc, 5 | movie_info AS mi, 6 | title AS t 7 | WHERE ct.kind = 'production companies' 8 | AND mc.note LIKE '%(VHS)%' 9 | AND mc.note LIKE '%(USA)%' 10 | AND 
mc.note LIKE '%(1994)%' 11 | AND mi.info IN ('USA', 12 | 'America') 13 | AND t.production_year > 2010 14 | AND t.id = mi.movie_id 15 | AND t.id = mc.movie_id 16 | AND mc.movie_id = mi.movie_id 17 | AND ct.id = mc.company_type_id 18 | AND it.id = mi.info_type_id; 19 | 20 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_05c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_movie 2 | FROM company_type AS ct, 3 | info_type AS it, 4 | movie_companies AS mc, 5 | movie_info AS mi, 6 | title AS t 7 | WHERE ct.kind = 'production companies' 8 | AND mc.note NOT LIKE '%(TV)%' 9 | AND mc.note LIKE '%(USA)%' 10 | AND mi.info IN ('Sweden', 11 | 'Norway', 12 | 'Germany', 13 | 'Denmark', 14 | 'Swedish', 15 | 'Denish', 16 | 'Norwegian', 17 | 'German', 18 | 'USA', 19 | 'American') 20 | AND t.production_year > 1990 21 | AND t.id = mi.movie_id 22 | AND t.id = mc.movie_id 23 | AND mc.movie_id = mi.movie_id 24 | AND ct.id = mc.company_type_id 25 | AND it.id = mi.info_type_id; 26 | 27 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_06a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, 2 | MIN(n.name) AS actor_name, 3 | MIN(t.title) AS marvel_movie 4 | FROM cast_info AS ci, 5 | keyword AS k, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE k.keyword = 'marvel-cinematic-universe' 10 | AND n.name LIKE '%Downey%Robert%' 11 | AND t.production_year > 2010 12 | AND k.id = mk.keyword_id 13 | AND t.id = mk.movie_id 14 | AND t.id = ci.movie_id 15 | AND ci.movie_id = mk.movie_id 16 | AND n.id = ci.person_id; 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_06b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, 2 | MIN(n.name) AS actor_name, 3 | MIN(t.title) AS hero_movie 4 | FROM cast_info AS ci, 5 | keyword AS k, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE k.keyword IN ('superhero', 10 | 'sequel', 11 | 'second-part', 12 | 'marvel-comics', 13 | 'based-on-comic', 14 | 'tv-special', 15 | 'fight', 16 | 'violence') 17 | AND n.name LIKE '%Downey%Robert%' 18 | AND t.production_year > 2014 19 | AND k.id = mk.keyword_id 20 | AND t.id = mk.movie_id 21 | AND t.id = ci.movie_id 22 | AND ci.movie_id = mk.movie_id 23 | AND n.id = ci.person_id; 24 | 25 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_06c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, 2 | MIN(n.name) AS actor_name, 3 | MIN(t.title) AS marvel_movie 4 | FROM cast_info AS ci, 5 | keyword AS k, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE k.keyword = 'marvel-cinematic-universe' 10 | AND n.name LIKE '%Downey%Robert%' 11 | AND t.production_year > 2014 12 | AND k.id = mk.keyword_id 13 | AND t.id = mk.movie_id 14 | AND t.id = ci.movie_id 15 | AND ci.movie_id = mk.movie_id 16 | AND n.id = ci.person_id; 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_06d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, 2 | MIN(n.name) AS actor_name, 3 | MIN(t.title) AS hero_movie 4 | FROM cast_info AS ci, 5 | keyword AS k, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE k.keyword IN ('superhero', 10 | 'sequel', 11 | 'second-part', 12 | 'marvel-comics', 13 | 'based-on-comic', 14 | 'tv-special', 15 | 'fight', 16 | 'violence') 17 | AND n.name LIKE '%Downey%Robert%' 18 | 
AND t.production_year > 2000 19 | AND k.id = mk.keyword_id 20 | AND t.id = mk.movie_id 21 | AND t.id = ci.movie_id 22 | AND ci.movie_id = mk.movie_id 23 | AND n.id = ci.person_id; 24 | 25 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_06e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, 2 | MIN(n.name) AS actor_name, 3 | MIN(t.title) AS marvel_movie 4 | FROM cast_info AS ci, 5 | keyword AS k, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE k.keyword = 'marvel-cinematic-universe' 10 | AND n.name LIKE '%Downey%Robert%' 11 | AND t.production_year > 2000 12 | AND k.id = mk.keyword_id 13 | AND t.id = mk.movie_id 14 | AND t.id = ci.movie_id 15 | AND ci.movie_id = mk.movie_id 16 | AND n.id = ci.person_id; 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_06f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, 2 | MIN(n.name) AS actor_name, 3 | MIN(t.title) AS hero_movie 4 | FROM cast_info AS ci, 5 | keyword AS k, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE k.keyword IN ('superhero', 10 | 'sequel', 11 | 'second-part', 12 | 'marvel-comics', 13 | 'based-on-comic', 14 | 'tv-special', 15 | 'fight', 16 | 'violence') 17 | AND t.production_year > 2000 18 | AND k.id = mk.keyword_id 19 | AND t.id = mk.movie_id 20 | AND t.id = ci.movie_id 21 | AND ci.movie_id = mk.movie_id 22 | AND n.id = ci.person_id; 23 | 24 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_07a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, 2 | MIN(t.title) AS biography_movie 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | info_type AS it, 6 
| link_type AS lt, 7 | movie_link AS ml, 8 | name AS n, 9 | person_info AS pi, 10 | title AS t 11 | WHERE an.name LIKE '%a%' 12 | AND it.info ='mini biography' 13 | AND lt.link ='features' 14 | AND n.name_pcode_cf BETWEEN 'A' AND 'F' 15 | AND (n.gender='m' 16 | OR (n.gender = 'f' 17 | AND n.name LIKE 'B%')) 18 | AND pi.note ='Volker Boehm' 19 | AND t.production_year BETWEEN 1980 AND 1995 20 | AND n.id = an.person_id 21 | AND n.id = pi.person_id 22 | AND ci.person_id = n.id 23 | AND t.id = ci.movie_id 24 | AND ml.linked_movie_id = t.id 25 | AND lt.id = ml.link_type_id 26 | AND it.id = pi.info_type_id 27 | AND pi.person_id = an.person_id 28 | AND pi.person_id = ci.person_id 29 | AND an.person_id = ci.person_id 30 | AND ci.movie_id = ml.linked_movie_id; 31 | 32 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_07b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, 2 | MIN(t.title) AS biography_movie 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | info_type AS it, 6 | link_type AS lt, 7 | movie_link AS ml, 8 | name AS n, 9 | person_info AS pi, 10 | title AS t 11 | WHERE an.name LIKE '%a%' 12 | AND it.info ='mini biography' 13 | AND lt.link ='features' 14 | AND n.name_pcode_cf LIKE 'D%' 15 | AND n.gender='m' 16 | AND pi.note ='Volker Boehm' 17 | AND t.production_year BETWEEN 1980 AND 1984 18 | AND n.id = an.person_id 19 | AND n.id = pi.person_id 20 | AND ci.person_id = n.id 21 | AND t.id = ci.movie_id 22 | AND ml.linked_movie_id = t.id 23 | AND lt.id = ml.link_type_id 24 | AND it.id = pi.info_type_id 25 | AND pi.person_id = an.person_id 26 | AND pi.person_id = ci.person_id 27 | AND an.person_id = ci.person_id 28 | AND ci.movie_id = ml.linked_movie_id; 29 | 30 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_07c.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member_name, 2 | MIN(pi.info) AS cast_member_info 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | info_type AS it, 6 | link_type AS lt, 7 | movie_link AS ml, 8 | name AS n, 9 | person_info AS pi, 10 | title AS t 11 | WHERE an.name IS NOT NULL 12 | AND (an.name LIKE '%a%' 13 | OR an.name LIKE 'A%') 14 | AND it.info ='mini biography' 15 | AND lt.link IN ('references', 16 | 'referenced in', 17 | 'features', 18 | 'featured in') 19 | AND n.name_pcode_cf BETWEEN 'A' AND 'F' 20 | AND (n.gender='m' 21 | OR (n.gender = 'f' 22 | AND n.name LIKE 'A%')) 23 | AND pi.note IS NOT NULL 24 | AND t.production_year BETWEEN 1980 AND 2010 25 | AND n.id = an.person_id 26 | AND n.id = pi.person_id 27 | AND ci.person_id = n.id 28 | AND t.id = ci.movie_id 29 | AND ml.linked_movie_id = t.id 30 | AND lt.id = ml.link_type_id 31 | AND it.id = pi.info_type_id 32 | AND pi.person_id = an.person_id 33 | AND pi.person_id = ci.person_id 34 | AND an.person_id = ci.person_id 35 | AND ci.movie_id = ml.linked_movie_id; 36 | 37 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_08a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS actress_pseudonym, 2 | MIN(t.title) AS japanese_movie_dubbed 3 | FROM aka_name AS an1, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | movie_companies AS mc, 7 | name AS n1, 8 | role_type AS rt, 9 | title AS t 10 | WHERE ci.note ='(voice: English version)' 11 | AND cn.country_code ='[jp]' 12 | AND mc.note LIKE '%(Japan)%' 13 | AND mc.note NOT LIKE '%(USA)%' 14 | AND n1.name LIKE '%Yo%' 15 | AND n1.name NOT LIKE '%Yu%' 16 | AND rt.role ='actress' 17 | AND an1.person_id = n1.id 18 | AND n1.id = ci.person_id 19 | AND ci.movie_id = t.id 20 | AND t.id = mc.movie_id 21 | AND mc.company_id = cn.id 22 | AND ci.role_id = rt.id 23 | AND 
an1.person_id = ci.person_id 24 | AND ci.movie_id = mc.movie_id; 25 | 26 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_08b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS acress_pseudonym, 2 | MIN(t.title) AS japanese_anime_movie 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | movie_companies AS mc, 7 | name AS n, 8 | role_type AS rt, 9 | title AS t 10 | WHERE ci.note ='(voice: English version)' 11 | AND cn.country_code ='[jp]' 12 | AND mc.note LIKE '%(Japan)%' 13 | AND mc.note NOT LIKE '%(USA)%' 14 | AND (mc.note LIKE '%(2006)%' 15 | OR mc.note LIKE '%(2007)%') 16 | AND n.name LIKE '%Yo%' 17 | AND n.name NOT LIKE '%Yu%' 18 | AND rt.role ='actress' 19 | AND t.production_year BETWEEN 2006 AND 2007 20 | AND (t.title LIKE 'One Piece%' 21 | OR t.title LIKE 'Dragon Ball Z%') 22 | AND an.person_id = n.id 23 | AND n.id = ci.person_id 24 | AND ci.movie_id = t.id 25 | AND t.id = mc.movie_id 26 | AND mc.company_id = cn.id 27 | AND ci.role_id = rt.id 28 | AND an.person_id = ci.person_id 29 | AND ci.movie_id = mc.movie_id; 30 | 31 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_08c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(a1.name) AS writer_pseudo_name, 2 | MIN(t.title) AS movie_title 3 | FROM aka_name AS a1, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | movie_companies AS mc, 7 | name AS n1, 8 | role_type AS rt, 9 | title AS t 10 | WHERE cn.country_code ='[us]' 11 | AND rt.role ='writer' 12 | AND a1.person_id = n1.id 13 | AND n1.id = ci.person_id 14 | AND ci.movie_id = t.id 15 | AND t.id = mc.movie_id 16 | AND mc.company_id = cn.id 17 | AND ci.role_id = rt.id 18 | AND a1.person_id = ci.person_id 19 | AND ci.movie_id = mc.movie_id; 20 | 21 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_08d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS costume_designer_pseudo, 2 | MIN(t.title) AS movie_with_costumes 3 | FROM aka_name AS an1, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | movie_companies AS mc, 7 | name AS n1, 8 | role_type AS rt, 9 | title AS t 10 | WHERE cn.country_code ='[us]' 11 | AND rt.role ='costume designer' 12 | AND an1.person_id = n1.id 13 | AND n1.id = ci.person_id 14 | AND ci.movie_id = t.id 15 | AND t.id = mc.movie_id 16 | AND mc.company_id = cn.id 17 | AND ci.role_id = rt.id 18 | AND an1.person_id = ci.person_id 19 | AND ci.movie_id = mc.movie_id; 20 | 21 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_09a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, 2 | MIN(chn.name) AS character_name, 3 | MIN(t.title) AS movie 4 | FROM aka_name AS an, 5 | char_name AS chn, 6 | cast_info AS ci, 7 | company_name AS cn, 8 | movie_companies AS mc, 9 | name AS n, 10 | role_type AS rt, 11 | title AS t 12 | WHERE ci.note IN ('(voice)', 13 | '(voice: Japanese version)', 14 | '(voice) (uncredited)', 15 | '(voice: English version)') 16 | AND cn.country_code ='[us]' 17 | AND mc.note IS NOT NULL 18 | AND (mc.note LIKE '%(USA)%' 19 | OR mc.note LIKE '%(worldwide)%') 20 | AND n.gender ='f' 21 | AND n.name LIKE '%Ang%' 22 | AND rt.role ='actress' 23 | AND t.production_year BETWEEN 2005 AND 2015 24 | AND ci.movie_id = t.id 25 | AND t.id = mc.movie_id 26 | AND ci.movie_id = mc.movie_id 27 | AND mc.company_id = cn.id 28 | AND ci.role_id = rt.id 29 | AND n.id = ci.person_id 30 | AND chn.id = ci.person_role_id 31 | AND an.person_id = n.id 32 | AND an.person_id = ci.person_id; 33 | 34 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_09b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, 2 | MIN(chn.name) AS voiced_character, 3 | MIN(n.name) AS voicing_actress, 4 | MIN(t.title) AS american_movie 5 | FROM aka_name AS an, 6 | char_name AS chn, 7 | cast_info AS ci, 8 | company_name AS cn, 9 | movie_companies AS mc, 10 | name AS n, 11 | role_type AS rt, 12 | title AS t 13 | WHERE ci.note = '(voice)' 14 | AND cn.country_code ='[us]' 15 | AND mc.note LIKE '%(200%)%' 16 | AND (mc.note LIKE '%(USA)%' 17 | OR mc.note LIKE '%(worldwide)%') 18 | AND n.gender ='f' 19 | AND n.name LIKE '%Angel%' 20 | AND rt.role ='actress' 21 | AND t.production_year BETWEEN 2007 AND 2010 22 | AND ci.movie_id = t.id 23 | AND t.id = mc.movie_id 24 | AND ci.movie_id = mc.movie_id 25 | AND mc.company_id = cn.id 26 | AND ci.role_id = rt.id 27 | AND n.id = ci.person_id 28 | AND chn.id = ci.person_role_id 29 | AND an.person_id = n.id 30 | AND an.person_id = ci.person_id; 31 | 32 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_09c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, 2 | MIN(chn.name) AS voiced_character_name, 3 | MIN(n.name) AS voicing_actress, 4 | MIN(t.title) AS american_movie 5 | FROM aka_name AS an, 6 | char_name AS chn, 7 | cast_info AS ci, 8 | company_name AS cn, 9 | movie_companies AS mc, 10 | name AS n, 11 | role_type AS rt, 12 | title AS t 13 | WHERE ci.note IN ('(voice)', 14 | '(voice: Japanese version)', 15 | '(voice) (uncredited)', 16 | '(voice: English version)') 17 | AND cn.country_code ='[us]' 18 | AND n.gender ='f' 19 | AND n.name LIKE '%An%' 20 | AND rt.role ='actress' 21 | AND ci.movie_id = t.id 22 | AND t.id = mc.movie_id 23 | AND ci.movie_id = mc.movie_id 24 | AND 
mc.company_id = cn.id 25 | AND ci.role_id = rt.id 26 | AND n.id = ci.person_id 27 | AND chn.id = ci.person_role_id 28 | AND an.person_id = n.id 29 | AND an.person_id = ci.person_id; 30 | 31 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_09d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, 2 | MIN(chn.name) AS voiced_char_name, 3 | MIN(n.name) AS voicing_actress, 4 | MIN(t.title) AS american_movie 5 | FROM aka_name AS an, 6 | char_name AS chn, 7 | cast_info AS ci, 8 | company_name AS cn, 9 | movie_companies AS mc, 10 | name AS n, 11 | role_type AS rt, 12 | title AS t 13 | WHERE ci.note IN ('(voice)', 14 | '(voice: Japanese version)', 15 | '(voice) (uncredited)', 16 | '(voice: English version)') 17 | AND cn.country_code ='[us]' 18 | AND n.gender ='f' 19 | AND rt.role ='actress' 20 | AND ci.movie_id = t.id 21 | AND t.id = mc.movie_id 22 | AND ci.movie_id = mc.movie_id 23 | AND mc.company_id = cn.id 24 | AND ci.role_id = rt.id 25 | AND n.id = ci.person_id 26 | AND chn.id = ci.person_role_id 27 | AND an.person_id = n.id 28 | AND an.person_id = ci.person_id; 29 | 30 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_10a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS uncredited_voiced_character, 2 | MIN(t.title) AS russian_movie 3 | FROM char_name AS chn, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | company_type AS ct, 7 | movie_companies AS mc, 8 | role_type AS rt, 9 | title AS t 10 | WHERE ci.note LIKE '%(voice)%' 11 | AND ci.note LIKE '%(uncredited)%' 12 | AND cn.country_code = '[ru]' 13 | AND rt.role = 'actor' 14 | AND t.production_year > 2005 15 | AND t.id = mc.movie_id 16 | AND t.id = ci.movie_id 17 | AND ci.movie_id = mc.movie_id 18 | AND chn.id = ci.person_role_id 19 | AND rt.id = 
ci.role_id 20 | AND cn.id = mc.company_id 21 | AND ct.id = mc.company_type_id; 22 | 23 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_10b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, 2 | MIN(t.title) AS russian_mov_with_actor_producer 3 | FROM char_name AS chn, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | company_type AS ct, 7 | movie_companies AS mc, 8 | role_type AS rt, 9 | title AS t 10 | WHERE ci.note LIKE '%(producer)%' 11 | AND cn.country_code = '[ru]' 12 | AND rt.role = 'actor' 13 | AND t.production_year > 2010 14 | AND t.id = mc.movie_id 15 | AND t.id = ci.movie_id 16 | AND ci.movie_id = mc.movie_id 17 | AND chn.id = ci.person_role_id 18 | AND rt.id = ci.role_id 19 | AND cn.id = mc.company_id 20 | AND ct.id = mc.company_type_id; 21 | 22 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_10c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, 2 | MIN(t.title) AS movie_with_american_producer 3 | FROM char_name AS chn, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | company_type AS ct, 7 | movie_companies AS mc, 8 | role_type AS rt, 9 | title AS t 10 | WHERE ci.note LIKE '%(producer)%' 11 | AND cn.country_code = '[us]' 12 | AND t.production_year > 1990 13 | AND t.id = mc.movie_id 14 | AND t.id = ci.movie_id 15 | AND ci.movie_id = mc.movie_id 16 | AND chn.id = ci.person_role_id 17 | AND rt.id = ci.role_id 18 | AND cn.id = mc.company_id 19 | AND ct.id = mc.company_type_id; 20 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_11a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, 2 | MIN(lt.link) AS movie_link_type, 3 | MIN(t.title) AS 
non_polish_sequel_movie 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | keyword AS k, 7 | link_type AS lt, 8 | movie_companies AS mc, 9 | movie_keyword AS mk, 10 | movie_link AS ml, 11 | title AS t 12 | WHERE cn.country_code !='[pl]' 13 | AND (cn.name LIKE '%Film%' 14 | OR cn.name LIKE '%Warner%') 15 | AND ct.kind ='production companies' 16 | AND k.keyword ='sequel' 17 | AND lt.link LIKE '%follow%' 18 | AND mc.note IS NULL 19 | AND t.production_year BETWEEN 1950 AND 2000 20 | AND lt.id = ml.link_type_id 21 | AND ml.movie_id = t.id 22 | AND t.id = mk.movie_id 23 | AND mk.keyword_id = k.id 24 | AND t.id = mc.movie_id 25 | AND mc.company_type_id = ct.id 26 | AND mc.company_id = cn.id 27 | AND ml.movie_id = mk.movie_id 28 | AND ml.movie_id = mc.movie_id 29 | AND mk.movie_id = mc.movie_id; 30 | 31 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_11b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, 2 | MIN(lt.link) AS movie_link_type, 3 | MIN(t.title) AS sequel_movie 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | keyword AS k, 7 | link_type AS lt, 8 | movie_companies AS mc, 9 | movie_keyword AS mk, 10 | movie_link AS ml, 11 | title AS t 12 | WHERE cn.country_code !='[pl]' 13 | AND (cn.name LIKE '%Film%' 14 | OR cn.name LIKE '%Warner%') 15 | AND ct.kind ='production companies' 16 | AND k.keyword ='sequel' 17 | AND lt.link LIKE '%follows%' 18 | AND mc.note IS NULL 19 | AND t.production_year = 1998 20 | AND t.title LIKE '%Money%' 21 | AND lt.id = ml.link_type_id 22 | AND ml.movie_id = t.id 23 | AND t.id = mk.movie_id 24 | AND mk.keyword_id = k.id 25 | AND t.id = mc.movie_id 26 | AND mc.company_type_id = ct.id 27 | AND mc.company_id = cn.id 28 | AND ml.movie_id = mk.movie_id 29 | AND ml.movie_id = mc.movie_id 30 | AND mk.movie_id = mc.movie_id; 31 | 32 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_11c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, 2 | MIN(mc.note) AS production_note, 3 | MIN(t.title) AS movie_based_on_book 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | keyword AS k, 7 | link_type AS lt, 8 | movie_companies AS mc, 9 | movie_keyword AS mk, 10 | movie_link AS ml, 11 | title AS t 12 | WHERE cn.country_code !='[pl]' 13 | AND (cn.name LIKE '20th Century Fox%' 14 | OR cn.name LIKE 'Twentieth Century Fox%') 15 | AND ct.kind != 'production companies' 16 | AND ct.kind IS NOT NULL 17 | AND k.keyword IN ('sequel', 18 | 'revenge', 19 | 'based-on-novel') 20 | AND mc.note IS NOT NULL 21 | AND t.production_year > 1950 22 | AND lt.id = ml.link_type_id 23 | AND ml.movie_id = t.id 24 | AND t.id = mk.movie_id 25 | AND mk.keyword_id = k.id 26 | AND t.id = mc.movie_id 27 | AND mc.company_type_id = ct.id 28 | AND mc.company_id = cn.id 29 | AND ml.movie_id = mk.movie_id 30 | AND ml.movie_id = mc.movie_id 31 | AND mk.movie_id = mc.movie_id; 32 | 33 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_11d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, 2 | MIN(mc.note) AS production_note, 3 | MIN(t.title) AS movie_based_on_book 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | keyword AS k, 7 | link_type AS lt, 8 | movie_companies AS mc, 9 | movie_keyword AS mk, 10 | movie_link AS ml, 11 | title AS t 12 | WHERE cn.country_code !='[pl]' 13 | AND ct.kind != 'production companies' 14 | AND ct.kind IS NOT NULL 15 | AND k.keyword IN ('sequel', 16 | 'revenge', 17 | 'based-on-novel') 18 | AND mc.note IS NOT NULL 19 | AND t.production_year > 1950 20 | AND lt.id = ml.link_type_id 21 | AND ml.movie_id = t.id 22 | AND t.id = 
mk.movie_id 23 | AND mk.keyword_id = k.id 24 | AND t.id = mc.movie_id 25 | AND mc.company_type_id = ct.id 26 | AND mc.company_id = cn.id 27 | AND ml.movie_id = mk.movie_id 28 | AND ml.movie_id = mc.movie_id 29 | AND mk.movie_id = mc.movie_id; 30 | 31 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_12a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, 2 | MIN(mi_idx.info) AS rating, 3 | MIN(t.title) AS drama_horror_movie 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it1, 7 | info_type AS it2, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | movie_info_idx AS mi_idx, 11 | title AS t 12 | WHERE cn.country_code = '[us]' 13 | AND ct.kind = 'production companies' 14 | AND it1.info = 'genres' 15 | AND it2.info = 'rating' 16 | AND mi.info IN ('Drama', 17 | 'Horror') 18 | AND mi_idx.info > '8.0' 19 | AND t.production_year BETWEEN 2005 AND 2008 20 | AND t.id = mi.movie_id 21 | AND t.id = mi_idx.movie_id 22 | AND mi.info_type_id = it1.id 23 | AND mi_idx.info_type_id = it2.id 24 | AND t.id = mc.movie_id 25 | AND ct.id = mc.company_type_id 26 | AND cn.id = mc.company_id 27 | AND mc.movie_id = mi.movie_id 28 | AND mc.movie_id = mi_idx.movie_id 29 | AND mi.movie_id = mi_idx.movie_id; 30 | 31 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_12b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS budget, 2 | MIN(t.title) AS unsuccsessful_movie 3 | FROM company_name AS cn, 4 | company_type AS ct, 5 | info_type AS it1, 6 | info_type AS it2, 7 | movie_companies AS mc, 8 | movie_info AS mi, 9 | movie_info_idx AS mi_idx, 10 | title AS t 11 | WHERE cn.country_code ='[us]' 12 | AND ct.kind IS NOT NULL 13 | AND (ct.kind ='production companies' 14 | OR ct.kind = 'distributors') 15 | AND it1.info 
='budget' 16 | AND it2.info ='bottom 10 rank' 17 | AND t.production_year >2000 18 | AND (t.title LIKE 'Birdemic%' 19 | OR t.title LIKE '%Movie%') 20 | AND t.id = mi.movie_id 21 | AND t.id = mi_idx.movie_id 22 | AND mi.info_type_id = it1.id 23 | AND mi_idx.info_type_id = it2.id 24 | AND t.id = mc.movie_id 25 | AND ct.id = mc.company_type_id 26 | AND cn.id = mc.company_id 27 | AND mc.movie_id = mi.movie_id 28 | AND mc.movie_id = mi_idx.movie_id 29 | AND mi.movie_id = mi_idx.movie_id; 30 | 31 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_12c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, 2 | MIN(mi_idx.info) AS rating, 3 | MIN(t.title) AS mainstream_movie 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it1, 7 | info_type AS it2, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | movie_info_idx AS mi_idx, 11 | title AS t 12 | WHERE cn.country_code = '[us]' 13 | AND ct.kind = 'production companies' 14 | AND it1.info = 'genres' 15 | AND it2.info = 'rating' 16 | AND mi.info IN ('Drama', 17 | 'Horror', 18 | 'Western', 19 | 'Family') 20 | AND mi_idx.info > '7.0' 21 | AND t.production_year BETWEEN 2000 AND 2010 22 | AND t.id = mi.movie_id 23 | AND t.id = mi_idx.movie_id 24 | AND mi.info_type_id = it1.id 25 | AND mi_idx.info_type_id = it2.id 26 | AND t.id = mc.movie_id 27 | AND ct.id = mc.company_type_id 28 | AND cn.id = mc.company_id 29 | AND mc.movie_id = mi.movie_id 30 | AND mc.movie_id = mi_idx.movie_id 31 | AND mi.movie_id = mi_idx.movie_id; 32 | 33 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_13a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, 2 | MIN(miidx.info) AS rating, 3 | MIN(t.title) AS german_movie 4 | FROM company_name AS cn, 5 | company_type 
AS ct, 6 | info_type AS it, 7 | info_type AS it2, 8 | kind_type AS kt, 9 | movie_companies AS mc, 10 | movie_info AS mi, 11 | movie_info_idx AS miidx, 12 | title AS t 13 | WHERE cn.country_code ='[de]' 14 | AND ct.kind ='production companies' 15 | AND it.info ='rating' 16 | AND it2.info ='release dates' 17 | AND kt.kind ='movie' 18 | AND mi.movie_id = t.id 19 | AND it2.id = mi.info_type_id 20 | AND kt.id = t.kind_id 21 | AND mc.movie_id = t.id 22 | AND cn.id = mc.company_id 23 | AND ct.id = mc.company_type_id 24 | AND miidx.movie_id = t.id 25 | AND it.id = miidx.info_type_id 26 | AND mi.movie_id = miidx.movie_id 27 | AND mi.movie_id = mc.movie_id 28 | AND miidx.movie_id = mc.movie_id; 29 | 30 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_13b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, 2 | MIN(miidx.info) AS rating, 3 | MIN(t.title) AS movie_about_winning 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it, 7 | info_type AS it2, 8 | kind_type AS kt, 9 | movie_companies AS mc, 10 | movie_info AS mi, 11 | movie_info_idx AS miidx, 12 | title AS t 13 | WHERE cn.country_code ='[us]' 14 | AND ct.kind ='production companies' 15 | AND it.info ='rating' 16 | AND it2.info ='release dates' 17 | AND kt.kind ='movie' 18 | AND t.title != '' 19 | AND (t.title LIKE '%Champion%' 20 | OR t.title LIKE '%Loser%') 21 | AND mi.movie_id = t.id 22 | AND it2.id = mi.info_type_id 23 | AND kt.id = t.kind_id 24 | AND mc.movie_id = t.id 25 | AND cn.id = mc.company_id 26 | AND ct.id = mc.company_type_id 27 | AND miidx.movie_id = t.id 28 | AND it.id = miidx.info_type_id 29 | AND mi.movie_id = miidx.movie_id 30 | AND mi.movie_id = mc.movie_id 31 | AND miidx.movie_id = mc.movie_id; 32 | 33 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_13c.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, 2 | MIN(miidx.info) AS rating, 3 | MIN(t.title) AS movie_about_winning 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it, 7 | info_type AS it2, 8 | kind_type AS kt, 9 | movie_companies AS mc, 10 | movie_info AS mi, 11 | movie_info_idx AS miidx, 12 | title AS t 13 | WHERE cn.country_code ='[us]' 14 | AND ct.kind ='production companies' 15 | AND it.info ='rating' 16 | AND it2.info ='release dates' 17 | AND kt.kind ='movie' 18 | AND t.title != '' 19 | AND (t.title LIKE 'Champion%' 20 | OR t.title LIKE 'Loser%') 21 | AND mi.movie_id = t.id 22 | AND it2.id = mi.info_type_id 23 | AND kt.id = t.kind_id 24 | AND mc.movie_id = t.id 25 | AND cn.id = mc.company_id 26 | AND ct.id = mc.company_type_id 27 | AND miidx.movie_id = t.id 28 | AND it.id = miidx.info_type_id 29 | AND mi.movie_id = miidx.movie_id 30 | AND mi.movie_id = mc.movie_id 31 | AND miidx.movie_id = mc.movie_id; 32 | 33 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_13d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, 2 | MIN(miidx.info) AS rating, 3 | MIN(t.title) AS movie 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it, 7 | info_type AS it2, 8 | kind_type AS kt, 9 | movie_companies AS mc, 10 | movie_info AS mi, 11 | movie_info_idx AS miidx, 12 | title AS t 13 | WHERE cn.country_code ='[us]' 14 | AND ct.kind ='production companies' 15 | AND it.info ='rating' 16 | AND it2.info ='release dates' 17 | AND kt.kind ='movie' 18 | AND mi.movie_id = t.id 19 | AND it2.id = mi.info_type_id 20 | AND kt.id = t.kind_id 21 | AND mc.movie_id = t.id 22 | AND cn.id = mc.company_id 23 | AND ct.id = mc.company_type_id 24 | AND miidx.movie_id = t.id 25 | AND it.id = miidx.info_type_id 26 | AND mi.movie_id = 
miidx.movie_id 27 | AND mi.movie_id = mc.movie_id 28 | AND miidx.movie_id = mc.movie_id; 29 | 30 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_14a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, 2 | MIN(t.title) AS northern_dark_movie 3 | FROM info_type AS it1, 4 | info_type AS it2, 5 | keyword AS k, 6 | kind_type AS kt, 7 | movie_info AS mi, 8 | movie_info_idx AS mi_idx, 9 | movie_keyword AS mk, 10 | title AS t 11 | WHERE it1.info = 'countries' 12 | AND it2.info = 'rating' 13 | AND k.keyword IN ('murder', 14 | 'murder-in-title', 15 | 'blood', 16 | 'violence') 17 | AND kt.kind = 'movie' 18 | AND mi.info IN ('Sweden', 19 | 'Norway', 20 | 'Germany', 21 | 'Denmark', 22 | 'Swedish', 23 | 'Denish', 24 | 'Norwegian', 25 | 'German', 26 | 'USA', 27 | 'American') 28 | AND mi_idx.info < '8.5' 29 | AND t.production_year > 2010 30 | AND kt.id = t.kind_id 31 | AND t.id = mi.movie_id 32 | AND t.id = mk.movie_id 33 | AND t.id = mi_idx.movie_id 34 | AND mk.movie_id = mi.movie_id 35 | AND mk.movie_id = mi_idx.movie_id 36 | AND mi.movie_id = mi_idx.movie_id 37 | AND k.id = mk.keyword_id 38 | AND it1.id = mi.info_type_id 39 | AND it2.id = mi_idx.info_type_id; 40 | 41 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_14b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, 2 | MIN(t.title) AS western_dark_production 3 | FROM info_type AS it1, 4 | info_type AS it2, 5 | keyword AS k, 6 | kind_type AS kt, 7 | movie_info AS mi, 8 | movie_info_idx AS mi_idx, 9 | movie_keyword AS mk, 10 | title AS t 11 | WHERE it1.info = 'countries' 12 | AND it2.info = 'rating' 13 | AND k.keyword IN ('murder', 14 | 'murder-in-title') 15 | AND kt.kind = 'movie' 16 | AND mi.info IN ('Sweden', 17 | 'Norway', 18 | 'Germany', 19 | 
'Denmark', 20 | 'Swedish', 21 | 'Denish', 22 | 'Norwegian', 23 | 'German', 24 | 'USA', 25 | 'American') 26 | AND mi_idx.info > '6.0' 27 | AND t.production_year > 2010 28 | AND (t.title LIKE '%murder%' 29 | OR t.title LIKE '%Murder%' 30 | OR t.title LIKE '%Mord%') 31 | AND kt.id = t.kind_id 32 | AND t.id = mi.movie_id 33 | AND t.id = mk.movie_id 34 | AND t.id = mi_idx.movie_id 35 | AND mk.movie_id = mi.movie_id 36 | AND mk.movie_id = mi_idx.movie_id 37 | AND mi.movie_id = mi_idx.movie_id 38 | AND k.id = mk.keyword_id 39 | AND it1.id = mi.info_type_id 40 | AND it2.id = mi_idx.info_type_id; 41 | 42 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_15a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, 2 | MIN(t.title) AS internet_movie 3 | FROM aka_title AS at, 4 | company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it1, 7 | keyword AS k, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | movie_keyword AS mk, 11 | title AS t 12 | WHERE cn.country_code = '[us]' 13 | AND it1.info = 'release dates' 14 | AND mc.note LIKE '%(200%)%' 15 | AND mc.note LIKE '%(worldwide)%' 16 | AND mi.note LIKE '%internet%' 17 | AND mi.info LIKE 'USA:% 200%' 18 | AND t.production_year > 2000 19 | AND t.id = at.movie_id 20 | AND t.id = mi.movie_id 21 | AND t.id = mk.movie_id 22 | AND t.id = mc.movie_id 23 | AND mk.movie_id = mi.movie_id 24 | AND mk.movie_id = mc.movie_id 25 | AND mk.movie_id = at.movie_id 26 | AND mi.movie_id = mc.movie_id 27 | AND mi.movie_id = at.movie_id 28 | AND mc.movie_id = at.movie_id 29 | AND k.id = mk.keyword_id 30 | AND it1.id = mi.info_type_id 31 | AND cn.id = mc.company_id 32 | AND ct.id = mc.company_type_id; 33 | 34 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_15b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, 2 | MIN(t.title) AS youtube_movie 3 | FROM aka_title AS at, 4 | company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it1, 7 | keyword AS k, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | movie_keyword AS mk, 11 | title AS t 12 | WHERE cn.country_code = '[us]' 13 | AND cn.name = 'YouTube' 14 | AND it1.info = 'release dates' 15 | AND mc.note LIKE '%(200%)%' 16 | AND mc.note LIKE '%(worldwide)%' 17 | AND mi.note LIKE '%internet%' 18 | AND mi.info LIKE 'USA:% 200%' 19 | AND t.production_year BETWEEN 2005 AND 2010 20 | AND t.id = at.movie_id 21 | AND t.id = mi.movie_id 22 | AND t.id = mk.movie_id 23 | AND t.id = mc.movie_id 24 | AND mk.movie_id = mi.movie_id 25 | AND mk.movie_id = mc.movie_id 26 | AND mk.movie_id = at.movie_id 27 | AND mi.movie_id = mc.movie_id 28 | AND mi.movie_id = at.movie_id 29 | AND mc.movie_id = at.movie_id 30 | AND k.id = mk.keyword_id 31 | AND it1.id = mi.info_type_id 32 | AND cn.id = mc.company_id 33 | AND ct.id = mc.company_type_id; 34 | 35 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_15c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, 2 | MIN(t.title) AS modern_american_internet_movie 3 | FROM aka_title AS at, 4 | company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it1, 7 | keyword AS k, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | movie_keyword AS mk, 11 | title AS t 12 | WHERE cn.country_code = '[us]' 13 | AND it1.info = 'release dates' 14 | AND mi.note LIKE '%internet%' 15 | AND mi.info IS NOT NULL 16 | AND (mi.info LIKE 'USA:% 199%' 17 | OR mi.info LIKE 'USA:% 200%') 18 | AND t.production_year > 1990 19 | AND t.id = at.movie_id 20 | AND t.id = mi.movie_id 21 | AND t.id = mk.movie_id 22 | AND t.id = mc.movie_id 23 | AND mk.movie_id = 
mi.movie_id 24 | AND mk.movie_id = mc.movie_id 25 | AND mk.movie_id = at.movie_id 26 | AND mi.movie_id = mc.movie_id 27 | AND mi.movie_id = at.movie_id 28 | AND mc.movie_id = at.movie_id 29 | AND k.id = mk.keyword_id 30 | AND it1.id = mi.info_type_id 31 | AND cn.id = mc.company_id 32 | AND ct.id = mc.company_type_id; 33 | 34 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_15d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(at.title) AS aka_title, 2 | MIN(t.title) AS internet_movie_title 3 | FROM aka_title AS at, 4 | company_name AS cn, 5 | company_type AS ct, 6 | info_type AS it1, 7 | keyword AS k, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | movie_keyword AS mk, 11 | title AS t 12 | WHERE cn.country_code = '[us]' 13 | AND it1.info = 'release dates' 14 | AND mi.note LIKE '%internet%' 15 | AND t.production_year > 1990 16 | AND t.id = at.movie_id 17 | AND t.id = mi.movie_id 18 | AND t.id = mk.movie_id 19 | AND t.id = mc.movie_id 20 | AND mk.movie_id = mi.movie_id 21 | AND mk.movie_id = mc.movie_id 22 | AND mk.movie_id = at.movie_id 23 | AND mi.movie_id = mc.movie_id 24 | AND mi.movie_id = at.movie_id 25 | AND mc.movie_id = at.movie_id 26 | AND k.id = mk.keyword_id 27 | AND it1.id = mi.info_type_id 28 | AND cn.id = mc.company_id 29 | AND ct.id = mc.company_type_id; 30 | 31 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_16a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, 2 | MIN(t.title) AS series_named_after_char 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | keyword AS k, 7 | movie_companies AS mc, 8 | movie_keyword AS mk, 9 | name AS n, 10 | title AS t 11 | WHERE cn.country_code ='[us]' 12 | AND k.keyword ='character-name-in-title' 13 | AND t.episode_nr >= 50 
14 | AND t.episode_nr < 100 15 | AND an.person_id = n.id 16 | AND n.id = ci.person_id 17 | AND ci.movie_id = t.id 18 | AND t.id = mk.movie_id 19 | AND mk.keyword_id = k.id 20 | AND t.id = mc.movie_id 21 | AND mc.company_id = cn.id 22 | AND an.person_id = ci.person_id 23 | AND ci.movie_id = mc.movie_id 24 | AND ci.movie_id = mk.movie_id 25 | AND mc.movie_id = mk.movie_id; 26 | 27 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_16b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, 2 | MIN(t.title) AS series_named_after_char 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | keyword AS k, 7 | movie_companies AS mc, 8 | movie_keyword AS mk, 9 | name AS n, 10 | title AS t 11 | WHERE cn.country_code ='[us]' 12 | AND k.keyword ='character-name-in-title' 13 | AND an.person_id = n.id 14 | AND n.id = ci.person_id 15 | AND ci.movie_id = t.id 16 | AND t.id = mk.movie_id 17 | AND mk.keyword_id = k.id 18 | AND t.id = mc.movie_id 19 | AND mc.company_id = cn.id 20 | AND an.person_id = ci.person_id 21 | AND ci.movie_id = mc.movie_id 22 | AND ci.movie_id = mk.movie_id 23 | AND mc.movie_id = mk.movie_id; 24 | 25 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_16c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, 2 | MIN(t.title) AS series_named_after_char 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | keyword AS k, 7 | movie_companies AS mc, 8 | movie_keyword AS mk, 9 | name AS n, 10 | title AS t 11 | WHERE cn.country_code ='[us]' 12 | AND k.keyword ='character-name-in-title' 13 | AND t.episode_nr < 100 14 | AND an.person_id = n.id 15 | AND n.id = ci.person_id 16 | AND ci.movie_id = t.id 17 | AND t.id = mk.movie_id 18 | AND 
mk.keyword_id = k.id 19 | AND t.id = mc.movie_id 20 | AND mc.company_id = cn.id 21 | AND an.person_id = ci.person_id 22 | AND ci.movie_id = mc.movie_id 23 | AND ci.movie_id = mk.movie_id 24 | AND mc.movie_id = mk.movie_id; 25 | 26 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_16d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, 2 | MIN(t.title) AS series_named_after_char 3 | FROM aka_name AS an, 4 | cast_info AS ci, 5 | company_name AS cn, 6 | keyword AS k, 7 | movie_companies AS mc, 8 | movie_keyword AS mk, 9 | name AS n, 10 | title AS t 11 | WHERE cn.country_code ='[us]' 12 | AND k.keyword ='character-name-in-title' 13 | AND t.episode_nr >= 5 14 | AND t.episode_nr < 100 15 | AND an.person_id = n.id 16 | AND n.id = ci.person_id 17 | AND ci.movie_id = t.id 18 | AND t.id = mk.movie_id 19 | AND mk.keyword_id = k.id 20 | AND t.id = mc.movie_id 21 | AND mc.company_id = cn.id 22 | AND an.person_id = ci.person_id 23 | AND ci.movie_id = mc.movie_id 24 | AND ci.movie_id = mk.movie_id 25 | AND mc.movie_id = mk.movie_id; 26 | 27 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_17a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, 2 | MIN(n.name) AS a1 3 | FROM cast_info AS ci, 4 | company_name AS cn, 5 | keyword AS k, 6 | movie_companies AS mc, 7 | movie_keyword AS mk, 8 | name AS n, 9 | title AS t 10 | WHERE cn.country_code ='[us]' 11 | AND k.keyword ='character-name-in-title' 12 | AND n.name LIKE 'B%' 13 | AND n.id = ci.person_id 14 | AND ci.movie_id = t.id 15 | AND t.id = mk.movie_id 16 | AND mk.keyword_id = k.id 17 | AND t.id = mc.movie_id 18 | AND mc.company_id = cn.id 19 | AND ci.movie_id = mc.movie_id 20 | AND ci.movie_id = mk.movie_id 21 | AND 
mc.movie_id = mk.movie_id; 22 | 23 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_17b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, 2 | MIN(n.name) AS a1 3 | FROM cast_info AS ci, 4 | company_name AS cn, 5 | keyword AS k, 6 | movie_companies AS mc, 7 | movie_keyword AS mk, 8 | name AS n, 9 | title AS t 10 | WHERE k.keyword ='character-name-in-title' 11 | AND n.name LIKE 'Z%' 12 | AND n.id = ci.person_id 13 | AND ci.movie_id = t.id 14 | AND t.id = mk.movie_id 15 | AND mk.keyword_id = k.id 16 | AND t.id = mc.movie_id 17 | AND mc.company_id = cn.id 18 | AND ci.movie_id = mc.movie_id 19 | AND ci.movie_id = mk.movie_id 20 | AND mc.movie_id = mk.movie_id; 21 | 22 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_17c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, 2 | MIN(n.name) AS a1 3 | FROM cast_info AS ci, 4 | company_name AS cn, 5 | keyword AS k, 6 | movie_companies AS mc, 7 | movie_keyword AS mk, 8 | name AS n, 9 | title AS t 10 | WHERE k.keyword ='character-name-in-title' 11 | AND n.name LIKE 'X%' 12 | AND n.id = ci.person_id 13 | AND ci.movie_id = t.id 14 | AND t.id = mk.movie_id 15 | AND mk.keyword_id = k.id 16 | AND t.id = mc.movie_id 17 | AND mc.company_id = cn.id 18 | AND ci.movie_id = mc.movie_id 19 | AND ci.movie_id = mk.movie_id 20 | AND mc.movie_id = mk.movie_id; 21 | 22 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_17d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie 2 | FROM cast_info AS ci, 3 | company_name AS cn, 4 | keyword AS k, 5 | movie_companies AS mc, 6 | movie_keyword AS mk, 7 | 
name AS n, 8 | title AS t 9 | WHERE k.keyword ='character-name-in-title' 10 | AND n.name LIKE '%Bert%' 11 | AND n.id = ci.person_id 12 | AND ci.movie_id = t.id 13 | AND t.id = mk.movie_id 14 | AND mk.keyword_id = k.id 15 | AND t.id = mc.movie_id 16 | AND mc.company_id = cn.id 17 | AND ci.movie_id = mc.movie_id 18 | AND ci.movie_id = mk.movie_id 19 | AND mc.movie_id = mk.movie_id; 20 | 21 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_17e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie 2 | FROM cast_info AS ci, 3 | company_name AS cn, 4 | keyword AS k, 5 | movie_companies AS mc, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE cn.country_code ='[us]' 10 | AND k.keyword ='character-name-in-title' 11 | AND n.id = ci.person_id 12 | AND ci.movie_id = t.id 13 | AND t.id = mk.movie_id 14 | AND mk.keyword_id = k.id 15 | AND t.id = mc.movie_id 16 | AND mc.company_id = cn.id 17 | AND ci.movie_id = mc.movie_id 18 | AND ci.movie_id = mk.movie_id 19 | AND mc.movie_id = mk.movie_id; 20 | 21 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_17f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie 2 | FROM cast_info AS ci, 3 | company_name AS cn, 4 | keyword AS k, 5 | movie_companies AS mc, 6 | movie_keyword AS mk, 7 | name AS n, 8 | title AS t 9 | WHERE k.keyword ='character-name-in-title' 10 | AND n.name LIKE '%B%' 11 | AND n.id = ci.person_id 12 | AND ci.movie_id = t.id 13 | AND t.id = mk.movie_id 14 | AND mk.keyword_id = k.id 15 | AND t.id = mc.movie_id 16 | AND mc.company_id = cn.id 17 | AND ci.movie_id = mc.movie_id 18 | AND ci.movie_id = mk.movie_id 19 | AND mc.movie_id = mk.movie_id; 20 | 21 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_18a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, 2 | MIN(mi_idx.info) AS movie_votes, 3 | MIN(t.title) AS movie_title 4 | FROM cast_info AS ci, 5 | info_type AS it1, 6 | info_type AS it2, 7 | movie_info AS mi, 8 | movie_info_idx AS mi_idx, 9 | name AS n, 10 | title AS t 11 | WHERE ci.note IN ('(producer)', 12 | '(executive producer)') 13 | AND it1.info = 'budget' 14 | AND it2.info = 'votes' 15 | AND n.gender = 'm' 16 | AND n.name LIKE '%Tim%' 17 | AND t.id = mi.movie_id 18 | AND t.id = mi_idx.movie_id 19 | AND t.id = ci.movie_id 20 | AND ci.movie_id = mi.movie_id 21 | AND ci.movie_id = mi_idx.movie_id 22 | AND mi.movie_id = mi_idx.movie_id 23 | AND n.id = ci.person_id 24 | AND it1.id = mi.info_type_id 25 | AND it2.id = mi_idx.info_type_id; 26 | 27 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_18b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, 2 | MIN(mi_idx.info) AS movie_votes, 3 | MIN(t.title) AS movie_title 4 | FROM cast_info AS ci, 5 | info_type AS it1, 6 | info_type AS it2, 7 | movie_info AS mi, 8 | movie_info_idx AS mi_idx, 9 | name AS n, 10 | title AS t 11 | WHERE ci.note IN ('(writer)', 12 | '(head writer)', 13 | '(written by)', 14 | '(story)', 15 | '(story editor)') 16 | AND it1.info = 'genres' 17 | AND it2.info = 'rating' 18 | AND mi.info IN ('Horror', 19 | 'Thriller') 20 | AND mi.note IS NULL 21 | AND mi_idx.info > '8.0' 22 | AND n.gender IS NOT NULL 23 | AND n.gender = 'f' 24 | AND t.production_year BETWEEN 2008 AND 2014 25 | AND t.id = mi.movie_id 26 | AND t.id = mi_idx.movie_id 27 | AND t.id = ci.movie_id 28 | AND ci.movie_id = mi.movie_id 29 | AND ci.movie_id = mi_idx.movie_id 30 | AND mi.movie_id = 
mi_idx.movie_id 31 | AND n.id = ci.person_id 32 | AND it1.id = mi.info_type_id 33 | AND it2.id = mi_idx.info_type_id; 34 | 35 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_18c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, 2 | MIN(mi_idx.info) AS movie_votes, 3 | MIN(t.title) AS movie_title 4 | FROM cast_info AS ci, 5 | info_type AS it1, 6 | info_type AS it2, 7 | movie_info AS mi, 8 | movie_info_idx AS mi_idx, 9 | name AS n, 10 | title AS t 11 | WHERE ci.note IN ('(writer)', 12 | '(head writer)', 13 | '(written by)', 14 | '(story)', 15 | '(story editor)') 16 | AND it1.info = 'genres' 17 | AND it2.info = 'votes' 18 | AND mi.info IN ('Horror', 19 | 'Action', 20 | 'Sci-Fi', 21 | 'Thriller', 22 | 'Crime', 23 | 'War') 24 | AND n.gender = 'm' 25 | AND t.id = mi.movie_id 26 | AND t.id = mi_idx.movie_id 27 | AND t.id = ci.movie_id 28 | AND ci.movie_id = mi.movie_id 29 | AND ci.movie_id = mi_idx.movie_id 30 | AND mi.movie_id = mi_idx.movie_id 31 | AND n.id = ci.person_id 32 | AND it1.id = mi.info_type_id 33 | AND it2.id = mi_idx.info_type_id; 34 | 35 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_19b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, 2 | MIN(t.title) AS kung_fu_panda 3 | FROM aka_name AS an, 4 | char_name AS chn, 5 | cast_info AS ci, 6 | company_name AS cn, 7 | info_type AS it, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | name AS n, 11 | role_type AS rt, 12 | title AS t 13 | WHERE ci.note = '(voice)' 14 | AND cn.country_code ='[us]' 15 | AND it.info = 'release dates' 16 | AND mc.note LIKE '%(200%)%' 17 | AND (mc.note LIKE '%(USA)%' 18 | OR mc.note LIKE '%(worldwide)%') 19 | AND mi.info IS NOT NULL 20 | AND (mi.info LIKE 'Japan:%2007%' 21 | OR mi.info LIKE 
'USA:%2008%') 22 | AND n.gender ='f' 23 | AND n.name LIKE '%Angel%' 24 | AND rt.role ='actress' 25 | AND t.production_year BETWEEN 2007 AND 2008 26 | AND t.title LIKE '%Kung%Fu%Panda%' 27 | AND t.id = mi.movie_id 28 | AND t.id = mc.movie_id 29 | AND t.id = ci.movie_id 30 | AND mc.movie_id = ci.movie_id 31 | AND mc.movie_id = mi.movie_id 32 | AND mi.movie_id = ci.movie_id 33 | AND cn.id = mc.company_id 34 | AND it.id = mi.info_type_id 35 | AND n.id = ci.person_id 36 | AND rt.id = ci.role_id 37 | AND n.id = an.person_id 38 | AND ci.person_id = an.person_id 39 | AND chn.id = ci.person_role_id; 40 | 41 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_19c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, 2 | MIN(t.title) AS jap_engl_voiced_movie 3 | FROM aka_name AS an, 4 | char_name AS chn, 5 | cast_info AS ci, 6 | company_name AS cn, 7 | info_type AS it, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | name AS n, 11 | role_type AS rt, 12 | title AS t 13 | WHERE ci.note IN ('(voice)', 14 | '(voice: Japanese version)', 15 | '(voice) (uncredited)', 16 | '(voice: English version)') 17 | AND cn.country_code ='[us]' 18 | AND it.info = 'release dates' 19 | AND mi.info IS NOT NULL 20 | AND (mi.info LIKE 'Japan:%200%' 21 | OR mi.info LIKE 'USA:%200%') 22 | AND n.gender ='f' 23 | AND n.name LIKE '%An%' 24 | AND rt.role ='actress' 25 | AND t.production_year > 2000 26 | AND t.id = mi.movie_id 27 | AND t.id = mc.movie_id 28 | AND t.id = ci.movie_id 29 | AND mc.movie_id = ci.movie_id 30 | AND mc.movie_id = mi.movie_id 31 | AND mi.movie_id = ci.movie_id 32 | AND cn.id = mc.company_id 33 | AND it.id = mi.info_type_id 34 | AND n.id = ci.person_id 35 | AND rt.id = ci.role_id 36 | AND n.id = an.person_id 37 | AND ci.person_id = an.person_id 38 | AND chn.id = ci.person_role_id; 39 | 40 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_19d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, 2 | MIN(t.title) AS jap_engl_voiced_movie 3 | FROM aka_name AS an, 4 | char_name AS chn, 5 | cast_info AS ci, 6 | company_name AS cn, 7 | info_type AS it, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | name AS n, 11 | role_type AS rt, 12 | title AS t 13 | WHERE ci.note IN ('(voice)', 14 | '(voice: Japanese version)', 15 | '(voice) (uncredited)', 16 | '(voice: English version)') 17 | AND cn.country_code ='[us]' 18 | AND it.info = 'release dates' 19 | AND n.gender ='f' 20 | AND rt.role ='actress' 21 | AND t.production_year > 2000 22 | AND t.id = mi.movie_id 23 | AND t.id = mc.movie_id 24 | AND t.id = ci.movie_id 25 | AND mc.movie_id = ci.movie_id 26 | AND mc.movie_id = mi.movie_id 27 | AND mi.movie_id = ci.movie_id 28 | AND cn.id = mc.company_id 29 | AND it.id = mi.info_type_id 30 | AND n.id = ci.person_id 31 | AND rt.id = ci.role_id 32 | AND n.id = an.person_id 33 | AND ci.person_id = an.person_id 34 | AND chn.id = ci.person_role_id; 35 | 36 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_20a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie 2 | FROM complete_cast AS cc, 3 | comp_cast_type AS cct1, 4 | comp_cast_type AS cct2, 5 | char_name AS chn, 6 | cast_info AS ci, 7 | keyword AS k, 8 | kind_type AS kt, 9 | movie_keyword AS mk, 10 | name AS n, 11 | title AS t 12 | WHERE cct1.kind = 'cast' 13 | AND cct2.kind LIKE '%complete%' 14 | AND chn.name NOT LIKE '%Sherlock%' 15 | AND (chn.name LIKE '%Tony%Stark%' 16 | OR chn.name LIKE '%Iron%Man%') 17 | AND k.keyword IN ('superhero', 18 | 'sequel', 19 | 'second-part', 20 | 'marvel-comics', 21 | 'based-on-comic', 22 | 
'tv-special', 23 | 'fight', 24 | 'violence') 25 | AND kt.kind = 'movie' 26 | AND t.production_year > 1950 27 | AND kt.id = t.kind_id 28 | AND t.id = mk.movie_id 29 | AND t.id = ci.movie_id 30 | AND t.id = cc.movie_id 31 | AND mk.movie_id = ci.movie_id 32 | AND mk.movie_id = cc.movie_id 33 | AND ci.movie_id = cc.movie_id 34 | AND chn.id = ci.person_role_id 35 | AND n.id = ci.person_id 36 | AND k.id = mk.keyword_id 37 | AND cct1.id = cc.subject_id 38 | AND cct2.id = cc.status_id; 39 | 40 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_20b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie 2 | FROM complete_cast AS cc, 3 | comp_cast_type AS cct1, 4 | comp_cast_type AS cct2, 5 | char_name AS chn, 6 | cast_info AS ci, 7 | keyword AS k, 8 | kind_type AS kt, 9 | movie_keyword AS mk, 10 | name AS n, 11 | title AS t 12 | WHERE cct1.kind = 'cast' 13 | AND cct2.kind LIKE '%complete%' 14 | AND chn.name NOT LIKE '%Sherlock%' 15 | AND (chn.name LIKE '%Tony%Stark%' 16 | OR chn.name LIKE '%Iron%Man%') 17 | AND k.keyword IN ('superhero', 18 | 'sequel', 19 | 'second-part', 20 | 'marvel-comics', 21 | 'based-on-comic', 22 | 'tv-special', 23 | 'fight', 24 | 'violence') 25 | AND kt.kind = 'movie' 26 | AND n.name LIKE '%Downey%Robert%' 27 | AND t.production_year > 2000 28 | AND kt.id = t.kind_id 29 | AND t.id = mk.movie_id 30 | AND t.id = ci.movie_id 31 | AND t.id = cc.movie_id 32 | AND mk.movie_id = ci.movie_id 33 | AND mk.movie_id = cc.movie_id 34 | AND ci.movie_id = cc.movie_id 35 | AND chn.id = ci.person_role_id 36 | AND n.id = ci.person_id 37 | AND k.id = mk.keyword_id 38 | AND cct1.id = cc.subject_id 39 | AND cct2.id = cc.status_id; 40 | 41 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_21b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, 2 | MIN(lt.link) AS link_type, 3 | MIN(t.title) AS german_follow_up 4 | FROM company_name AS cn, 5 | company_type AS ct, 6 | keyword AS k, 7 | link_type AS lt, 8 | movie_companies AS mc, 9 | movie_info AS mi, 10 | movie_keyword AS mk, 11 | movie_link AS ml, 12 | title AS t 13 | WHERE cn.country_code !='[pl]' 14 | AND (cn.name LIKE '%Film%' 15 | OR cn.name LIKE '%Warner%') 16 | AND ct.kind ='production companies' 17 | AND k.keyword ='sequel' 18 | AND lt.link LIKE '%follow%' 19 | AND mc.note IS NULL 20 | AND mi.info IN ('Germany', 21 | 'German') 22 | AND t.production_year BETWEEN 2000 AND 2010 23 | AND lt.id = ml.link_type_id 24 | AND ml.movie_id = t.id 25 | AND t.id = mk.movie_id 26 | AND mk.keyword_id = k.id 27 | AND t.id = mc.movie_id 28 | AND mc.company_type_id = ct.id 29 | AND mc.company_id = cn.id 30 | AND mi.movie_id = t.id 31 | AND ml.movie_id = mk.movie_id 32 | AND ml.movie_id = mc.movie_id 33 | AND mk.movie_id = mc.movie_id 34 | AND ml.movie_id = mi.movie_id 35 | AND mk.movie_id = mi.movie_id 36 | AND mc.movie_id = mi.movie_id; 37 | 38 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_23a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, 2 | MIN(t.title) AS complete_us_internet_movie 3 | FROM complete_cast AS cc, 4 | comp_cast_type AS cct1, 5 | company_name AS cn, 6 | company_type AS ct, 7 | info_type AS it1, 8 | keyword AS k, 9 | kind_type AS kt, 10 | movie_companies AS mc, 11 | movie_info AS mi, 12 | movie_keyword AS mk, 13 | title AS t 14 | WHERE cct1.kind = 'complete+verified' 15 | AND cn.country_code = '[us]' 16 | AND it1.info = 'release dates' 17 | AND kt.kind IN ('movie') 18 | AND mi.note LIKE '%internet%' 19 | AND mi.info IS NOT NULL 20 | AND (mi.info LIKE 'USA:% 199%' 21 | OR mi.info LIKE 
'USA:% 200%') 22 | AND t.production_year > 2000 23 | AND kt.id = t.kind_id 24 | AND t.id = mi.movie_id 25 | AND t.id = mk.movie_id 26 | AND t.id = mc.movie_id 27 | AND t.id = cc.movie_id 28 | AND mk.movie_id = mi.movie_id 29 | AND mk.movie_id = mc.movie_id 30 | AND mk.movie_id = cc.movie_id 31 | AND mi.movie_id = mc.movie_id 32 | AND mi.movie_id = cc.movie_id 33 | AND mc.movie_id = cc.movie_id 34 | AND k.id = mk.keyword_id 35 | AND it1.id = mi.info_type_id 36 | AND cn.id = mc.company_id 37 | AND ct.id = mc.company_type_id 38 | AND cct1.id = cc.status_id; 39 | 40 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_23b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, 2 | MIN(t.title) AS complete_nerdy_internet_movie 3 | FROM complete_cast AS cc, 4 | comp_cast_type AS cct1, 5 | company_name AS cn, 6 | company_type AS ct, 7 | info_type AS it1, 8 | keyword AS k, 9 | kind_type AS kt, 10 | movie_companies AS mc, 11 | movie_info AS mi, 12 | movie_keyword AS mk, 13 | title AS t 14 | WHERE cct1.kind = 'complete+verified' 15 | AND cn.country_code = '[us]' 16 | AND it1.info = 'release dates' 17 | AND k.keyword IN ('nerd', 18 | 'loner', 19 | 'alienation', 20 | 'dignity') 21 | AND kt.kind IN ('movie') 22 | AND mi.note LIKE '%internet%' 23 | AND mi.info LIKE 'USA:% 200%' 24 | AND t.production_year > 2000 25 | AND kt.id = t.kind_id 26 | AND t.id = mi.movie_id 27 | AND t.id = mk.movie_id 28 | AND t.id = mc.movie_id 29 | AND t.id = cc.movie_id 30 | AND mk.movie_id = mi.movie_id 31 | AND mk.movie_id = mc.movie_id 32 | AND mk.movie_id = cc.movie_id 33 | AND mi.movie_id = mc.movie_id 34 | AND mi.movie_id = cc.movie_id 35 | AND mc.movie_id = cc.movie_id 36 | AND k.id = mk.keyword_id 37 | AND it1.id = mi.info_type_id 38 | AND cn.id = mc.company_id 39 | AND ct.id = mc.company_type_id 40 | AND cct1.id = cc.status_id; 41 | 42 | 
-------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_32a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, 2 | MIN(t1.title) AS first_movie, 3 | MIN(t2.title) AS second_movie 4 | FROM keyword AS k, 5 | link_type AS lt, 6 | movie_keyword AS mk, 7 | movie_link AS ml, 8 | title AS t1, 9 | title AS t2 10 | WHERE k.keyword ='10,000-mile-club' 11 | AND mk.keyword_id = k.id 12 | AND t1.id = mk.movie_id 13 | AND ml.movie_id = t1.id 14 | AND ml.linked_movie_id = t2.id 15 | AND lt.id = ml.link_type_id 16 | AND mk.movie_id = t1.id; 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/queries/_32b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, 2 | MIN(t1.title) AS first_movie, 3 | MIN(t2.title) AS second_movie 4 | FROM keyword AS k, 5 | link_type AS lt, 6 | movie_keyword AS mk, 7 | movie_link AS ml, 8 | title AS t1, 9 | title AS t2 10 | WHERE k.keyword ='character-name-in-title' 11 | AND mk.keyword_id = k.id 12 | AND t1.id = mk.movie_id 13 | AND ml.movie_id = t1.id 14 | AND ml.linked_movie_id = t2.id 15 | AND lt.id = ml.link_type_id 16 | AND mk.movie_id = t1.id; 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/trino/imdb/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | edition: "1.3" 3 | name: "imdb" 4 | defaults: 5 | dialect: trino 6 | description: > 7 | This queryset is based on the various tables made public by IMDB. Similar to the TPC-H benchmark, 8 | it is designed to evaluate analytical database performance. It is also known as the Join Order Benchmark (JOB). 9 | 10 | To run the benchmark: 11 | 1. Run the included hydrate.sh script which downloads relevant data 12 | 2. 
To execute all queries: `sdf run --no-cache` 13 | includes: 14 | - path: queries/ 15 | - path: sources 16 | - path: imdb_data 17 | type: resource 18 | -------------------------------------------------------------------------------- /benchmarks/trino/tpch/.gitignore: -------------------------------------------------------------------------------- 1 | /tpch_pd_scale_10.zip 2 | /tpch_pd_scale_10 3 | -------------------------------------------------------------------------------- /benchmarks/trino/tpch/hydrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -xeuo pipefail 3 | 4 | # Downloads the ~1.2GB TPC-H dataset (tpch_pd_scale_10) in zipped format. 5 | 6 | cd "${BASH_SOURCE%/*}" 7 | curl -LO https://cdn.sdf.com/data/tpch/tpch_pd_scale_10.zip 8 | unzip tpch_pd_scale_10.zip 9 | -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q1.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_returnflag, 3 | l_linestatus, 4 | sum(l_quantity) as sum_qty, 5 | sum(l_extendedprice) as sum_base_price, 6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 8 | avg(l_quantity) as avg_qty, 9 | avg(l_extendedprice) as avg_price, 10 | avg(l_discount) as avg_disc, 11 | count(*) as count_order 12 | from 13 | lineitem 14 | where 15 | l_shipdate <= date '1998-09-02' 16 | group by 17 | l_returnflag, 18 | l_linestatus 19 | order by 20 | l_returnflag, 21 | l_linestatus; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q10.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_custkey, 3 | c_name, 4 | sum(l_extendedprice * (1 - l_discount)) as revenue, 5 | c_acctbal, 6 | n_name, 7 | c_address, 8 | c_phone, 9 | c_comment 10 | from 11 | customer, 12 | 
orders, 13 | lineitem, 14 | nation 15 | where 16 | c_custkey = o_custkey 17 | and l_orderkey = o_orderkey 18 | and o_orderdate >= date '1993-10-01' 19 | and o_orderdate < date '1994-01-01' 20 | and l_returnflag = 'R' 21 | and c_nationkey = n_nationkey 22 | group by 23 | c_custkey, 24 | c_name, 25 | c_acctbal, 26 | c_phone, 27 | n_name, 28 | c_address, 29 | c_comment 30 | order by 31 | revenue desc; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q11.sql: -------------------------------------------------------------------------------- 1 | select 2 | ps_partkey, 3 | sum(ps_supplycost * ps_availqty) as value 4 | from 5 | partsupp, 6 | supplier, 7 | nation 8 | where 9 | ps_suppkey = s_suppkey 10 | and s_nationkey = n_nationkey 11 | and n_name = 'GERMANY' 12 | group by 13 | ps_partkey 14 | having 15 | sum(ps_supplycost * ps_availqty) > ( 16 | select 17 | sum(ps_supplycost * ps_availqty) * 0.0001 18 | from 19 | partsupp, 20 | supplier, 21 | nation 22 | where 23 | ps_suppkey = s_suppkey 24 | and s_nationkey = n_nationkey 25 | and n_name = 'GERMANY' 26 | ) 27 | order by 28 | value desc; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q12.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_shipmode, 3 | sum(case 4 | when o_orderpriority = '1-URGENT' 5 | or o_orderpriority = '2-HIGH' 6 | then 1 7 | else 0 8 | end) as high_line_count, 9 | sum(case 10 | when o_orderpriority <> '1-URGENT' 11 | and o_orderpriority <> '2-HIGH' 12 | then 1 13 | else 0 14 | end) as low_line_count 15 | from 16 | lineitem 17 | join 18 | orders 19 | on 20 | l_orderkey = o_orderkey 21 | where 22 | l_shipmode in ('MAIL', 'SHIP') 23 | and l_commitdate < l_receiptdate 24 | and l_shipdate < l_commitdate 25 | and l_receiptdate >= date '1994-01-01' 26 | and l_receiptdate < date '1995-01-01' 27 | group by 28 | l_shipmode 29 | order 
by 30 | l_shipmode; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q13.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_count, 3 | count(*) as custdist 4 | from 5 | ( 6 | select 7 | c_custkey, 8 | count(o_orderkey) 9 | from 10 | customer left outer join orders on 11 | c_custkey = o_custkey 12 | and o_comment not like '%special%requests%' 13 | group by 14 | c_custkey 15 | ) as c_orders (c_custkey, c_count) 16 | group by 17 | c_count 18 | order by 19 | custdist desc, 20 | c_count desc; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q14.sql: -------------------------------------------------------------------------------- 1 | select 2 | 100.00 * sum(case 3 | when p_type like 'PROMO%' 4 | then l_extendedprice * (1 - l_discount) 5 | else 0 6 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 7 | from 8 | lineitem, 9 | part 10 | where 11 | l_partkey = p_partkey 12 | and l_shipdate >= date '1995-09-01' 13 | and l_shipdate < date '1995-10-01'; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q15.sql: -------------------------------------------------------------------------------- 1 | create view revenue0 as 2 | select 3 | l_suppkey as supplier_no, 4 | sum(l_extendedprice * (1 - l_discount)) as total_revenue 5 | from 6 | lineitem 7 | where 8 | l_shipdate >= date '1996-01-01' 9 | and l_shipdate < date '1996-01-01' + interval '3' month 10 | group by 11 | l_suppkey; 12 | select 13 | s_suppkey, 14 | s_name, 15 | s_address, 16 | s_phone, 17 | total_revenue 18 | from 19 | supplier, 20 | revenue0 21 | where 22 | s_suppkey = supplier_no 23 | and total_revenue = ( 24 | select 25 | max(total_revenue) 26 | from 27 | revenue0 28 | ) 29 | order by 30 | s_suppkey; 31 | 32 | drop view revenue0; 
-------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q16.sql: -------------------------------------------------------------------------------- 1 | select 2 | p_brand, 3 | p_type, 4 | p_size, 5 | count(distinct ps_suppkey) as supplier_cnt 6 | from 7 | partsupp, 8 | part 9 | where 10 | p_partkey = ps_partkey 11 | and p_brand <> 'Brand#45' 12 | and p_type not like 'MEDIUM POLISHED%' 13 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) 14 | and ps_suppkey not in ( 15 | select 16 | s_suppkey 17 | from 18 | supplier 19 | where 20 | s_comment like '%Customer%Complaints%' 21 | ) 22 | group by 23 | p_brand, 24 | p_type, 25 | p_size 26 | order by 27 | supplier_cnt desc, 28 | p_brand, 29 | p_type, 30 | p_size; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q17.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice) / 7.0 as avg_yearly 3 | from 4 | lineitem, 5 | part 6 | where 7 | p_partkey = l_partkey 8 | and p_brand = 'Brand#23' 9 | and p_container = 'MED BOX' 10 | and l_quantity < ( 11 | select 12 | 0.2 * avg(l_quantity) 13 | from 14 | lineitem 15 | where 16 | l_partkey = p_partkey 17 | ); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q18.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_name, 3 | c_custkey, 4 | o_orderkey, 5 | o_orderdate, 6 | o_totalprice, 7 | sum(l_quantity) 8 | from 9 | customer, 10 | orders, 11 | lineitem 12 | where 13 | o_orderkey in ( 14 | select 15 | l_orderkey 16 | from 17 | lineitem 18 | group by 19 | l_orderkey 20 | having 21 | sum(l_quantity) > 300 22 | ) 23 | and c_custkey = o_custkey 24 | and o_orderkey = l_orderkey 25 | group by 26 | c_name, 27 | c_custkey, 28 | o_orderkey, 29 | o_orderdate, 30 | o_totalprice 31 | order by 32 | o_totalprice 
desc, 33 | o_orderdate; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q2.sql: -------------------------------------------------------------------------------- 1 | select 2 | s_acctbal, 3 | s_name, 4 | n_name, 5 | p_partkey, 6 | p_mfgr, 7 | s_address, 8 | s_phone, 9 | s_comment 10 | from 11 | part, 12 | supplier, 13 | partsupp, 14 | nation, 15 | region 16 | where 17 | p_partkey = ps_partkey 18 | and s_suppkey = ps_suppkey 19 | and p_size = 15 20 | and p_type like '%BRASS' 21 | and s_nationkey = n_nationkey 22 | and n_regionkey = r_regionkey 23 | and r_name = 'EUROPE' 24 | and ps_supplycost = ( 25 | select 26 | min(ps_supplycost) 27 | from 28 | partsupp, 29 | supplier, 30 | nation, 31 | region 32 | where 33 | p_partkey = ps_partkey 34 | and s_suppkey = ps_suppkey 35 | and s_nationkey = n_nationkey 36 | and n_regionkey = r_regionkey 37 | and r_name = 'EUROPE' 38 | ) 39 | order by 40 | s_acctbal desc, 41 | n_name, 42 | s_name, 43 | p_partkey; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q20.sql: -------------------------------------------------------------------------------- 1 | select 2 | s_name, 3 | s_address 4 | from 5 | supplier, 6 | nation 7 | where 8 | s_suppkey in ( 9 | select 10 | ps_suppkey 11 | from 12 | partsupp 13 | where 14 | ps_partkey in ( 15 | select 16 | p_partkey 17 | from 18 | part 19 | where 20 | p_name like 'forest%' 21 | ) 22 | and ps_availqty > ( 23 | select 24 | 0.5 * sum(l_quantity) 25 | from 26 | lineitem 27 | where 28 | l_partkey = ps_partkey 29 | and l_suppkey = ps_suppkey 30 | and l_shipdate >= date '1994-01-01' 31 | and l_shipdate < date '1994-01-01' + interval '1' year 32 | ) 33 | ) 34 | and s_nationkey = n_nationkey 35 | and n_name = 'CANADA' 36 | order by 37 | s_name; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q21.sql: 
-------------------------------------------------------------------------------- 1 | select 2 | s_name, 3 | count(*) as numwait 4 | from 5 | supplier, 6 | lineitem l1, 7 | orders, 8 | nation 9 | where 10 | s_suppkey = l1.l_suppkey 11 | and o_orderkey = l1.l_orderkey 12 | and o_orderstatus = 'F' 13 | and l1.l_receiptdate > l1.l_commitdate 14 | and exists ( 15 | select 16 | * 17 | from 18 | lineitem l2 19 | where 20 | l2.l_orderkey = l1.l_orderkey 21 | and l2.l_suppkey <> l1.l_suppkey 22 | ) 23 | and not exists ( 24 | select 25 | * 26 | from 27 | lineitem l3 28 | where 29 | l3.l_orderkey = l1.l_orderkey 30 | and l3.l_suppkey <> l1.l_suppkey 31 | and l3.l_receiptdate > l3.l_commitdate 32 | ) 33 | and s_nationkey = n_nationkey 34 | and n_name = 'SAUDI ARABIA' 35 | group by 36 | s_name 37 | order by 38 | numwait desc, 39 | s_name; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q22.sql: -------------------------------------------------------------------------------- 1 | select 2 | cntrycode, 3 | count(*) as numcust, 4 | sum(c_acctbal) as totacctbal 5 | from 6 | ( 7 | select 8 | substring(c_phone from 1 for 2) as cntrycode, 9 | c_acctbal 10 | from 11 | customer 12 | where 13 | substring(c_phone from 1 for 2) in 14 | ('13', '31', '23', '29', '30', '18', '17') 15 | and c_acctbal > ( 16 | select 17 | avg(c_acctbal) 18 | from 19 | customer 20 | where 21 | c_acctbal > 0.00 22 | and substring(c_phone from 1 for 2) in 23 | ('13', '31', '23', '29', '30', '18', '17') 24 | ) 25 | and not exists ( 26 | select 27 | * 28 | from 29 | orders 30 | where 31 | o_custkey = c_custkey 32 | ) 33 | ) as custsale 34 | group by 35 | cntrycode 36 | order by 37 | cntrycode; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q3.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_orderkey, 3 | sum(l_extendedprice * (1 - 
l_discount)) as revenue, 4 | o_orderdate, 5 | o_shippriority 6 | from 7 | customer, 8 | orders, 9 | lineitem 10 | where 11 | c_mktsegment = 'BUILDING' 12 | and c_custkey = o_custkey 13 | and l_orderkey = o_orderkey 14 | and o_orderdate < date '1995-03-15' 15 | and l_shipdate > date '1995-03-15' 16 | group by 17 | l_orderkey, 18 | o_orderdate, 19 | o_shippriority 20 | order by 21 | revenue desc, 22 | o_orderdate; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q4.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_orderpriority, 3 | count(*) as order_count 4 | from 5 | orders 6 | where 7 | o_orderdate >= date '1993-07-01' 8 | and o_orderdate < date '1993-07-01' + interval '3' month 9 | and exists ( 10 | select 11 | * 12 | from 13 | lineitem 14 | where 15 | l_orderkey = o_orderkey 16 | and l_commitdate < l_receiptdate 17 | ) 18 | group by 19 | o_orderpriority 20 | order by 21 | o_orderpriority; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q5.sql: -------------------------------------------------------------------------------- 1 | select 2 | n_name, 3 | sum(l_extendedprice * (1 - l_discount)) as revenue 4 | from 5 | customer, 6 | orders, 7 | lineitem, 8 | supplier, 9 | nation, 10 | region 11 | where 12 | c_custkey = o_custkey 13 | and l_orderkey = o_orderkey 14 | and l_suppkey = s_suppkey 15 | and c_nationkey = s_nationkey 16 | and s_nationkey = n_nationkey 17 | and n_regionkey = r_regionkey 18 | and r_name = 'ASIA' 19 | and o_orderdate >= date '1994-01-01' 20 | and o_orderdate < date '1995-01-01' 21 | group by 22 | n_name 23 | order by 24 | revenue desc; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q6.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice * 
l_discount) as revenue 3 | from 4 | lineitem 5 | where 6 | l_shipdate >= date '1994-01-01' 7 | and l_shipdate < date '1995-01-01' 8 | and l_discount between 0.06 - 0.01 and 0.06 + 0.01 9 | and l_quantity < 24; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q7.sql: -------------------------------------------------------------------------------- 1 | select 2 | supp_nation, 3 | cust_nation, 4 | l_year, 5 | sum(volume) as revenue 6 | from 7 | ( 8 | select 9 | n1.n_name as supp_nation, 10 | n2.n_name as cust_nation, 11 | extract(year from l_shipdate) as l_year, 12 | l_extendedprice * (1 - l_discount) as volume 13 | from 14 | supplier, 15 | lineitem, 16 | orders, 17 | customer, 18 | nation n1, 19 | nation n2 20 | where 21 | s_suppkey = l_suppkey 22 | and o_orderkey = l_orderkey 23 | and c_custkey = o_custkey 24 | and s_nationkey = n1.n_nationkey 25 | and c_nationkey = n2.n_nationkey 26 | and ( 27 | (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') 28 | or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') 29 | ) 30 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 31 | ) as shipping 32 | group by 33 | supp_nation, 34 | cust_nation, 35 | l_year 36 | order by 37 | supp_nation, 38 | cust_nation, 39 | l_year; 40 | -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q8.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_year, 3 | sum(case 4 | when nation = 'BRAZIL' then volume 5 | else 0 6 | end) / sum(volume) as mkt_share 7 | from 8 | ( 9 | select 10 | extract(year from o_orderdate) as o_year, 11 | l_extendedprice * (1 - l_discount) as volume, 12 | n2.n_name as nation 13 | from 14 | part, 15 | supplier, 16 | lineitem, 17 | orders, 18 | customer, 19 | nation n1, 20 | nation n2, 21 | region 22 | where 23 | p_partkey = l_partkey 24 | and s_suppkey = l_suppkey 25 | and l_orderkey = o_orderkey 
26 | and o_custkey = c_custkey 27 | and c_nationkey = n1.n_nationkey 28 | and n1.n_regionkey = r_regionkey 29 | and r_name = 'AMERICA' 30 | and s_nationkey = n2.n_nationkey 31 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 32 | and p_type = 'ECONOMY ANODIZED STEEL' 33 | ) as all_nations 34 | group by 35 | o_year 36 | order by 37 | o_year; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/q9.sql: -------------------------------------------------------------------------------- 1 | select 2 | nation, 3 | o_year, 4 | sum(amount) as sum_profit 5 | from 6 | ( 7 | select 8 | n_name as nation, 9 | extract(year from o_orderdate) as o_year, 10 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 11 | from 12 | part, 13 | supplier, 14 | lineitem, 15 | partsupp, 16 | orders, 17 | nation 18 | where 19 | s_suppkey = l_suppkey 20 | and ps_suppkey = l_suppkey 21 | and ps_partkey = l_partkey 22 | and p_partkey = l_partkey 23 | and o_orderkey = l_orderkey 24 | and s_nationkey = n_nationkey 25 | and p_name like '%green%' 26 | ) as profit 27 | group by 28 | nation, 29 | o_year 30 | order by 31 | nation, 32 | o_year desc; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/queries/revenue0.sql: -------------------------------------------------------------------------------- 1 | create table revenue0 as 2 | select 3 | l_suppkey as supplier_no, 4 | sum(l_extendedprice * (1 - l_discount)) as total_revenue 5 | from 6 | lineitem 7 | where 8 | l_shipdate >= date '1996-01-01' 9 | and l_shipdate < date '1996-01-01' + interval '3' month 10 | group by 11 | l_suppkey; -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/customer.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE customer with (format='PARQUET', 
LOCATION='tpch_pd_scale_10/customer/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/lineitem.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE lineitem with (format='PARQUET', LOCATION='tpch_pd_scale_10/lineitem/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/nation.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE nation with (format='PARQUET', LOCATION='tpch_pd_scale_10/nation/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/orders.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders with (format='PARQUET', LOCATION='tpch_pd_scale_10/orders/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/part.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE part with (format='PARQUET', LOCATION='tpch_pd_scale_10/part/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/partsupp.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE partsupp with (format='PARQUET', LOCATION='tpch_pd_scale_10/partsupp/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/region.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE region with (format='PARQUET', LOCATION='tpch_pd_scale_10/region/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/sources/supplier.sql: -------------------------------------------------------------------------------- 
1 | CREATE TABLE supplier with (format='PARQUET', LOCATION='tpch_pd_scale_10/supplier/'); -------------------------------------------------------------------------------- /benchmarks/trino/tpch/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | edition: "1.3" 3 | name: "tpch" 4 | defaults: 5 | dialect: trino 6 | description: > 7 | The TPC-H benchmark is a standard for measuring processing performance of analytical SQL engines. 8 | 9 | To run the benchmark: 10 | 1. Run the included hydrate.sh script which downloads relevant data 11 | 2. To execute all queries: `sdf run --no-cache` 12 | includes: 13 | - path: sources 14 | - path: queries 15 | - path: tpch_pd_scale_10 16 | type: resource 17 | -------------------------------------------------------------------------------- /docs/assets/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/docs/assets/favicon.png -------------------------------------------------------------------------------- /docs/cloud/authentication.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "CLI Authentication" 3 | --- 4 | 5 | ## Authenticating the CLI 6 | 7 | Before you can deploy your assets to the SDF Cloud, you'll first need to authenticate with the platform. 8 | 9 | 10 | The SDF Cloud is only available via the **Plus** and above plans at this time. If you'd like to get access, please [inquire](https://sdf.com/inquiries) 11 | 12 | 13 | Authenticating with your CLI is easy. Simply run the command below to begin: 14 | 15 | ```shell 16 | sdf auth login 17 | ``` 18 | 19 | Next, your default browser will open up and prompt you to login. Currently, Google and Okta are supported authentication providers. 
20 | 21 | Once you've logged in, you'll be redirected to a success page: 22 | 23 | 24 | 25 | Congratulations! You've successfully authenticated with the SDF Platform. You can now deploy, monitor, and interact with your assets on the platform. 26 | -------------------------------------------------------------------------------- /docs/cloud/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Overview" 3 | description: 4 | "SDF Cloud is an integrated, auto-generated data catalog with integrated column level lineage, data classification, and more." 5 | --- 6 | 7 | SDF Cloud is the fastest way to understand your data warehouse at a glance. Code-driven assets are searchable and clear, driving 8 | data awareness across a whole organization. Visualize column level lineage, data classifications, and reports at a glance. 9 | 10 | Assets are generated automatically through static analysis, so there is no configuration, migration, or setup time. It. Just. Works. 11 | 12 | 13 | 19 | 20 | -------------------------------------------------------------------------------- /docs/database/orchestration.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Orchestration" 3 | description: 4 | "Simple orchestration with SDF" 5 | --- 6 | -------------------------------------------------------------------------------- /docs/database/supported-functions/trino/comparison-functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Comparison Functions" 3 | --- 4 | 5 | 6 | ## `greatest` 7 | 8 | Returns the largest of the provided values. 9 | 10 | 11 | *Examples:* 12 | ```sql examples.sql 13 | SELECT greatest(1, 2, 3) AS value; -- value '3' 14 | ``` 15 | *Supported Signatures* 16 | ```sql 17 | function greatest($3, ...) 
returns $3 18 | ``` 19 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#greatest) 20 | 21 | ## `least` 22 | 23 | Returns the smallest of the provided values. 24 | 25 | 26 | *Examples:* 27 | ```sql examples.sql 28 | SELECT LEAST(5,6,7,1,2,3,4) -- list of columns or values -- value '1' 29 | ``` 30 | *Supported Signatures* 31 | ```sql 32 | function least($3, ...) returns $3 33 | ``` 34 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#least) 35 | 36 | 37 | -------------------------------------------------------------------------------- /docs/guide/advanced/telemetry.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Understanding Telemetry" 3 | --- 4 | 5 | SDF sends fully anonymized telemetry to help us improve the tool by understanding how it is used. Telemetry is optional and can be disabled at any time. 6 | 7 | 8 | If you are willing and able to leave telemetry enabled, thank you! This will help us better understand how the CLI app is used, allowing us to improve your experience. 9 | 10 | 11 | ## What is tracked? 12 | 13 | The following information is included in the telemetry events: 14 | 15 | - Anonymous device ID 16 | - Commands executed 17 | - The start and end timestamps of a command execution 18 | - Platform information (device details, operating system, CPU architecture, number of CPUs, total/available memory, etc.) 19 | - Workspace metrics (dialect, number of models etc.) 20 | - Exit code and errors if there are any 21 | 22 | We use telemetry for aggregate analysis and do not tie telemetry events to a specific identity. 23 | 24 | ## Disabling telemetry 25 | 26 | You can disable telemetry by setting the environment variable `DISABLE_SDF_CLI_TELEMETRY=1` before running SDF. 
27 | -------------------------------------------------------------------------------- /docs/guide/data-quality/stats.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Stats" 3 | --- 4 | 5 | ## Understanding Stats 6 | 7 | **COMING SOON** 8 | -------------------------------------------------------------------------------- /docs/guide/transformation/authentication.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Overview' 3 | description: 'SDF has rich authentication capabilities' 4 | icon: "play" 5 | --- 6 | 7 | - where auth stuff is stored on different systems 8 | - how to login 9 | - how to logout 10 | - how to validate connections (auth status) 11 | - how to deal with multiple connections 12 | -------------------------------------------------------------------------------- /docs/integrations/openai/ai-classification.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'AI Classification' 3 | description: 'Use your own OpenAI keypair with SDF to accelerate Data Classification' 4 | icon: "robot" 5 | --- 6 | 7 | 8 | Documentation is still in progress. Please check back later for a full guide on how to use OpenAI with SDF. 9 | 10 | -------------------------------------------------------------------------------- /docs/linter/format.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Formatting" 3 | description: 4 | "Overview of Formatting" 5 | version: "Preview" 6 | --- 7 | 8 | ## SQL Formatting 9 | The SDF formatter is an extremely fast SQL formatter designed as a drop-in replacement for SQLFluff. It is available natively as part of the SDF cli. 10 | 11 | ### `sdf format` 12 | The entrypoint for the formatter is `sdf format`. 
13 | ``` 14 | sdf format # Formats all files in the SDF Workspace 15 | sdf format /path/to/file.sql # Formats one specific file 16 | sdf format /path/to/dir/*.sql # Formats all files matching glob pattern 17 | ``` 18 | 19 | ### Formatting Configuration 20 | You may set a formatting configuration for an SDF project. 21 | 22 | ```yml workspace.sdf.yml 23 | sdf-args: 24 | lint: > 25 | ... 26 | format: > 27 | -l line-length=80 28 | -l indent=2 29 | -l commas=leading 30 | ``` 31 | 32 | ### Formatting Reference 33 | The following formatting options are supported. 34 | 35 | | Type | Configuration | Auto-Fix | Default | 36 | | ------- | -------------- | -------- | ---------- | 37 | | Layout | `line-length` | ✅ | `80` | 38 | | Layout | `indent` | ✅ | `2` | 39 | | Layout | `commas` | ✅ | `trailing` | 40 | -------------------------------------------------------------------------------- /docs/linter/macros.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Macros (jinja)" 3 | description: 4 | "Integrating jinja macros into your linting configuration" 5 | version: "Preview" 6 | --- 7 | 8 | ## Jinja Templating 9 | SDF Lint and Format are guaranteed to be compatible with all elements in an SDF workspace, 10 | including jinja macros, and materialization libraries. 11 | 12 | Jinja does not materially impact the performance of SDF Lint, and lint violations are provided 13 | at the level of the source SQL code, not pre-processed (ie. post jinja-expansion) level. This makes 14 | debugging easier. 15 | 16 | ### Configuration 17 | No additional configuration is needed for SDF workspaces using macros. 18 | 19 | ### Linting dbt Projects 20 | SDF lint is not (as of now) compatible with dbt projects. If you'd like that capability, let us know in 21 | our [community slack](https://sdf.com/join)! 
22 | -------------------------------------------------------------------------------- /docs/reference/bigquery/approximate_aggregate_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Approximate Aggregate Functions" 3 | --- 4 | 5 | ## `approx_count_distinct` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function approx_count_distinct($1) returns bigint 10 | ``` 11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_count_distinct) 12 | 13 | ## `approx_quantiles` 14 | 15 | *Supported Signatures* 16 | ```sql 17 | function approx_quantiles($1, bigint) returns array<$1> 18 | ``` 19 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_quantiles) 20 | 21 | ## `approx_top_count` 22 | 23 | *Supported Signatures* 24 | ```sql 25 | function approx_top_count($1, bigint) returns array> 26 | ``` 27 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_top_count) 28 | 29 | ## `approx_top_sum` 30 | 31 | *Supported Signatures* 32 | ```sql 33 | function approx_top_sum($1, bigint, bigint) returns array> 34 | ``` 35 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_top_sum) 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/reference/bigquery/bit_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bit Functions" 3 | --- 4 | 5 | ## `bit_count` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function bit_count $1($1) returns bigint 10 | where $1 in (int, bytes) 11 | ``` 12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#bit_count) 13 | 14 | 15 | 
-------------------------------------------------------------------------------- /docs/reference/bigquery/debugging_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Debugging Functions" 3 | --- 4 | 5 | ## `error` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function error(string) returns string 10 | ``` 11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#error) 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/reference/bigquery/other_expressions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Other Expressions" 3 | --- 4 | 5 | ## `if` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function if(bool, $1, $1) returns $1 10 | ``` 11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#if) 12 | 13 | ## `reclassify` 14 | 15 | Changes the classification label of the first argument from the expected classifier specified by the second argument to the desired classifier specified by the third. The expected and desired classifiers must belong to the same classifier group. No other classifiers attached to the first argument are impacted. 
The function will emit a warning if the first argument does not have the expected classifier. 16 | 17 | *Examples:* 18 | ```sql examples.sql 19 | select reclassify(12345, 'pii.clear_text', 'pii.masked') as value; -- value '12345' 20 | ``` 21 | *Supported Signatures* 22 | ```sql 23 | function reclassify($1, string, string) returns $1 24 | function reclassify($1, string) returns $1 25 | ``` 26 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#reclassify) 27 | 28 | 29 | -------------------------------------------------------------------------------- /docs/reference/bigquery/search_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Search Functions" 3 | --- 4 | 5 | ## `search` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function search $1($1, string) returns bool 10 | where $1 in (string, json, array) 11 | ``` 12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#search) 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/reference/bigquery/security_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Security Functions" 3 | --- 4 | 5 | ## `session_user` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function session_user() returns string 10 | ``` 11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#session_user) 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/reference/bigquery/statistical_aggregate_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Statistical Aggregation Functions" 3 | --- 4 | 5 | ## `variance` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function variance 
$1($1) returns double 10 | where $1 in (int64, double) 11 | ``` 12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#variance) 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/reference/bigquery/temporal_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Temporal Functions" 3 | --- 4 | 5 | ## `date_sub` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function date_sub $1($1, interval) returns $1 10 | where $1 in (date, timestamp) 11 | ``` 12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#date_sub) 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/reference/bigquery/utility_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Utility Functions" 3 | --- 4 | 5 | ## `generate_uuid` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function generate_uuid() returns string 10 | ``` 11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#generate_uuid) 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/reference/redshift/aggregate_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Aggregate Functions" 3 | --- 4 | 5 | ## `approx_count_distinct` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function approx_count_distinct(bigint) returns bigint 10 | ``` 11 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/c_SQL_functions.html) 12 | 13 | ## `regexp_count` 14 | 15 | *Supported Signatures* 16 | ```sql 17 | function regexp_count(varchar, varchar) returns bigint 18 | function regexp_count(varchar, varchar, bigint) returns 
bigint 19 | function regexp_count(varchar, varchar, bigint, varchar) returns bigint 20 | ``` 21 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/REGEXP_COUNT.html) 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/reference/redshift/math_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Math Functions" 3 | --- 4 | 5 | ## `checksum` 6 | 7 | *Supported Signatures* 8 | ```sql 9 | function checksum(varchar) returns bigint 10 | ``` 11 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/r_CHECKSUM.html) 12 | 13 | ## `is_integer` 14 | 15 | *Supported Signatures* 16 | ```sql 17 | function is_integer(super) returns boolean 18 | ``` 19 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/r_is_integer.html) 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/reference/snowflake/account_usage_table_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Account Usage Table Functions" 3 | --- 4 | 5 | 6 | ## `tag_references_with_lineage` 7 | 8 | *Supported Signatures* 9 | ```sql 10 | function tag_references_with_lineage(string) returns struct< 11 | tag_database string, 12 | tag_schema string, 13 | tag_id bigint, 14 | tag_name string, 15 | tag_value string, 16 | level string, 17 | object_database string, 18 | object_schema string, 19 | object_id bigint, 20 | object_name string, 21 | object_deleted timestamp, 22 | domain string, 23 | column_id bigint, 24 | column_name string 25 | > 26 | 27 | ``` 28 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/tag_references_with_lineage) 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/reference/snowflake/hash_functions.mdx: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "Hash Functions" 3 | --- 4 | 5 | 6 | ## `hash` 7 | 8 | hash value 9 | *Supported Signatures* 10 | ```sql 11 | function hash(...) returns decimal(19, 0) 12 | ``` 13 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/hash) 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/reference/snowflake/metadata_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Metadata Functions" 3 | --- 4 | 5 | 6 | ## `generate_column_description` 7 | 8 | parse the result from INFER_SCHEMA and generate a copy-able column description 9 | *Supported Signatures* 10 | ```sql 11 | function generate_column_description(array, varchar) returns varchar 12 | ``` 13 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/generate_column_description) 14 | 15 | ## `get_ddl` 16 | 17 | DDL used to create the object 18 | *Supported Signatures* 19 | ```sql 20 | function get_ddl(varchar, varchar) returns varchar 21 | function get_ddl(varchar, varchar, boolean) returns varchar 22 | function get_ddl(varchar, decimal(38, 0)) returns varchar 23 | function get_ddl(varchar, decimal(38, 0), boolean) returns varchar 24 | ``` 25 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/get_ddl) 26 | 27 | 28 | -------------------------------------------------------------------------------- /docs/reference/snowflake/vector_similarity_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Vector Similarity Functions" 3 | --- 4 | 5 | 6 | ## `vector_inner_product` 7 | 8 | Inner product between vectors 9 | *Supported Signatures* 10 | ```sql 11 | function vector_inner_product(array, array) returns double 12 | ``` 13 | [🔗 Official 
Documentation](https://docs.snowflake.com/en/sql-reference/functions/vector_inner_product) 14 | 15 | ## `vector_l2_distance` 16 | 17 | L2 distance between vectors 18 | *Supported Signatures* 19 | ```sql 20 | function vector_l2_distance(array, array) returns double 21 | ``` 22 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/vector_l2_distance) 23 | 24 | 25 | -------------------------------------------------------------------------------- /docs/reference/trino/comparison_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Comparison Functions" 3 | --- 4 | 5 | ## `greatest` 6 | 7 | Returns the largest of the provided values. 8 | 9 | 10 | *Examples:* 11 | ```sql examples.sql 12 | SELECT greatest(1, 2, 3) AS value; -- value '3' 13 | ``` 14 | *Supported Signatures* 15 | ```sql 16 | function greatest($3, ...) returns $3 17 | ``` 18 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#greatest) 19 | 20 | ## `least` 21 | 22 | Returns the smallest of the provided values. 23 | 24 | 25 | *Examples:* 26 | ```sql examples.sql 27 | SELECT LEAST(5,6,7,1,2,3,4) -- list of columns or values -- value '1' 28 | ``` 29 | *Supported Signatures* 30 | ```sql 31 | function least($3, ...) returns $3 32 | ``` 33 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#least) 34 | 35 | 36 | -------------------------------------------------------------------------------- /docs/reference/trino/hyperloglog_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Hyperloglog Functions" 3 | --- 4 | 5 | ## `approx_set` 6 | 7 | Returns the HyperLogLog sketch of the input data set of x. This data sketch underlies approx_distinct() and can be stored and used later by calling cardinality(). 
8 | 9 | *Supported Signatures* 10 | ```sql 11 | function approx_set(bigint) returns hyperloglog 12 | function approx_set(double) returns hyperloglog 13 | function approx_set(varchar) returns hyperloglog 14 | ``` 15 | [🔗 Official Documentation](https://trino.io/docs/current/functions/hyperloglog.html#approx_set) 16 | 17 | ## `empty_approx_set` 18 | 19 | Returns an empty HyperLogLog. 20 | 21 | *Supported Signatures* 22 | ```sql 23 | function empty_approx_set() returns hyperloglog 24 | ``` 25 | [🔗 Official Documentation](https://trino.io/docs/current/functions/hyperloglog.html#empty_approx_set) 26 | 27 | ## `merge` 28 | 29 | Returns the HyperLogLog of the aggregate union of the individual hll HyperLogLog structures. 30 | 31 | *Supported Signatures* 32 | ```sql 33 | function merge(hyperloglog) returns hyperloglog 34 | function merge(qdigest) returns qdigest 35 | function merge(tdigest) returns tdigest 36 | ``` 37 | [🔗 Official Documentation](https://trino.io/docs/current/functions/hyperloglog.html#merge) 38 | 39 | 40 | -------------------------------------------------------------------------------- /docs/reference/trino/lambda_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Lambda Functions" 3 | --- 4 | 5 | ## `reduce_agg` 6 | 7 | Reduces all input values into a single value. inputFunction will be invoked for each non-null input value. In addition to taking the input value, inputFunction takes the current state, initially initialState, and returns the new state. combineFunction will be invoked to combine two states into a new state. The final state is returned. 
8 | 9 | *Supported Signatures* 10 | ```sql 11 | function reduce_agg($1, $10, function($10, $1, $10), function($10, $10, $10)) returns $10 12 | ``` 13 | [🔗 Official Documentation](https://trino.io/docs/current/functions/aggregate.html#reduce_agg) 14 | 15 | ## `regexp_replace` 16 | 17 | Removes every instance of the substring matched by the regular expression pattern from string. 18 | 19 | 20 | *Examples:* 21 | ```sql examples.sql 22 | SELECT REGEXP_REPLACE('text foo another text', 'foo', 'bar') -- value 'text bar another text' 23 | ``` 24 | *Supported Signatures* 25 | ```sql 26 | function regexp_replace(varchar, joniregexp) returns varchar 27 | function regexp_replace(varchar, joniregexp, varchar) returns varchar 28 | ``` 29 | [🔗 Official Documentation](https://trino.io/docs/current/functions/regexp.html#regexp_replace) 30 | 31 | 32 | -------------------------------------------------------------------------------- /docs/reference/trino/mongodb_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Mongodb Functions" 3 | --- 4 | 5 | ## `objectid` 6 | 7 | Extracts the TIMESTAMP WITH TIME ZONE from a given ObjectId. 8 | 9 | *Supported Signatures* 10 | ```sql 11 | function objectid() returns objectid 12 | function objectid(varchar) returns objectid 13 | ``` 14 | [🔗 Official Documentation](https://trino.io/docs/current/connector/mongodb.html#objectid) 15 | 16 | ## `objectid_timestamp` 17 | 18 | Extracts the TIMESTAMP WITH TIME ZONE from a given ObjectId. 19 | 20 | *Supported Signatures* 21 | ```sql 22 | function objectid_timestamp(objectid) returns timestamp 23 | ``` 24 | [🔗 Official Documentation](https://trino.io/docs/current/functions/../connector/mongodb.html#objectid_timestamp) 25 | 26 | ## `timestamp_objectid` 27 | 28 | Creates an ObjectId from a TIMESTAMP WITH TIME ZONE. 
29 | 30 | *Supported Signatures* 31 | ```sql 32 | function timestamp_objectid(timestamp(0)) returns objectid 33 | ``` 34 | [🔗 Official Documentation](https://trino.io/docs/current/functions/../connector/mongodb.html#timestamp_objectid) 35 | 36 | 37 | -------------------------------------------------------------------------------- /docs/reference/trino/t-digest_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "T-digest Functions" 3 | --- 4 | 5 | ## `tdigest_agg` 6 | 7 | Composes all input values of x into a tdigest. x can be of any numeric type. 8 | 9 | *Supported Signatures* 10 | ```sql 11 | function tdigest_agg(double) returns tdigest 12 | function tdigest_agg(double, double) returns tdigest 13 | ``` 14 | [🔗 Official Documentation](https://trino.io/docs/current/functions/tdigest.html#tdigest_agg) 15 | 16 | 17 | -------------------------------------------------------------------------------- /docs/reference/trino/uuid_functions.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Uuid Functions" 3 | --- 4 | 5 | ## `uuid` 6 | 7 | Returns a pseudo randomly generated UUID (type 4). 8 | 9 | 10 | *Examples:* 11 | ```sql examples.sql 12 | SELECT uuid() as value; -- value '22552909-5560-4234-a383-5a968a4e2a91' 13 | ``` 14 | *Supported Signatures* 15 | ```sql 16 | function uuid() returns uuid 17 | ``` 18 | *Note: uuid() is volatile, i.e. might return a 19 | different value for the same input.* 20 | 21 | [🔗 Official Documentation](https://trino.io/docs/current/functions/uuid.html#uuid) 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/snippets/preview-warning.mdx: -------------------------------------------------------------------------------- 1 | 2 | {title} is currently only available in our preview release. Preview features are not recommended for production use. 
For more information on how to install or update to the Preview release channel, see [here](/introduction/install#release-tracks). 3 | 4 | -------------------------------------------------------------------------------- /docs/tutorials/learn-more.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Learn more" 3 | --- 4 | 5 | ## Tutorials Summary 6 | We hope you enjoyed our series of tutorials and got to learn more about SDF. 7 | 8 | We learned how to: 9 | * [Create a model](/tutorials/creating-a-model) 10 | * [Debug with SDF](/tutorials/debugging) 11 | * [Deprecate a model](/tutorials/deprecating-a-model) 12 | * [Enrich your warehouse](/tutorials/enriching-your-warehouse) 13 | * [Ensure data quality](/tutorials/ensuring-data-quality) 14 | 15 | 16 | ## Quick Links 17 | There is much more that SDF can do! 18 | 19 | 20 | Connect to a project with Snowflake or Redshift 21 | 22 | 23 | Visualize your data warehouse with SDF cloud beta 24 | 25 | 26 | Learn about different materialization types 27 | 28 | 29 | Transform your warehouse dynamically using variables and macros 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/.gitkeep -------------------------------------------------------------------------------- /examples/bigquery_incremental/models/last_hn_timestamp.sql: -------------------------------------------------------------------------------- 1 | SELECT MAX(`timestamp`) as ts FROM bigquery-public-data.hacker_news.`full` 2 | -------------------------------------------------------------------------------- /examples/bigquery_incremental/models/popular_articles.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM 4 
| bigquery-public-data.hacker_news.`full` 5 | WHERE 6 | {% if builtin.is_incremental_mode %} 7 | -- Only fetch rows that are newer than the newest row in the previous materialization of this table 8 | `timestamp` >= (SELECT MAX(`timestamp`) FROM popular_articles) 9 | {% else %} 10 | `timestamp` >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY) 11 | {% endif %} 12 | AND title IS NOT NULL 13 | AND (dead IS NULL OR dead = FALSE) 14 | AND score > 100 15 | -------------------------------------------------------------------------------- /examples/bigquery_incremental/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: bigquery_incremental 3 | edition: "1.3" 4 | description: > 5 | This workspace demonstrates incremental tables with BigQuery. 6 | 7 | Make sure to replace `` with your BigQuery project name. 8 | 9 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/bigquery/incremental-materialization 10 | 11 | defaults: 12 | dialect: bigquery 13 | preprocessor: jinja 14 | catalog: 15 | 16 | includes: 17 | - path: models 18 | 19 | integrations: 20 | - provider: bigquery 21 | type: database 22 | sources: 23 | - pattern: bigquery-public-data.*.* 24 | targets: 25 | - pattern: "*.pub.*" 26 | -------------------------------------------------------------------------------- /examples/bigquery_starter/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/bigquery_starter/models/aggregate_orders.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | o.customer_id, 3 | c.customer_name, 4 | COUNT(o.order_id) AS total_orders, 5 | SUM(o.quantity * p.price) AS total_spent 6 | FROM 7 | orders o 8 | JOIN 9 | customers c ON o.customer_id = c.customer_id 10 | JOIN 11 | 
products p ON o.product_id = p.product_id 12 | GROUP BY 13 | o.customer_id, c.customer_name; -------------------------------------------------------------------------------- /examples/bigquery_starter/models/customers_over_100.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | customer_id, 3 | customer_name, 4 | total_orders, 5 | total_spent 6 | FROM 7 | aggregate_orders 8 | WHERE 9 | total_spent > 100 10 | ORDER BY 11 | total_spent DESC; -------------------------------------------------------------------------------- /examples/bigquery_starter/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: bigquery_starter 3 | edition: "1.3" 4 | description: > 5 | This is a starter workspace for BigQuery users. It includes a script to create a dataset and some initial tables with seed data, then one table and one view transforming 6 | the seed data. 7 | 8 | Make sure to replace `` with your BigQuery project name. 
9 | 10 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/bigquery/basic-materialization 11 | 12 | defaults: 13 | catalog: 14 | schema: sdf_ecommerce_example 15 | dialect: bigquery 16 | 17 | includes: 18 | - path: models 19 | 20 | integrations: 21 | - provider: bigquery 22 | type: database 23 | sources: 24 | - pattern: "*.sdf_ecommerce_example.*" 25 | targets: 26 | - pattern: "*.sdf_ecommerce_example.*" 27 | 28 | -------------------------------------------------------------------------------- /examples/cybersyn_tech_innovation/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/cybersyn_tech_innovation/models/sdf_snowflake/cybersyn_tech_innovation/all_nvidia_patents.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Find all patents where Nvidia is the designated assignee 3 | */ 4 | WITH patents_in_last_year AS ( 5 | SELECT 6 | patent_id, 7 | patent_type, 8 | invention_title 9 | FROM tech__innovation_essentials.cybersyn.uspto_patent_index AS patent 10 | WHERE patent.DOCUMENT_PUBLICATION_DATE > DATEADD(YEAR, -1, CURRENT_DATE()) 11 | ) 12 | SELECT 13 | patent.patent_id, 14 | patent.patent_type, 15 | patent.invention_title 16 | FROM tech__innovation_essentials.cybersyn.uspto_contributor_index AS contrib 17 | JOIN tech__innovation_essentials.cybersyn.uspto_patent_contributor_relationships AS rship 18 | ON (contrib.contributor_id = rship.contributor_id) 19 | JOIN patents_in_last_year AS patent 20 | ON (rship.patent_id = patent.patent_id) 21 | WHERE contrib.contributor_name ILIKE 'NVIDIA CORPORATION' 22 | AND rship.contribution_type = 'Assignee - United States Company Or Corporation' -------------------------------------------------------------------------------- 
/examples/cybersyn_tech_innovation/models/sdf_snowflake/cybersyn_tech_innovation/funder_aggregates.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | FUNDER_NAME, 3 | COUNT(*) AS total_funders, 4 | COALESCE(SUM(WORKS_COUNT), 0) AS total_works_count, 5 | COALESCE(SUM(WORKS_CITED_BY_COUNT), 0) AS total_citations, 6 | COALESCE(SUM(GRANTS_COUNT), 0) AS total_grants, 7 | COUNT(DISTINCT FUNDER_COUNTRY_GEO_ID) AS unique_country_count, 8 | MIN(CREATED_DATE) AS first_created_date, 9 | MAX(UPDATED_DATE) AS last_updated_date 10 | FROM TECH__INNOVATION_ESSENTIALS.CYBERSYN.OPENALEX_FUNDERS_INDEX 11 | GROUP BY FUNDER_NAME 12 | ORDER BY total_citations DESC -------------------------------------------------------------------------------- /examples/cybersyn_tech_innovation/models/sdf_snowflake/cybersyn_tech_innovation/most_starred_repos.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Pull the repos with the most stars in the past year 3 | */ 4 | WITH latest_repo_name AS ( 5 | SELECT 6 | repo_name, 7 | repo_id 8 | FROM tech__innovation_essentials.cybersyn.github_repos 9 | QUALIFY ROW_NUMBER() OVER (PARTITION BY repo_id ORDER BY first_seen DESC) = 1 10 | ) 11 | SELECT 12 | repo.repo_name, 13 | repo.repo_id, 14 | SUM(stars.count) AS sum_stars 15 | FROM tech__innovation_essentials.cybersyn.github_stars AS stars 16 | JOIN latest_repo_name AS repo 17 | ON (repo.repo_id = stars.repo_id) 18 | WHERE stars.date >= DATEADD('day', -365, CURRENT_DATE) 19 | GROUP BY repo.repo_name, repo.repo_id 20 | ORDER BY sum_stars DESC NULLS LAST 21 | LIMIT 50 -------------------------------------------------------------------------------- /examples/github_analysis/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- 
/examples/github_analysis/checks/no_timezone_comparison.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | column_id, 3 | classifiers 4 | FROM 5 | sdf.information_schema.columns 6 | WHERE 7 | CONTAINS_ARRAY_VARCHAR(classifiers, 'DATE.') AND 8 | CARDINALITY(ARRAY_DISTINCT(classifiers)) > 1; -------------------------------------------------------------------------------- /examples/github_analysis/classification/taxonomy.sdf.yml: -------------------------------------------------------------------------------- 1 | classifier: 2 | name: DATE 3 | labels: 4 | - name: UTC 5 | - name: PST 6 | - name: EST -------------------------------------------------------------------------------- /examples/github_analysis/metadata/sdf_snowflake/stg/repo_event_aggregates.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: repo_event_aggregates 3 | description: > 4 | This table aggregates the number of events that have occurred for each repository. 5 | columns: 6 | - name: event_count 7 | description: The number of events that have occurred for a repository. 8 | tests: 9 | - expect: not_null() -------------------------------------------------------------------------------- /examples/github_analysis/metadata/sdf_snowflake/stg/repo_stars.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: repo_stars 3 | description: > 4 | The number of stars per repository. 5 | columns: 6 | - name: total_stars 7 | description: The total number of stars that a repository has. 
8 | tests: 9 | - expect: minimum(0) 10 | -------------------------------------------------------------------------------- /examples/github_analysis/metadata/sdf_snowflake/stg/star_growth.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: star_growth 3 | description: > 4 | This table shows the number of stars that have been added to a repository over the course of all events on the repo. 5 | columns: 6 | - name: first_star_count 7 | description: The number of stars that a repository had before the first event recorded. 8 | tests: 9 | - expect: minimum(0) 10 | - name: last_star_count 11 | description: The number of stars that a repository had after the last event recorded. 12 | tests: 13 | - expect: minimum(0) 14 | - name: total_stars_acquired 15 | description: The total number of stars that a repository has acquired over the course of all events. 16 | tests: 17 | - expect: minimum(0) 18 | - name: star_growth 19 | description: The difference between the stars that a repository had after the last event recorded and the stars that a repository had before the first event recorded. 
20 | tests: 21 | - expect: minimum(0) 22 | -------------------------------------------------------------------------------- /examples/github_analysis/metadata/tech__innovation_essentials/cybersyn/github_repos.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: github_repos 3 | columns: 4 | - name: first_seen 5 | classifiers: 6 | - DATE.PST 7 | - name: last_seen 8 | classifiers: 9 | - DATE.PST -------------------------------------------------------------------------------- /examples/github_analysis/metadata/tech__innovation_essentials/cybersyn/github_stars.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: github_stars 3 | columns: 4 | - name: date 5 | classifiers: 6 | - DATE.UTC -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/analysis/engagement_summary_by_repo.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | d.repo_name, 3 | re.actor_count, 4 | re.event_type_count, 5 | re.total_events, 6 | re.first_interaction, 7 | re.last_interaction 8 | FROM fct.repo_engagement re 9 | JOIN dim.repos d 10 | ON re.repo_id = d.repo_id 11 | ORDER BY re.total_events DESC, re.actor_count DESC; 12 | 13 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/analysis/event_dist_across_repo.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | d.repo_name, 3 | e.event_type, 4 | sum(e.event_count) AS total_events, 5 | min(e.first_event) AS first_event, 6 | max(e.last_event) AS last_event 7 | FROM fct.event_activity e 8 | JOIN dim.repos d 9 | ON e.repo_id = d.repo_id 10 | GROUP BY d.repo_name, e.event_type 11 | ORDER BY total_events DESC; 12 | 13 | 
-------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/analysis/growth_repos.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | d.repo_name, 3 | e.actor_count, 4 | e.total_events, 5 | a.star_growth, 6 | a.total_stars 7 | FROM fct.repo_engagement e 8 | JOIN fct.repo_activity a 9 | ON e.repo_id = a.repo_id 10 | JOIN dim.repos d 11 | ON e.repo_id = d.repo_id 12 | WHERE a.star_growth < 10 -- Low star growth threshold 13 | ORDER BY 14 | e.actor_count DESC, 15 | e.total_events DESC 16 | ; 17 | 18 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/analysis/star_growth_by_repo.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | d.repo_name, 3 | t.year, 4 | t.month, 5 | SUM(f.star_growth) AS star_growth 6 | FROM fct.repo_activity f 7 | JOIN dim.repos d 8 | ON f.repo_id = d.repo_id 9 | JOIN dim.date t 10 | ON f.first_star_date = t.date 11 | GROUP BY d.repo_name, t.year, t.month 12 | ORDER BY t.year, t.month DESC; 13 | 14 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/dim/date.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT 2 | DATE_TRUNC('day', s.date) AS date, 3 | EXTRACT (year FROM s.date) AS year, 4 | EXTRACT (month FROM s.date) AS month, 5 | EXTRACT (day FROM s.date) AS day, 6 | EXTRACT (quarter FROM s.date) AS quarter 7 | FROM tech__innovation_essentials.cybersyn.github_stars s 8 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/dim/event_type.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT 2 | type AS event_type, 3 | COUNT(*) AS event_count 4 | FROM 
tech__innovation_essentials.cybersyn.github_events 5 | WHERE type IS NOT NULL 6 | GROUP BY type 7 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/dim/repos.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | r.repo_id, 3 | n.repo_name, 4 | r.first_seen, 5 | r.last_seen, 6 | e.event_count, 7 | l.last_event_timestamp 8 | FROM tech__innovation_essentials.cybersyn.github_repos r 9 | LEFT JOIN stg.repo_event_aggregates e 10 | ON r.repo_id = e.repo_id 11 | LEFT JOIN stg.latest_repo_events l 12 | ON r.repo_id = l.repo_id 13 | LEFT JOIN stg.repo_names n 14 | ON r.repo_id = n.repo_id 15 | 16 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/fct/event_activity.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | e.repo_id, 3 | e.type AS event_type, 4 | COUNT(e.id) AS event_count, -- Total count of events by type 5 | MIN( 6 | e.created_at_timestamp 7 | ) AS first_event, -- First occurrence of the event 8 | MAX( 9 | e.created_at_timestamp 10 | ) AS last_event -- Last occurrence of the event 11 | FROM tech__innovation_essentials.cybersyn.github_events e 12 | WHERE e.repo_id IS NOT NULL 13 | GROUP BY e.repo_id, e.type 14 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/fct/repo_activity.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | g.repo_id, 3 | d.repo_name, 4 | g.first_star_date, 5 | g.last_star_date, 6 | g.star_growth, 7 | g.total_stars_acquired as total_stars, 8 | SUM(e.event_count) as total_events, 9 | SUM(e.event_count) / NULLIF(g.star_growth, 0) as events_per_star -- Example metric 10 | FROM stg.star_growth g 11 | JOIN dim.repos d 12 | ON g.repo_id = d.repo_id 13 | LEFT JOIN 
stg.repo_event_aggregates e 14 | ON g.repo_id = e.repo_id 15 | GROUP BY g.repo_id, d.repo_name, g.first_star_date, g.last_star_date, g.star_growth, g.total_stars_acquired 16 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/fct/repo_engagement.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | e.id, 3 | e.type, 4 | e.repo_id, 5 | e.actor_id, 6 | COUNT(DISTINCT e.actor_id) AS actor_count, -- Number of distinct users who interacted 7 | COUNT( 8 | DISTINCT e.type 9 | ) AS event_type_count, -- Number of distinct event types 10 | COUNT( 11 | e.id 12 | ) AS total_events, -- Total number of events 13 | MIN( 14 | e.created_at_timestamp 15 | ) AS first_interaction, -- First interaction timestamp 16 | MAX( 17 | e.created_at_timestamp 18 | ) AS last_interaction -- Last interaction timestamp 19 | FROM tech__innovation_essentials.cybersyn.github_events e 20 | WHERE e.repo_id IS NOT NULL 21 | GROUP BY e.id, e.type, e.repo_id, e.actor_id 22 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/stg/latest_repo_events.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | repo_id, 3 | max(created_at_timestamp) as last_event_timestamp 4 | FROM tech__innovation_essentials.cybersyn.github_events 5 | WHERE repo_id IS NOT NULL 6 | GROUP BY repo_id 7 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/stg/repo_event_aggregates.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | repo_id, 3 | type, 4 | count(*) as event_count 5 | FROM tech__innovation_essentials.cybersyn.github_events 6 | WHERE repo_id IS NOT NULL 7 | GROUP BY repo_id, type 8 | 
-------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/stg/repo_names.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | repo_name, 3 | repo_id 4 | FROM tech__innovation_essentials.cybersyn.github_repos 5 | QUALIFY row_number() OVER (PARTITION BY repo_id ORDER BY first_seen DESC) = 1 6 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/stg/repo_stars.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | repo_id, 3 | SUM(count) AS total_stars 4 | FROM tech__innovation_essentials.cybersyn.github_stars s 5 | WHERE s.count IS NOT NULL 6 | GROUP BY repo_id 7 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/stg/star_dates.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | repo_id, 3 | MIN(date) AS first_star_date, 4 | MAX(date) AS last_star_date 5 | from tech__innovation_essentials.cybersyn.github_stars 6 | GROUP BY repo_id 7 | -------------------------------------------------------------------------------- /examples/github_analysis/models/sdf_snowflake/stg/star_growth.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | s.repo_id, 3 | s.date, 4 | MIN(s.date) AS first_star_date, 5 | MAX(s.date) AS last_star_date, 6 | SUM( 7 | CASE 8 | WHEN s.date = d.first_star_date 9 | THEN s.count 10 | ELSE 0 11 | END 12 | ) AS first_star_count, 13 | SUM( 14 | CASE 15 | WHEN s.date = d.last_star_date 16 | THEN s.count 17 | ELSE 0 18 | END 19 | ) AS last_star_count, 20 | SUM(s.count) AS total_stars_acquired, 21 | ( 22 | SUM( 23 | CASE 24 | WHEN s.date = d.last_star_date 25 | THEN s.count 26 | ELSE 0 27 | END 28 | ) 29 | - SUM( 30 | CASE 31 | WHEN s.date = 
d.first_star_date 32 | THEN s.count 33 | ELSE 0 34 | END 35 | ) 36 | ) AS star_growth 37 | FROM tech__innovation_essentials.cybersyn.github_stars s 38 | JOIN stg.star_dates d 39 | ON s.repo_id = d.repo_id 40 | GROUP BY s.repo_id, s.date 41 | -------------------------------------------------------------------------------- /examples/hello/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/hello/models/main.sql: -------------------------------------------------------------------------------- 1 | select 'Hello World!' as message 2 | -------------------------------------------------------------------------------- /examples/hello/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: hello 3 | edition: "1.3" 4 | description: "A minimal workspace" 5 | 6 | includes: 7 | - path: models 8 | -------------------------------------------------------------------------------- /examples/hello_from_dbt/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/hello_from_dbt/models/main.sql: -------------------------------------------------------------------------------- 1 | select 'Hello World!' 
as message; -------------------------------------------------------------------------------- /examples/hello_from_dbt/seeds/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/hello_from_dbt/seeds/.gitkeep -------------------------------------------------------------------------------- /examples/hello_from_dbt/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: hello 3 | edition: "1.3" 4 | 5 | defaults: 6 | preprocessor: jinja 7 | 8 | includes: 9 | - path: models 10 | - path: seeds 11 | type: resource 12 | -------------------------------------------------------------------------------- /examples/hello_with_pii/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/hello_with_pii/checks/code_check.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | DISTINCT c.table_name as "table_name", 3 | c.column_name as "column name", 4 | c.classifiers 5 | FROM 6 | sdf.information_schema.columns c 7 | WHERE 8 | CONTAINS_ARRAY_VARCHAR(c.classifiers, 'PII.name') -------------------------------------------------------------------------------- /examples/hello_with_pii/models/main.sql: -------------------------------------------------------------------------------- 1 | select 2 | 'hello' as column_1, 3 | 'Jeffrey Walters' as column_2; -------------------------------------------------------------------------------- /examples/hello_with_pii/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: hello 3 | edition: "1.3" 4 | defaults: 5 | dialect: trino 6 | 7 | includes: 8 | - path: models 9 | 
--- 10 | classifier: 11 | name: PII 12 | labels: 13 | - name: name 14 | --- 15 | table: 16 | name: main 17 | columns: 18 | - name: column_2 19 | # UNCOMMENT THE BELOW 20 | # classifiers: 21 | # - PII.name 22 | -------------------------------------------------------------------------------- /examples/hello_world_s3/local/popdata.sql: -------------------------------------------------------------------------------- 1 | create table popdata with ( 2 | format='csv', 3 | skip_header_line_count=1, -- skip the first line of the file 4 | location='local/pop.csv' 5 | ); 6 | -------------------------------------------------------------------------------- /examples/hello_world_s3/local/world_metrics.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(population) as world_pop, 3 | min(population) as smallest_country, 4 | max(population) as largest_country 5 | from popdata; 6 | 7 | select 8 | country, 9 | split(date, ' ')[1] as day, 10 | split_part(source, ' ', 2) as source, 11 | split_part(source, ' ', 3) as source3 12 | from popdata; 13 | -------------------------------------------------------------------------------- /examples/hello_world_s3/remote/q1.sql: -------------------------------------------------------------------------------- 1 | -- Let's find the least densely populated region or country in 1999 2 | SELECT 3 | region_or_country, 4 | "ISO3_Alpha_code", 5 | "Population_Density_Per_Square_KM" 6 | FROM un_pop_data 7 | WHERE "Year" = 1999 8 | ORDER BY "Population_Density_Per_Square_KM" 9 | LIMIT 1; -------------------------------------------------------------------------------- /examples/hello_world_s3/remote/un_pop_data.sql: -------------------------------------------------------------------------------- 1 | -- Creates a root table with an S3 location 2 | -- Note: set the AWS region 3 | create table un_pop_data WITH ( 4 | FORMAT='CSV', 5 | skip_header_line_count=1, 6 | LOCATION='s3://sdfdatasets/hello-world/world_population_full.csv' 7 | 
); -------------------------------------------------------------------------------- /examples/hello_world_s3/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: hello_world 3 | edition: "1.3" 4 | description: > 5 | Hello World! Let's analyze the world population. 6 | 7 | To build using the local population data (./local/pop.csv), run using the 'local' environment: 'sdf run -e local --show all' 8 | 9 | To pull data from s3 (s3://sdfdatasets), first authenticate SDF with a local AWS profile. This can be any AWS profile, as the S3 bucket is public. 10 | Use: `sdf auth login aws --profile <profile-name>` 11 | 12 | Next, run using the remote environment: 'sdf run -e remote --show all'. 13 | 14 | Type 'sdf compile' to view schema information for either environment. 15 | 16 | defaults: 17 | dialect: trino 18 | 19 | --- 20 | environment: 21 | name: local 22 | 23 | includes: 24 | - path: local 25 | type: model 26 | - path: local/pop.csv 27 | type: resource 28 | --- 29 | environment: 30 | name: remote 31 | 32 | includes: 33 | - path: remote 34 | 35 | integrations: 36 | - provider: s3 37 | type: data 38 | buckets: 39 | - uri: s3://sdfdatasets 40 | region: us-east-1 41 | -------------------------------------------------------------------------------- /examples/jaffle_shop/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/jaffle_shop/models/analytics/orders.sql: -------------------------------------------------------------------------------- 1 | {% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %} 2 | 3 | with orders as ( 4 | 5 | select * from staging.stg_orders 6 | 7 | ), 8 | 9 | payments as ( 10 | 11 | select * from staging.stg_payments 12 | 13 | ), 14 | 15 | order_payments as ( 16 | 17 | select 18 | order_id, 19 | 
20 | {% for payment_method in payment_methods -%} 21 | sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount, 22 | {% endfor -%} 23 | 24 | sum(amount) as total_amount 25 | 26 | from payments 27 | 28 | group by order_id 29 | 30 | ), 31 | 32 | final as ( 33 | 34 | select 35 | orders.order_id, 36 | orders.customer_id, 37 | orders.order_date, 38 | orders.status, 39 | 40 | {% for payment_method in payment_methods -%} 41 | 42 | order_payments.{{ payment_method }}_amount, 43 | 44 | {% endfor -%} 45 | 46 | order_payments.total_amount as amount 47 | 48 | from orders 49 | 50 | 51 | left join order_payments 52 | on orders.order_id = order_payments.order_id 53 | 54 | ) 55 | 56 | select * from final -------------------------------------------------------------------------------- /examples/jaffle_shop/models/raw/seeds.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: raw_customers 3 | location: seeds/raw_customers.csv 4 | with-header: true 5 | file-format: csv 6 | --- 7 | table: 8 | name: raw_orders 9 | location: seeds/raw_orders.csv 10 | with-header: true 11 | file-format: csv 12 | --- 13 | table: 14 | name: raw_payments 15 | location: seeds/raw_payments.csv 16 | with-header: true 17 | file-format: csv -------------------------------------------------------------------------------- /examples/jaffle_shop/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from raw.raw_customers 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | id as customer_id, 11 | first_name, 12 | last_name 13 | 14 | from source 15 | 16 | ) 17 | 18 | select * from renamed; -------------------------------------------------------------------------------- /examples/jaffle_shop/models/staging/stg_orders.sql: -------------------------------------------------------------------------------- 1 | with 
source as ( 2 | 3 | select * from raw.raw_orders 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | id as order_id, 11 | user_id as customer_id, 12 | order_date, 13 | status 14 | 15 | from source 16 | 17 | ) 18 | 19 | select * from renamed; -------------------------------------------------------------------------------- /examples/jaffle_shop/models/staging/stg_payments.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | select * from raw.raw_payments 4 | 5 | ), 6 | 7 | renamed as ( 8 | 9 | select 10 | id as payment_id, 11 | order_id, 12 | payment_method, 13 | 14 | -- `amount` is currently stored in cents, so we convert it to dollars 15 | amount / 100 as amount 16 | 17 | from source 18 | 19 | ) 20 | 21 | select * from renamed; -------------------------------------------------------------------------------- /examples/jaffle_shop/models/staging/tests.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: stg_customers 3 | columns: 4 | - name: customer_id 5 | tests: 6 | - expect: unique() 7 | - expect: not_null() 8 | --- 9 | table: 10 | name: stg_orders 11 | columns: 12 | - name: order_id 13 | tests: 14 | - expect: unique() 15 | - expect: not_null() 16 | - name: status 17 | tests: 18 | - expect: in_accepted_values(['placed', 'shipped', 'completed', 'return_pending', 'returned']) 19 | --- 20 | table: 21 | name: stg_payments 22 | columns: 23 | - name: payment_id 24 | tests: 25 | - expect: unique() 26 | - expect: not_null() 27 | - name: payment_method 28 | tests: 29 | - expect: in_accepted_values(['credit_card', 'coupon', 'bank_transfer', 'gift_card']) -------------------------------------------------------------------------------- /examples/jaffle_shop/seeds/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/jaffle_shop/seeds/.gitkeep -------------------------------------------------------------------------------- /examples/jaffle_shop/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: jaffle_shop # The name of this workspace -- required 3 | edition: "1.3" # The edition of this workspace -- required 4 | description: 5 | This workspace models the DBT / DuckDB Jaffle Shop example project. 6 | The functionality of both DBT and DuckDB is encapsulated by SDF, as this project can be run entirely locally, with no external dependencies on data or compute providers. 7 | 8 | Try compiling first with `sdf compile`. This validates that all SQL and dependencies are correct. 9 | Then, run everything locally with `sdf run`. This will run the entire DAG locally. 10 | Lastly, track lineage with `sdf lineage`. Specify the fully qualified name after the command to see lineage for a specific table or column. 11 | 12 | defaults: 13 | catalog: jaffle_shop 14 | schema: public 15 | preprocessor: jinja 16 | materialization: table 17 | 18 | includes: 19 | - path: models # The path to sql sources for this workspace -- at least one path is required 20 | index: schema-table-name # Infers the schema from the directory name, enabling faster compile and a more intuitive project structure. 
21 | - path: seeds 22 | type: resource 23 | - path: models/staging 24 | index: schema-table-name 25 | defaults: 26 | materialization: view 27 | -------------------------------------------------------------------------------- /examples/lineage/checks/check_sink_phone_is_pii.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | DISTINCT c.table_name as "table_name", 3 | c.column_name as "column name", 4 | c.classifiers 5 | FROM 6 | sdf.information_schema.columns c 7 | WHERE 8 | CONTAINS_ARRAY_VARCHAR(c.classifiers, '%DATA.pii%') 9 | and c.table_id like '%lineage.pub.sink' -------------------------------------------------------------------------------- /examples/lineage/models/knis.sql: -------------------------------------------------------------------------------- 1 | select txn_date, sum(qty) as qty 2 | from middle 3 | group by txn_date -------------------------------------------------------------------------------- /examples/lineage/models/middle.sql: -------------------------------------------------------------------------------- 1 | select user_id, max(phone) as phone, txn_date, sum(qty) as qty 2 | from source 3 | group by user_id, txn_date -------------------------------------------------------------------------------- /examples/lineage/models/sink.sql: -------------------------------------------------------------------------------- 1 | select user_id as uid, phone, txn_date, qty 2 | from middle 3 | where qty > 180 -------------------------------------------------------------------------------- /examples/lineage/models/source.sql: -------------------------------------------------------------------------------- 1 | select column1 as user_id, 2 | column2 as phone, 3 | column3 as txn_date, 4 | column4 as qty from 5 | (VALUES 6 | (1, '555-1212', '2022-01-01', 100), 7 | (1, '555-1212', '2022-02-01', 50), 8 | (1, '555-1212', '2022-03-01', 75), 9 | (2, '444-1313', '2022-01-01', 200), 10 | (2, '444-1313', '2022-02-01', 
100), 11 | (3, '333-1414', '2022-03-01', 300)) 12 | -------------------------------------------------------------------------------- /examples/lineage/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: lineage 3 | edition: "1.3" 4 | description: > 5 | Creates a three stage pipeline with one source ('source.sql') and two sinks, called 'sink.sql' and 'knis.sql'. It shows how to compute lineage. 6 | 7 | Type 'sdf build' to run the pipeline 8 | 9 | Type 'sdf compile' to view information on classifiers and schema 10 | 11 | Type 'sdf lineage' to view lineage in the cli 12 | 13 | Type 'sdf auth login' and 'sdf deploy' to deploy and view lineage on the sdf console 14 | 15 | Try removing the DATA.pii classifier on source. 16 | 17 | includes: 18 | - path: models 19 | - path: checks 20 | 21 | --- 22 | environment: 23 | name: trino 24 | 25 | defaults: 26 | dialect: trino 27 | 28 | includes: 29 | - type: model 30 | path: models/ 31 | - type: check 32 | path: checks/ 33 | --- 34 | classifier: 35 | name: DATA 36 | 37 | labels: 38 | - name: uid 39 | - name: pii 40 | --- 41 | table: 42 | name: source 43 | 44 | columns: 45 | - name: user_id 46 | classifiers: 47 | - DATA.uid 48 | - name: phone 49 | classifiers: 50 | - DATA.pii 51 | -------------------------------------------------------------------------------- /examples/linter/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/linter/ddls/my_table.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: my_table 3 | location: seeds/my_table.csv 4 | columns: 5 | - name: num 6 | datatype: int 7 | - name: cool_col 8 | datatype: varchar 9 | - name: bool_col 10 | datatype: boolean 
-------------------------------------------------------------------------------- /examples/linter/models/main.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | DISTINCT('Hello World!') as message, 3 | num / 100.0 AS "num%", 4 | num as "NUM", 5 | my_table.cool_col, 6 | NULL as null_col, 7 | true AS true_col 8 | from my_table 9 | WHERE bool_col = 'TRUE' 10 | Order By 1, cool_col 11 | ; -------------------------------------------------------------------------------- /examples/linter/seeds/my_table.csv: -------------------------------------------------------------------------------- 1 | num,cool_col,bool_col 2 | 1,'cool_col_1',TRUE 3 | 2,'cool_col_2',TRUE 4 | 3,'cool_col_3',FALSE 5 | 4,'cool_col_4',TRUE 6 | 5,'cool_col_5',false -------------------------------------------------------------------------------- /examples/linter/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: linter 3 | edition: "1.3" 4 | description: "An example workspace with linting and formatting issues" 5 | 6 | includes: 7 | - path: models 8 | - path: seeds 9 | 10 | defaults: 11 | dialect: snowflake 12 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/checks/README.txt: -------------------------------------------------------------------------------- 1 | This folder will contain static analysis checks against SDF's information schema -------------------------------------------------------------------------------- /examples/moms_flower_shop/classifications/column_classifiers.sdf.yml: -------------------------------------------------------------------------------- 1 | classifier: 2 
| name: EVENT 3 | labels: 4 | - name: inapp 5 | - name: marketing 6 | 7 | --- 8 | classifier: 9 | name: PII 10 | labels: 11 | - name: name 12 | - name: address 13 | - name: email 14 | - name: gender 15 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/classifications/table_classifiers.sdf.yml: -------------------------------------------------------------------------------- 1 | classifier: 2 | name: TABLE_STATUS 3 | labels: 4 | - name: deprecated 5 | - name: dev 6 | - name: public 7 | propagate: false 8 | 9 | --- 10 | classifier: 11 | name: RETENTION 12 | labels: 13 | - name: d7 14 | - name: d30 15 | - name: d90 16 | - name: d180 17 | - name: infinity 18 | propagate: false 19 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/metadata/analytics/agg_installs_and_campaigns.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: agg_installs_and_campaigns 3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>> 4 | # classifiers: 5 | # - RETENTION.infinity 6 | # <<<<< -------------------------------------------------------------------------------- /examples/moms_flower_shop/metadata/analytics/dim_marketing_campaigns.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: dim_marketing_campaigns 3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>> 4 | # classifiers: 5 | # - RETENTION.infinity 6 | # <<<<< -------------------------------------------------------------------------------- /examples/moms_flower_shop/metadata/raw/raw_addresses.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: raw_addresses 3 | description: > 4 | All relevant information related to street addresses known to mom's flower shop. 
5 | This information comes from the user input into the mobile app. 6 | 7 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>> 8 | # classifiers: 9 | # - RETENTION.d7 10 | # <<<<< 11 | 12 | columns: 13 | - name: index 14 | description: Row number 15 | 16 | - name: address_id 17 | description: A unique identifier of an address 18 | 19 | - name: full_address 20 | description: The full address associated with the address_id 21 | 22 | - name: street_number 23 | description: The address street number associated with the address_id 24 | 25 | - name: street_name 26 | description: The address street name associated with the address_id 27 | 28 | - name: state 29 | description: The address US state associated with the address_id 30 | 31 | - name: city 32 | description: The address US city associated with the address_id 33 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/metadata/staging/app_installs.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: app_installs 3 | description: > 4 | This table is a staging table which adds campaign information 5 | to app install events 6 | # Uncomment here to add a "deprecated" classifier to the table 7 | # classifiers: 8 | # - TABLE_STATUS.deprecated -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/analytics/agg_installs_and_campaigns.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | -- install events data 3 | DATE_FORMAT(install_time, '%Y-%m-%d') AS install_date, 4 | campaign_name, 5 | platform, 6 | COUNT(DISTINCT customer_id) AS distinct_installs 7 | FROM staging.app_installs 8 | GROUP BY 1,2,3 -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/raw/raw_addresses.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE raw_addresses 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/addresses.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/raw/raw_customers.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_customers 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/customers.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/raw/raw_inapp_events.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_inapp_events 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/inapp_events.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/raw/raw_marketing_campaign_events.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_marketing_campaign_events 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/marketing_campaign_events.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/staging/app_installs.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | -- install events data 3 | COALESCE(m.event_id, i.event_id) AS event_id, 4 | i.customer_id, 5 | i.event_time AS install_time, 6 | i.platform, 7 | 8 | -- marketing campaign data - installs with no matching campaign row fall back to 'organic' (campaign_id -1) 9 | COALESCE(m.campaign_id, -1) AS campaign_id, 10 | COALESCE(m.campaign_name, 'organic') AS campaign_name, 11 | COALESCE(m.c_name, 'organic') AS campaign_type 12 | FROM inapp_events i 13 | LEFT OUTER JOIN raw.raw_marketing_campaign_events m 14 | ON (i.event_id = m.event_id) 15 | WHERE event_name = 'install'
-------------------------------------------------------------------------------- /examples/moms_flower_shop/models/staging/app_installs_v2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | DISTINCT 3 | -- install events data 4 | i.event_id, 5 | i.customer_id, 6 | i.event_time AS install_time, 7 | i.platform, 8 | 9 | -- marketing campaign data - installs with no matching campaign row fall back to 'organic' (campaign_id -1) 10 | COALESCE(m.campaign_id, -1) AS campaign_id, 11 | COALESCE(m.campaign_name, 'organic') AS campaign_name, 12 | COALESCE(m.c_name, 'organic') AS campaign_type 13 | FROM inapp_events i 14 | LEFT OUTER JOIN raw.raw_marketing_campaign_events m 15 | ON (i.campaign_id = m.campaign_id) 16 | WHERE event_name = 'install' -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/staging/customers.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c.id AS customer_id, 3 | c.first_name, 4 | c.last_name, 5 | c.first_name || ' ' || c.last_name AS full_name, 6 | c.email, 7 | c.gender, 8 | 9 | -- Marketing info 10 | i.campaign_id, 11 | i.campaign_name, 12 | i.campaign_type, 13 | 14 | -- Address info 15 | c.address_id, 16 | a.full_address, 17 | a.state 18 | FROM raw.raw_customers c 19 | 20 | LEFT OUTER JOIN app_installs_v2 i 21 | ON (c.id = i.customer_id) 22 | 23 | LEFT OUTER JOIN raw.raw_addresses a 24 | ON (c.address_id = a.address_id) 25 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/staging/inapp_events.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | event_id, 3 | customer_id, 4 | FROM_UNIXTIME(event_time/1000) AS event_time, 5 | event_name, 6 | event_value, 7 | additional_details, 8 | platform, 9 | campaign_id 10 | FROM raw.raw_inapp_events --------------------------------------------------------------------------------
/examples/moms_flower_shop/models/staging/marketing_campaigns.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | campaign_id, 3 | campaign_name, 4 | SUBSTR(c_name, 1, LENGTH(c_name)-1) AS campaign_type, 5 | MIN( 6 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds 7 | ) AS start_time, 8 | MAX( 9 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds 10 | ) AS end_time, 11 | COUNT(event_time) AS campaign_duration, 12 | SUM(cost) AS total_campaign_spent, 13 | ARRAY_AGG(event_id) AS event_ids 14 | FROM raw.raw_marketing_campaign_events 15 | GROUP BY 16 | campaign_id, 17 | campaign_name, 18 | campaign_type 19 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/models/staging/stg_installs_per_campaign.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | campaign_id, 3 | COUNT(event_id) AS total_num_installs 4 | FROM app_installs 5 | GROUP BY 1 6 | -------------------------------------------------------------------------------- /examples/moms_flower_shop/reports/README.txt: -------------------------------------------------------------------------------- 1 | This folder will contain data warehouse reports based on SDF's information schema -------------------------------------------------------------------------------- /examples/moms_flower_shop/seeds/csv/README.txt: -------------------------------------------------------------------------------- 1 | To explore the raw source files as CSVs, open the project on GitHub: (...) 
-------------------------------------------------------------------------------- /examples/moms_flower_shop/seeds/parquet/addresses.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/addresses.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop/seeds/parquet/customers.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/customers.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop/seeds/parquet/inapp_events.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/inapp_events.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop/seeds/parquet/marketing_campaign_events.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/marketing_campaign_events.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/checks/README.txt: -------------------------------------------------------------------------------- 1 
| This folder will contain static analysis checks against SDF's information schema -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/checks/mixed_event_ids.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | DISTINCT c.table_name as "table_name", 3 | c.column_name as "column name", 4 | c.classifiers 5 | FROM 6 | sdf.information_schema.columns c 7 | WHERE 8 | -- more than one EVENT classifier is assigned 9 | CAST(c.classifiers AS VARCHAR) LIKE '%EVENT%EVENT%' -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/classifications/column_classifiers.sdf.yml: -------------------------------------------------------------------------------- 1 | classifier: 2 | name: EVENT 3 | labels: 4 | - name: inapp 5 | - name: marketing 6 | 7 | --- 8 | classifier: 9 | name: PII 10 | labels: 11 | - name: name 12 | - name: address 13 | - name: email 14 | - name: gender 15 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/classifications/table_classifiers.sdf.yml: -------------------------------------------------------------------------------- 1 | classifier: 2 | name: TABLE_STATUS 3 | labels: 4 | - name: deprecated 5 | - name: dev 6 | - name: public 7 | propagate: false 8 | 9 | --- 10 | classifier: 11 | name: RETENTION 12 | labels: 13 | - name: d7 14 | - name: d30 15 | - name: d90 16 | - name: d180 17 | - name: infinity 18 | propagate: false 19 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/metadata/analytics/agg_installs_and_campaigns.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: agg_installs_and_campaigns 3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>> 4 | classifiers: 5 | - 
RETENTION.infinity 6 | # <<<<< 7 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/metadata/analytics/dim_marketing_campaigns.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: dim_marketing_campaigns 3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>> 4 | classifiers: 5 | - RETENTION.infinity 6 | # <<<<< -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/metadata/raw/raw_addresses.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: raw_addresses 3 | description: > 4 | All relevant information related to street addresses known to mom's flower shop. 5 | This information comes from the user input into the mobile app. 6 | 7 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>> 8 | classifiers: 9 | - RETENTION.d7 10 | # <<<<< 11 | 12 | columns: 13 | - name: index 14 | description: Row number 15 | 16 | - name: address_id 17 | description: A unique identifier of an address 18 | 19 | - name: full_address 20 | description: The full address associated with the address_id 21 | 22 | - name: street_number 23 | description: The address street number associated with the address_id 24 | 25 | - name: street_name 26 | description: The address street name associated with the address_id 27 | 28 | - name: state 29 | description: The address US state associated with the address_id 30 | 31 | - name: city 32 | description: The address US city associated with the address_id 33 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/metadata/staging/app_installs.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: app_installs 3 | description: > 4 | This table is a staging 
table which adds campaign information 5 | to app install events 6 | # Uncomment here to add a "deprecated" classifier to the table 7 | classifiers: 8 | - TABLE_STATUS.deprecated -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/metadata/staging/inapp_events.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: inapp_events 3 | tests: 4 | - expect: unique_columns(["event_id"]) 5 | severity: error 6 | columns: 7 | - name: event_value 8 | tests: 9 | - expect: valid_scalar("""event_value >= 0""") 10 | severity: error 11 | - expect: minimum(0) 12 | severity: error -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/analytics/agg_installs_and_campaigns.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | -- install events data 3 | DATE_FORMAT(install_time, '%Y-%m-%d') AS install_date, 4 | campaign_name, 5 | platform, 6 | COUNT(DISTINCT customer_id) AS distinct_installs 7 | FROM staging.app_installs_v2 8 | GROUP BY 1,2,3 -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/analytics/dim_marketing_campaigns.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | -- marketing campaigns dimensions 3 | m.campaign_id, 4 | m.campaign_name, 5 | -- metrics 6 | i.total_num_installs, 7 | total_campaign_spent / 8 | NULLIF(i.total_num_installs, 0) AS avg_customer_acquisition_cost, 9 | campaign_duration / 10 | NULLIF(i.total_num_installs, 0) AS install_duration_ratio 11 | FROM staging.marketing_campaigns m 12 | LEFT OUTER JOIN staging.stg_installs_per_campaign i 13 | ON (m.campaign_id = i.campaign_id) 14 | ORDER BY total_num_installs DESC NULLS LAST -------------------------------------------------------------------------------- 
/examples/moms_flower_shop_completed/models/raw/raw_addresses.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_addresses 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/addresses.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/raw/raw_customers.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_customers 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/customers.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/raw/raw_inapp_events.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_inapp_events 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/inapp_events.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/raw/raw_marketing_campaign_events.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_marketing_campaign_events 2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/marketing_campaign_events.parquet'); -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/staging/app_installs.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | -- install events data 3 | COALESCE(m.event_id, i.event_id) AS event_id, 4 | i.customer_id, 5 | i.event_time AS install_time, 6 | i.platform, 7 | 8 | -- marketing campaign data - installs with no matching campaign row fall back to 'organic' (campaign_id -1) 9 | COALESCE(m.campaign_id, -1) AS campaign_id, 10 | COALESCE(m.campaign_name, 'organic') AS campaign_name, 11 | COALESCE(m.c_name, 'organic') AS campaign_type 12 | FROM inapp_events i 13 | LEFT OUTER JOIN
raw.raw_marketing_campaign_events m 14 | ON (i.event_id = m.event_id) 15 | WHERE event_name = 'install' -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/staging/app_installs_v2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | DISTINCT 3 | -- install events data 4 | i.event_id, 5 | i.customer_id, 6 | i.event_time AS install_time, 7 | i.platform, 8 | 9 | -- marketing campaign data - installs with no matching campaign row fall back to 'organic' (campaign_id -1) 10 | COALESCE(m.campaign_id, -1) AS campaign_id, 11 | COALESCE(m.campaign_name, 'organic') AS campaign_name, 12 | COALESCE(m.c_name, 'organic') AS campaign_type 13 | FROM inapp_events i 14 | LEFT OUTER JOIN raw.raw_marketing_campaign_events m 15 | ON (i.campaign_id = m.campaign_id) 16 | WHERE event_name = 'install' -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/staging/customers.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c.id AS customer_id, 3 | c.first_name, 4 | c.last_name, 5 | c.first_name || ' ' || c.last_name AS full_name, 6 | c.email, 7 | c.gender, 8 | 9 | -- Marketing info 10 | i.campaign_id, 11 | i.campaign_name, 12 | i.campaign_type, 13 | 14 | -- Address info 15 | c.address_id, 16 | a.full_address, 17 | a.state 18 | FROM raw.raw_customers c 19 | 20 | LEFT OUTER JOIN app_installs_v2 i 21 | ON (c.id = i.customer_id) 22 | 23 | LEFT OUTER JOIN raw.raw_addresses a 24 | ON (c.address_id = a.address_id) 25 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/staging/inapp_events.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | event_id, 3 | customer_id, 4 | FROM_UNIXTIME(event_time/1000) AS event_time, 5 | event_name, 6 | event_value, 7 | additional_details, 8 | platform, 9
| campaign_id 10 | FROM raw.raw_inapp_events -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/staging/marketing_campaigns.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | campaign_id, 3 | campaign_name, 4 | SUBSTR(c_name, 1, LENGTH(c_name)-1) AS campaign_type, 5 | MIN( 6 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds 7 | ) AS start_time, 8 | MAX( 9 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds 10 | ) AS end_time, 11 | COUNT(event_time) AS campaign_duration, 12 | SUM(cost) AS total_campaign_spent, 13 | ARRAY_AGG(event_id) AS event_ids 14 | FROM raw.raw_marketing_campaign_events 15 | GROUP BY 16 | campaign_id, 17 | campaign_name, 18 | campaign_type 19 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/models/staging/stg_installs_per_campaign.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | campaign_id, 3 | COUNT(event_id) AS total_num_installs 4 | FROM app_installs_v2 5 | GROUP BY 1 6 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/reports/README.txt: -------------------------------------------------------------------------------- 1 | This folder will contain data warehouse reports based on SDF's information schema -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/reports/deprecated_table_reference.sql: -------------------------------------------------------------------------------- 1 | WITH 2 | deprecated_tables AS ( 3 | SELECT 4 | table_id 5 | FROM sdf.information_schema.tables 6 | WHERE 7 | CONTAINS(classifiers, 'TABLE_STATUS.deprecated') 8 | ) 9 | 10 | SELECT 11 | to_table_id AS table_id, 12 | from_table_id AS 
upstream_deprecated_table_id 13 | FROM sdf.information_schema.table_lineage 14 | WHERE from_table_id IN (SELECT table_id FROM deprecated_tables) 15 | AND to_table_id IS NOT NULL 16 | -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/seeds/csv/README.txt: -------------------------------------------------------------------------------- 1 | To explore the raw source files as CSVs, open the project on GitHub: (...) -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/seeds/parquet/addresses.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/addresses.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/seeds/parquet/customers.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/customers.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/seeds/parquet/inapp_events.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/inapp_events.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/seeds/parquet/marketing_campaign_events.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/marketing_campaign_events.parquet -------------------------------------------------------------------------------- /examples/moms_flower_shop_completed/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: moms_flower_shop 3 | edition: "1.3" 4 | description: > 5 | This workspace represents the data warehouse of mom's flower shop. 6 | 7 | It contains raw data regarding: 8 | 1. Customers 9 | 2. Marketing campaigns 10 | 3. Mobile in-app events 11 | 4. Street addresses 12 | 13 | That data is available in the seeds folder and is referenced in models/raw 14 | to be loaded and used by SDF. Data transformations are performed and additional 15 | models are available in the staging and analytics folders under the models folder. 16 | 17 | includes: 18 | - path: models 19 | type: model 20 | index: schema-table-name 21 | - path: seeds/parquet 22 | type: resource 23 | - path: metadata 24 | type: metadata 25 | index: schema-table-name 26 | - path: classifications 27 | type: metadata 28 | - path: reports 29 | type: report 30 | - path: checks 31 | type: check 32 | 33 | defaults: 34 | preprocessor: jinja 35 | --- 36 | environment: 37 | name: dev 38 | integrations: 39 | - provider: sdf 40 | type: database 41 | targets: 42 | - pattern: moms_flower_shop.*.* 43 | rename-as: moms_workshed.${1}.${2} 44 | 45 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/checks/no_pii_in_external.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | 
c.table_id 3 | FROM 4 | sdf.information_schema.columns c 5 | WHERE 6 | CONTAINS_ARRAY_VARCHAR(c.classifiers, 'PII') 7 | AND c.schema_name = 'external' 8 | GROUP BY c.table_id; 9 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/classification/taxonomy.sdf.yml: -------------------------------------------------------------------------------- 1 | classifier: 2 | name: PII 3 | labels: 4 | - name: name 5 | - name: email 6 | - name: phone -------------------------------------------------------------------------------- /examples/pii_saas_platform/classification/users.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: payment.public.users 3 | description: DDL for the users table 4 | columns: 5 | - name: name 6 | classifiers: 7 | - PII.name 8 | - name: email 9 | classifiers: 10 | - PII.email 11 | - name: phone 12 | classifiers: 13 | - PII.phone -------------------------------------------------------------------------------- /examples/pii_saas_platform/ddls/payment/public/invoices.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE invoices ( 2 | invoice_id INT, 3 | organization_id INT, 4 | amount DECIMAL(10, 2), 5 | issued_date TIMESTAMP, 6 | due_date TIMESTAMP, 7 | payer_user_id INT, 8 | paid_date TIMESTAMP, 9 | status VARCHAR(50), 10 | FOREIGN KEY (organization_id) REFERENCES organizations(organization_id), 11 | FOREIGN KEY (payer_user_id) REFERENCES users(user_id) -- payer_user_id is the column of this table that points at users.user_id 12 | ); -------------------------------------------------------------------------------- /examples/pii_saas_platform/ddls/payment/public/organizations.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE organizations ( 2 | organization_id INT, 3 | name VARCHAR(255), 4 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 5 | ); 6 |
-------------------------------------------------------------------------------- /examples/pii_saas_platform/ddls/payment/public/users.sql: -------------------------------------------------------------------------------- 1 | 2 | CREATE TABLE users ( 3 | user_id INT, 4 | organization_id INT, 5 | name VARCHAR(255), 6 | email VARCHAR(255), 7 | phone VARCHAR(255), 8 | role VARCHAR(100), 9 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 10 | FOREIGN KEY (organization_id) REFERENCES organizations(organization_id) 11 | ); 12 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/external/invoice_stats.sql: -------------------------------------------------------------------------------- 1 | -- WARNING: This query is an example of what NOT to do. It exposes sensitive user information. 2 | SELECT 3 | u.user_id, 4 | u.name, 5 | u.email, 6 | i.invoice_id, 7 | i.amount, 8 | i.status 9 | FROM 10 | payment.public.users u 11 | JOIN 12 | payment.public.invoices i ON u.user_id = i.payer_user_id; -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/external/org_invoice_stats.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | o.organization_id, 3 | o.name AS organization_name, 4 | COUNT(i.invoice_id) AS total_invoices, 5 | AVG(i.amount) AS average_invoice_amount, 6 | SUM(CASE WHEN i.status = 'Paid' THEN 1 ELSE 0 END) / NULLIF(COUNT(i.invoice_id), 0) * 100 AS percent_invoices_paid -- NULLIF gives NULL rather than a divide-by-zero for organizations the LEFT JOIN keeps with no invoices 7 | FROM 8 | payment.public.organizations o 9 | LEFT JOIN 10 | payment.public.invoices i ON o.organization_id = i.organization_id 11 | GROUP BY 12 | o.organization_id, o.name; -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/internal/avg_invoice_amt.sql: -------------------------------------------------------------------------------- 1 | SELECT AVG(amount) AS
average_invoice_amount 2 | FROM payment.public.invoices ; 3 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/internal/invoice_payment_delay.sql: -------------------------------------------------------------------------------- 1 | SELECT invoice_id, DATEDIFF(day, due_date, paid_date) AS delay_days 2 | FROM payment.public.invoices 3 | WHERE status = 'Paid' AND paid_date > due_date; 4 | 5 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/internal/mau_per_org.sql: -------------------------------------------------------------------------------- 1 | SELECT organization_id, 2 | DATE_TRUNC('MONTH', created_at) AS month, 3 | COUNT(DISTINCT user_id) AS monthly_active_users 4 | FROM payment.public.users 5 | GROUP BY organization_id, month; -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/internal/most_frequent_payer.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | o.organization_id, 3 | o.name AS organization_name, 4 | u.user_id, 5 | u.email, 6 | COUNT(i.invoice_id) AS payment_count 7 | FROM 8 | payment.public.invoices i 9 | JOIN 10 | payment.public.users u ON i.payer_user_id = u.user_id 11 | JOIN 12 | payment.public.organizations o ON u.organization_id = o.organization_id 13 | WHERE 14 | i.status = 'Paid' -- Considering only paid invoices 15 | GROUP BY 16 | o.organization_id, o.name, u.user_id, u.email 17 | ORDER BY 18 | o.organization_id, payment_count DESC; 19 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/internal/total_revenue_per_org.sql: -------------------------------------------------------------------------------- 1 | SELECT organization_id, SUM(amount) AS total_revenue 2 | FROM payment.public.invoices 3 | WHERE status = 'Paid' 4 | GROUP BY 
organization_id; 5 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/internal/users_per_domain.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | o.organization_id, 3 | o.name AS organization_name, 4 | SPLIT_PART(u.email, '@', 2) AS email_domain, 5 | COUNT(*) AS user_count 6 | FROM 7 | payment.public.users u 8 | JOIN 9 | payment.public.organizations o ON u.organization_id = o.organization_id 10 | GROUP BY 11 | o.organization_id, o.name, email_domain 12 | ORDER BY 13 | o.organization_id, user_count DESC; 14 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/models/internal/users_per_org.sql: -------------------------------------------------------------------------------- 1 | SELECT o.organization_id, o.name, COUNT(u.user_id) AS user_count 2 | FROM payment.public.organizations o 3 | JOIN payment.public.users u ON o.organization_id = u.organization_id 4 | GROUP BY o.organization_id, o.name; 5 | -------------------------------------------------------------------------------- /examples/pii_saas_platform/reports/tables_with_pii.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | t.table_id, 3 | t.description, 4 | t.dialect 5 | FROM 6 | sdf.information_schema.tables t 7 | JOIN 8 | sdf.information_schema.columns c ON t.table_id = c.table_id 9 | WHERE CONTAINS_ARRAY_VARCHAR(c.classifiers, 'PII') 10 | GROUP BY 1,2,3; -------------------------------------------------------------------------------- /examples/pii_saas_platform/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: pii_saas_platform # The name of this workspace -- required 3 | edition: "1.3" # The edition of this workspace -- required 4 | description: > 5 | This workspace mocks a Snowflake environment of users, 
organizations, and invoices and demonstrates code contracts & reports in action. 6 | 7 | Try `sdf compile`, `sdf test`, and `sdf report` to see the magic happen 8 | 9 | defaults: 10 | dialect: snowflake # The dialect of SQL used in this workspace, defaults to "trino" 11 | 12 | includes: 13 | - path: classification 14 | - path: checks 15 | type: check 16 | defaults: 17 | catalog: sdf 18 | schema: checks 19 | dialect: trino 20 | - path: reports 21 | type: report 22 | defaults: 23 | catalog: sdf 24 | schema: reports 25 | dialect: trino 26 | - path: models # The path to sql sources for this workspace -- at least one path is required 27 | defaults: 28 | catalog: transformations 29 | index: schema-table-name 30 | - path: ddls # The path to ddl sources for this workspace -- at least one path is required 31 | index: catalog-schema-table-name 32 | -------------------------------------------------------------------------------- /examples/seeds/models/french_customers.sql: -------------------------------------------------------------------------------- 1 | select * from raw_customers 2 | where country = 'France' 3 | -------------------------------------------------------------------------------- /examples/seeds/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: seeds 3 | edition: "1.3" 4 | description: > 5 | This workspace demonstrates a seed table: a table whose data is provided in a CSV file 6 | 7 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/snowflake/seeds 8 | 9 | defaults: 10 | preprocessor: jinja 11 | 12 | --- 13 | environment: 14 | name: test 15 | includes: 16 | - path: seeds 17 | type: seed 18 | - path: models 19 | -------------------------------------------------------------------------------- /examples/snapshots/models/test1/a1.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: a1 3 | materialization: 
table 4 | -------------------------------------------------------------------------------- /examples/snapshots/models/test1/a1.sql: -------------------------------------------------------------------------------- 1 | select * from values 2 | (1, CAST('Jack' AS VARCHAR), '2022-01-01'), 3 | (2, 'Bob', '2022-01-01'), 4 | (3, 'Jane', '2022-01-01') 5 | as T(id, name, "event time") 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test1/a2.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: a2 3 | materialization: table 4 | -------------------------------------------------------------------------------- /examples/snapshots/models/test1/a2.sql: -------------------------------------------------------------------------------- 1 | select * from values 2 | (1, CAST('Jacob' AS VARCHAR), 10, '2022-01-02'), 3 | (3, 'Mary Jane', 20, '2022-01-01'), 4 | (44, 'Cloe', 30, '2022-01-02') 5 | as T(id, name, age, "event time") 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test1/b.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: b 3 | materialization: snapshot-table 4 | snapshot-options: 5 | strategy: timestamp 6 | unique-key: id 7 | updated-at: event time 8 | -------------------------------------------------------------------------------- /examples/snapshots/models/test1/b.sql: -------------------------------------------------------------------------------- 1 | {% if builtin.is_snapshot_mode %} 2 | select * from a2 3 | {% else %} 4 | select * from a1 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test2/a1.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: a1 3 | materialization: table 4 | 
-------------------------------------------------------------------------------- /examples/snapshots/models/test2/a1.sql: -------------------------------------------------------------------------------- 1 | select * from values 2 | (1, CAST('Jack' AS VARCHAR), '2022-01-01'), 3 | (2, 'Bob', '2022-01-01'), 4 | (3, 'Jane', '2022-01-01') 5 | as T(id, name, "event time") 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test2/a2.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: a2 3 | materialization: table 4 | -------------------------------------------------------------------------------- /examples/snapshots/models/test2/a2.sql: -------------------------------------------------------------------------------- 1 | select * from values 2 | (1, CAST('Jacob' AS VARCHAR), 10, '2022-01-02'), 3 | (3, 'Mary Jane', 20, '2022-01-01'), 4 | (44, 'Cloe', 30, '2022-01-02') 5 | as T(id, name, age, "event time") 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test2/b.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: b 3 | materialization: snapshot-table 4 | snapshot-options: 5 | strategy: check 6 | unique-key: id 7 | check-cols: all 8 | -------------------------------------------------------------------------------- /examples/snapshots/models/test2/b.sql: -------------------------------------------------------------------------------- 1 | {% if builtin.is_snapshot_mode %} 2 | select * from a2 3 | {% else %} 4 | select * from a1 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test3/a1.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: a1 3 | materialization: table 4 | 
-------------------------------------------------------------------------------- /examples/snapshots/models/test3/a1.sql: -------------------------------------------------------------------------------- 1 | select * from values 2 | (1, CAST('Jack' AS VARCHAR), '2022-01-01'), 3 | (2, 'Bob', '2022-01-01'), 4 | (3, 'Jane', '2022-01-01') 5 | as T(id, name, "event time") 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test3/a2.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: a2 3 | materialization: table 4 | -------------------------------------------------------------------------------- /examples/snapshots/models/test3/a2.sql: -------------------------------------------------------------------------------- 1 | select * from values 2 | (1, CAST('Jacob' AS VARCHAR), 10, '2022-01-02'), 3 | (3, 'Mary Jane', 20, '2022-01-01'), 4 | (44, 'Cloe', 30, '2022-01-02') 5 | as T(id, name, age, "event time") 6 | -------------------------------------------------------------------------------- /examples/snapshots/models/test3/b.sdf.yml: -------------------------------------------------------------------------------- 1 | table: 2 | name: b 3 | materialization: snapshot-table 4 | snapshot-options: 5 | strategy: check 6 | unique-key: id 7 | check-cols: 8 | !cols ['event time'] 9 | -------------------------------------------------------------------------------- /examples/snapshots/models/test3/b.sql: -------------------------------------------------------------------------------- 1 | {% if builtin.is_snapshot_mode %} 2 | select * from a2 3 | {% else %} 4 | select * from a1 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /examples/snapshots/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: snapshots 3 | edition: "1.3" 4 | description: > 5 | 
This workspace uses a simple scenario of one source table (A) and one derived table (B) to demonstrate snapshots. 6 | Running and compiling this workspace requires a connection to a Snowflake account. 7 | The provider in this workspace is using the `default` credentials. Run `sdf auth login snowflake` with no name provided to set these. 8 | 9 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/snowflake/snapshots 10 | 11 | defaults: 12 | preprocessor: jinja 13 | --- 14 | environment: 15 | name: test1 16 | description: timestamp strategy 17 | includes: 18 | - path: models/test1 19 | --- 20 | environment: 21 | name: test2 22 | description: check strategy all columns 23 | includes: 24 | - path: models/test2 25 | --- 26 | environment: 27 | name: test3 28 | description: check strategy selected columns 29 | includes: 30 | - path: models/test3 31 | -------------------------------------------------------------------------------- /examples/tests/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | #sdf gitignore 3 | /sdftarget 4 | -------------------------------------------------------------------------------- /examples/tests/models/raw_inapp_events.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE raw_inapp_events 2 | WITH (FORMAT='CSV', skip_header_line_count=1, LOCATION='seeds/inapp_events.csv'); -------------------------------------------------------------------------------- /examples/tests/workspace.sdf.yml: -------------------------------------------------------------------------------- 1 | workspace: 2 | name: tests_workspace # The name of this workspace -- required 3 | edition: "1.3" # The edition of this workspace -- required 4 | description: > 5 | This workspace demonstrates how to use the SDF built-in tests library 6 | 7 | includes: 8 | - path: models # The path to sql models for this workspace -- at least one path is required 9 | - path: 
seeds # Where raw data is stored locally 10 | type: resource 11 | - path: src_metadata # Where table metadata and tests are stored 12 | 13 | defaults: 14 | preprocessor: jinja 15 | -------------------------------------------------------------------------------- /schemas/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/schemas/.gitkeep -------------------------------------------------------------------------------- /schemas/sdf-definition-schema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/schemas/sdf-definition-schema.json --------------------------------------------------------------------------------