├── .editorconfig
├── .gitignore
├── README.md
├── benchmarks
├── .gitkeep
└── trino
│ ├── clickbench
│ ├── .gitignore
│ ├── hydrate.sh
│ ├── queries
│ │ ├── q01.sql
│ │ ├── q02.sql
│ │ ├── q03.sql
│ │ ├── q04.sql
│ │ ├── q05.sql
│ │ ├── q06.sql
│ │ ├── q07.sql
│ │ ├── q08.sql
│ │ ├── q09.sql
│ │ ├── q10.sql
│ │ ├── q11.sql
│ │ ├── q12.sql
│ │ ├── q13.sql
│ │ ├── q14.sql
│ │ ├── q15.sql
│ │ ├── q16.sql
│ │ ├── q17.sql
│ │ ├── q18.sql
│ │ ├── q19.sql
│ │ ├── q20.sql
│ │ ├── q21.sql
│ │ ├── q22.sql
│ │ ├── q23.sql
│ │ ├── q24.sql
│ │ ├── q25.sql
│ │ ├── q26.sql
│ │ ├── q27.sql
│ │ ├── q28.sql
│ │ ├── q29.sql
│ │ ├── q30.sql
│ │ ├── q31.sql
│ │ ├── q32.sql
│ │ ├── q33.sql
│ │ ├── q34.sql
│ │ ├── q35.sql
│ │ ├── q36.sql
│ │ ├── q37.sql
│ │ ├── q38.sql
│ │ ├── q39.sql
│ │ ├── q40.sql
│ │ ├── q41.sql
│ │ ├── q42.sql
│ │ └── q43.sql
│ ├── sources
│ │ ├── hits.sql
│ │ └── sources.sql
│ └── workspace.sdf.yml
│ ├── imdb
│ ├── .gitignore
│ ├── hydrate.sh
│ ├── queries
│ │ ├── _01a.sql
│ │ ├── _01b.sql
│ │ ├── _01c.sql
│ │ ├── _01d.sql
│ │ ├── _02a.sql
│ │ ├── _02b.sql
│ │ ├── _02c.sql
│ │ ├── _02d.sql
│ │ ├── _03a.sql
│ │ ├── _03b.sql
│ │ ├── _03c.sql
│ │ ├── _04a.sql
│ │ ├── _04b.sql
│ │ ├── _04c.sql
│ │ ├── _05a.sql
│ │ ├── _05b.sql
│ │ ├── _05c.sql
│ │ ├── _06a.sql
│ │ ├── _06b.sql
│ │ ├── _06c.sql
│ │ ├── _06d.sql
│ │ ├── _06e.sql
│ │ ├── _06f.sql
│ │ ├── _07a.sql
│ │ ├── _07b.sql
│ │ ├── _07c.sql
│ │ ├── _08a.sql
│ │ ├── _08b.sql
│ │ ├── _08c.sql
│ │ ├── _08d.sql
│ │ ├── _09a.sql
│ │ ├── _09b.sql
│ │ ├── _09c.sql
│ │ ├── _09d.sql
│ │ ├── _10a.sql
│ │ ├── _10b.sql
│ │ ├── _10c.sql
│ │ ├── _11a.sql
│ │ ├── _11b.sql
│ │ ├── _11c.sql
│ │ ├── _11d.sql
│ │ ├── _12a.sql
│ │ ├── _12b.sql
│ │ ├── _12c.sql
│ │ ├── _13a.sql
│ │ ├── _13b.sql
│ │ ├── _13c.sql
│ │ ├── _13d.sql
│ │ ├── _14a.sql
│ │ ├── _14b.sql
│ │ ├── _14c.sql
│ │ ├── _15a.sql
│ │ ├── _15b.sql
│ │ ├── _15c.sql
│ │ ├── _15d.sql
│ │ ├── _16a.sql
│ │ ├── _16b.sql
│ │ ├── _16c.sql
│ │ ├── _16d.sql
│ │ ├── _17a.sql
│ │ ├── _17b.sql
│ │ ├── _17c.sql
│ │ ├── _17d.sql
│ │ ├── _17e.sql
│ │ ├── _17f.sql
│ │ ├── _18a.sql
│ │ ├── _18b.sql
│ │ ├── _18c.sql
│ │ ├── _19a.sql
│ │ ├── _19b.sql
│ │ ├── _19c.sql
│ │ ├── _19d.sql
│ │ ├── _20a.sql
│ │ ├── _20b.sql
│ │ ├── _20c.sql
│ │ ├── _21a.sql
│ │ ├── _21b.sql
│ │ ├── _21c.sql
│ │ ├── _22a.sql
│ │ ├── _22b.sql
│ │ ├── _22c.sql
│ │ ├── _22d.sql
│ │ ├── _23a.sql
│ │ ├── _23b.sql
│ │ ├── _23c.sql
│ │ ├── _24a.sql
│ │ ├── _24b.sql
│ │ ├── _25a.sql
│ │ ├── _25b.sql
│ │ ├── _25c.sql
│ │ ├── _26a.sql
│ │ ├── _26b.sql
│ │ ├── _26c.sql
│ │ ├── _27a.sql
│ │ ├── _27b.sql
│ │ ├── _27c.sql
│ │ ├── _28a.sql
│ │ ├── _28b.sql
│ │ ├── _28c.sql
│ │ ├── _29a.sql
│ │ ├── _29b.sql
│ │ ├── _29c.sql
│ │ ├── _30a.sql
│ │ ├── _30b.sql
│ │ ├── _30c.sql
│ │ ├── _31a.sql
│ │ ├── _31b.sql
│ │ ├── _31c.sql
│ │ ├── _32a.sql
│ │ ├── _32b.sql
│ │ ├── _33a.sql
│ │ ├── _33b.sql
│ │ └── _33c.sql
│ ├── sources
│ │ └── sources.sql
│ └── workspace.sdf.yml
│ └── tpch
│ ├── .gitignore
│ ├── hydrate.sh
│ ├── queries
│ ├── q1.sql
│ ├── q10.sql
│ ├── q11.sql
│ ├── q12.sql
│ ├── q13.sql
│ ├── q14.sql
│ ├── q15.sql
│ ├── q16.sql
│ ├── q17.sql
│ ├── q18.sql
│ ├── q19.sql
│ ├── q2.sql
│ ├── q20.sql
│ ├── q21.sql
│ ├── q22.sql
│ ├── q3.sql
│ ├── q4.sql
│ ├── q5.sql
│ ├── q6.sql
│ ├── q7.sql
│ ├── q8.sql
│ ├── q9.sql
│ └── revenue0.sql
│ ├── sources
│ ├── customer.sql
│ ├── lineitem.sql
│ ├── nation.sql
│ ├── orders.sql
│ ├── part.sql
│ ├── partsupp.sql
│ ├── region.sql
│ └── supplier.sql
│ └── workspace.sdf.yml
├── docs
├── assets
│ ├── favicon.png
│ └── logo
│ │ ├── dark.svg
│ │ └── light.svg
├── cloud
│ ├── authentication.mdx
│ ├── billing.mdx
│ ├── credentials.mdx
│ ├── deployment.mdx
│ ├── environments.mdx
│ ├── github_integration.mdx
│ ├── introduction.mdx
│ ├── organization_roles.mdx
│ ├── reporting.mdx
│ ├── sso.mdx
│ ├── troubleshooting.mdx
│ └── workspace_credentials.mdx
├── database
│ ├── benchmarks.mdx
│ ├── file-formats.mdx
│ ├── introduction.mdx
│ ├── orchestration.mdx
│ ├── partitioning.mdx
│ └── supported-functions
│ │ ├── overview.mdx
│ │ └── trino
│ │ ├── aggregate-functions.mdx
│ │ ├── array-functions.mdx
│ │ ├── binary-functions.mdx
│ │ ├── comparison-functions.mdx
│ │ ├── conditional-functions.mdx
│ │ ├── datetime-functions.mdx
│ │ ├── math-functions.mdx
│ │ ├── regexp-functions.mdx
│ │ └── string-functions.mdx
├── guide
│ ├── advanced
│ │ ├── custom_libs.mdx
│ │ ├── custom_scripts.mdx
│ │ ├── index.mdx
│ │ ├── local_compilation.mdx
│ │ ├── logging.mdx
│ │ ├── telemetry.mdx
│ │ └── udf.mdx
│ ├── basics
│ │ ├── build_and_deployment.mdx
│ │ ├── classifiers.mdx
│ │ └── lineage_metadata.mdx
│ ├── data-quality
│ │ ├── checks.mdx
│ │ ├── overview.mdx
│ │ ├── reports.mdx
│ │ ├── stats.mdx
│ │ └── tests.mdx
│ ├── macro-processing
│ │ ├── intro-to-jinja.mdx
│ │ ├── jinja-variables.mdx
│ │ ├── jinja.mdx
│ │ ├── overview.mdx
│ │ └── sdf-variables.mdx
│ ├── setup
│ │ ├── environments.mdx
│ │ ├── integrations.mdx
│ │ ├── io.mdx
│ │ ├── materialization.mdx
│ │ └── workspaces.mdx
│ └── transformation
│ │ └── authentication.mdx
├── integrations
│ ├── aws
│ │ ├── S3
│ │ │ ├── getting-started.mdx
│ │ │ └── s3-example.mdx
│ │ └── redshift
│ │ │ └── getting-started.mdx
│ ├── bigquery
│ │ ├── basic-materialization.mdx
│ │ ├── getting-started.mdx
│ │ ├── incremental-materialization.mdx
│ │ ├── seeds.mdx
│ │ └── snapshots.mdx
│ ├── cicd
│ │ └── ci_cd.mdx
│ ├── dagster
│ │ └── getting-started.mdx
│ ├── databricks
│ │ └── databricks.mdx
│ ├── dbt
│ │ ├── integrating.mdx
│ │ └── migrating.mdx
│ ├── openai
│ │ └── ai-classification.mdx
│ ├── overview.mdx
│ └── snowflake
│ │ ├── advanced.mdx
│ │ ├── basic-materialization.mdx
│ │ ├── getting-started.mdx
│ │ ├── incremental-materialization.mdx
│ │ ├── seeds.mdx
│ │ └── snapshots.mdx
├── introduction
│ ├── features.mdx
│ ├── getting-started.mdx
│ ├── open-source.mdx
│ └── welcome.mdx
├── linter
│ ├── dbt_projects.mdx
│ ├── format.mdx
│ ├── macros.mdx
│ ├── overview.mdx
│ └── reference.mdx
├── mint.json
├── reference
│ ├── bigquery
│ │ ├── aggregate_functions.mdx
│ │ ├── approximate_aggregate_functions.mdx
│ │ ├── array_functions.mdx
│ │ ├── bit_functions.mdx
│ │ ├── date_functions.mdx
│ │ ├── datetime_functions.mdx
│ │ ├── debugging_functions.mdx
│ │ ├── geography_functions.mdx
│ │ ├── hash_functions.mdx
│ │ ├── interval_functions.mdx
│ │ ├── json_functions.mdx
│ │ ├── math_functions.mdx
│ │ ├── other_expressions.mdx
│ │ ├── search_functions.mdx
│ │ ├── security_functions.mdx
│ │ ├── statistical_aggregate_functions.mdx
│ │ ├── string_functions.mdx
│ │ ├── temporal_functions.mdx
│ │ ├── time_functions.mdx
│ │ ├── timestamp_functions.mdx
│ │ └── utility_functions.mdx
│ ├── caching.mdx
│ ├── error-codes.mdx
│ ├── redshift
│ │ ├── aggregate_functions.mdx
│ │ ├── math_functions.mdx
│ │ ├── other_expressions.mdx
│ │ ├── string_functions.mdx
│ │ └── temporal_functions.mdx
│ ├── sdf-cli.mdx
│ ├── sdf-information-schema.mdx
│ ├── sdf-yml.mdx
│ ├── snowflake
│ │ ├── account_usage_table_functions.mdx
│ │ ├── aggregate_functions.mdx
│ │ ├── bitwise_expression_functions.mdx
│ │ ├── conditional_expression_functions.mdx
│ │ ├── context_functions.mdx
│ │ ├── conversion_functions.mdx
│ │ ├── data_generation_functions.mdx
│ │ ├── date_and_time_functions.mdx
│ │ ├── encryption_functions.mdx
│ │ ├── geospatial_functions.mdx
│ │ ├── hash_functions.mdx
│ │ ├── information_schema.mdx
│ │ ├── metadata_functions.mdx
│ │ ├── numeric_functions.mdx
│ │ ├── semi-structured_and_structured_data_functions.mdx
│ │ ├── string_and_binary_functions.mdx
│ │ ├── string_functions.mdx
│ │ ├── system_functions.mdx
│ │ ├── table_functions.mdx
│ │ ├── vector_similarity_functions.mdx
│ │ └── window_functions.mdx
│ ├── support.mdx
│ └── trino
│ │ ├── aggregate_functions.mdx
│ │ ├── array_functions.mdx
│ │ ├── binary_functions.mdx
│ │ ├── bitwise_functions.mdx
│ │ ├── color_functions.mdx
│ │ ├── comparison_functions.mdx
│ │ ├── conditional_functions.mdx
│ │ ├── conversion_functions.mdx
│ │ ├── datetime_functions.mdx
│ │ ├── geospatial_functions.mdx
│ │ ├── hyperloglog_functions.mdx
│ │ ├── json_functions.mdx
│ │ ├── lambda_functions.mdx
│ │ ├── map_functions.mdx
│ │ ├── math_functions.mdx
│ │ ├── ml_functions.mdx
│ │ ├── mongodb_functions.mdx
│ │ ├── qdigest_functions.mdx
│ │ ├── regexp_functions.mdx
│ │ ├── sdf_execution_support
│ │ └── all_functions.mdx
│ │ ├── session_functions.mdx
│ │ ├── setdigest_functions.mdx
│ │ ├── string_functions.mdx
│ │ ├── t-digest_functions.mdx
│ │ ├── teradata_functions.mdx
│ │ ├── url_functions.mdx
│ │ ├── uuid_functions.mdx
│ │ └── window_functions.mdx
├── releases
│ ├── latest.mdx
│ └── migrations
│ │ ├── 11-12.mdx
│ │ └── 12-13.mdx
├── snippets
│ └── preview-warning.mdx
├── tutorials
│ ├── creating-a-model.mdx
│ ├── debugging.mdx
│ ├── deprecating-a-model.mdx
│ ├── enriching-your-warehouse.mdx
│ ├── ensuring-data-quality.mdx
│ ├── learn-more.mdx
│ ├── script_test.mdx
│ └── tutorials-intro.mdx
└── use-case
│ └── data_deletion.mdx
├── examples
├── .gitkeep
├── bigquery_incremental
│ ├── models
│ │ ├── last_hn_timestamp.sql
│ │ └── popular_articles.sql
│ └── workspace.sdf.yml
├── bigquery_starter
│ ├── .gitignore
│ ├── models
│ │ ├── aggregate_orders.sql
│ │ └── customers_over_100.sql
│ ├── run_me_in_bq.sql
│ └── workspace.sdf.yml
├── cybersyn_tech_innovation
│ ├── .gitignore
│ ├── models
│ │ └── sdf_snowflake
│ │ │ └── cybersyn_tech_innovation
│ │ │ ├── all_nvidia_patents.sql
│ │ │ ├── funder_aggregates.sql
│ │ │ └── most_starred_repos.sql
│ └── workspace.sdf.yml
├── github_analysis
│ ├── .gitignore
│ ├── checks
│ │ └── no_timezone_comparison.sql
│ ├── classification
│ │ └── taxonomy.sdf.yml
│ ├── metadata
│ │ ├── sdf_snowflake
│ │ │ └── stg
│ │ │ │ ├── repo_event_aggregates.sdf.yml
│ │ │ │ ├── repo_stars.sdf.yml
│ │ │ │ └── star_growth.sdf.yml
│ │ └── tech__innovation_essentials
│ │ │ └── cybersyn
│ │ │ ├── github_repos.sdf.yml
│ │ │ └── github_stars.sdf.yml
│ ├── models
│ │ └── sdf_snowflake
│ │ │ ├── analysis
│ │ │ ├── activity_surges.sql
│ │ │ ├── engagement_summary_by_repo.sql
│ │ │ ├── event_dist_across_repo.sql
│ │ │ ├── growth_repos.sql
│ │ │ └── star_growth_by_repo.sql
│ │ │ ├── dim
│ │ │ ├── date.sql
│ │ │ ├── event_type.sql
│ │ │ └── repos.sql
│ │ │ ├── fct
│ │ │ ├── event_activity.sql
│ │ │ ├── repo_activity.sql
│ │ │ └── repo_engagement.sql
│ │ │ └── stg
│ │ │ ├── latest_repo_events.sql
│ │ │ ├── repo_event_aggregates.sql
│ │ │ ├── repo_names.sql
│ │ │ ├── repo_stars.sql
│ │ │ ├── star_dates.sql
│ │ │ └── star_growth.sql
│ └── workspace.sdf.yml
├── hello
│ ├── .gitignore
│ ├── models
│ │ └── main.sql
│ └── workspace.sdf.yml
├── hello_from_dbt
│ ├── .gitignore
│ ├── models
│ │ └── main.sql
│ ├── seeds
│ │ └── .gitkeep
│ └── workspace.sdf.yml
├── hello_with_pii
│ ├── .gitignore
│ ├── checks
│ │ └── code_check.sql
│ ├── models
│ │ └── main.sql
│ └── workspace.sdf.yml
├── hello_world_s3
│ ├── local
│ │ ├── pop.csv
│ │ ├── popdata.sql
│ │ └── world_metrics.sql
│ ├── remote
│ │ ├── q1.sql
│ │ └── un_pop_data.sql
│ └── workspace.sdf.yml
├── jaffle_shop
│ ├── .gitignore
│ ├── models
│ │ ├── analytics
│ │ │ ├── customers.sql
│ │ │ ├── meta.sdf.yml
│ │ │ └── orders.sql
│ │ ├── raw
│ │ │ └── seeds.sdf.yml
│ │ └── staging
│ │ │ ├── stg_customers.sql
│ │ │ ├── stg_orders.sql
│ │ │ ├── stg_payments.sql
│ │ │ └── tests.sdf.yml
│ ├── seeds
│ │ ├── .gitkeep
│ │ ├── raw_customers.csv
│ │ ├── raw_orders.csv
│ │ └── raw_payments.csv
│ └── workspace.sdf.yml
├── lineage
│ ├── checks
│ │ └── check_sink_phone_is_pii.sql
│ ├── models
│ │ ├── knis.sql
│ │ ├── middle.sql
│ │ ├── sink.sql
│ │ └── source.sql
│ └── workspace.sdf.yml
├── linter
│ ├── .gitignore
│ ├── ddls
│ │ └── my_table.sdf.yml
│ ├── models
│ │ └── main.sql
│ ├── seeds
│ │ └── my_table.csv
│ └── workspace.sdf.yml
├── moms_flower_shop
│ ├── .gitignore
│ ├── checks
│ │ └── README.txt
│ ├── classifications
│ │ ├── column_classifiers.sdf.yml
│ │ └── table_classifiers.sdf.yml
│ ├── metadata
│ │ ├── analytics
│ │ │ ├── agg_installs_and_campaigns.sdf.yml
│ │ │ └── dim_marketing_campaigns.sdf.yml
│ │ ├── raw
│ │ │ ├── raw_addresses.sdf.yml
│ │ │ ├── raw_customers.sdf.yml
│ │ │ ├── raw_inapp_events.sdf.yml
│ │ │ └── raw_marketing_campaign_events.sdf.yml
│ │ └── staging
│ │ │ └── app_installs.sdf.yml
│ ├── models
│ │ ├── analytics
│ │ │ └── agg_installs_and_campaigns.sql
│ │ ├── raw
│ │ │ ├── raw_addresses.sql
│ │ │ ├── raw_customers.sql
│ │ │ ├── raw_inapp_events.sql
│ │ │ └── raw_marketing_campaign_events.sql
│ │ └── staging
│ │ │ ├── app_installs.sql
│ │ │ ├── app_installs_v2.sql
│ │ │ ├── customers.sql
│ │ │ ├── inapp_events.sql
│ │ │ ├── marketing_campaigns.sql
│ │ │ └── stg_installs_per_campaign.sql
│ ├── reports
│ │ └── README.txt
│ ├── seeds
│ │ ├── csv
│ │ │ └── README.txt
│ │ └── parquet
│ │ │ ├── addresses.parquet
│ │ │ ├── customers.parquet
│ │ │ ├── inapp_events.parquet
│ │ │ └── marketing_campaign_events.parquet
│ └── workspace.sdf.yml
├── moms_flower_shop_completed
│ ├── .gitignore
│ ├── checks
│ │ ├── README.txt
│ │ └── mixed_event_ids.sql
│ ├── classifications
│ │ ├── column_classifiers.sdf.yml
│ │ └── table_classifiers.sdf.yml
│ ├── metadata
│ │ ├── analytics
│ │ │ ├── agg_installs_and_campaigns.sdf.yml
│ │ │ └── dim_marketing_campaigns.sdf.yml
│ │ ├── raw
│ │ │ ├── raw_addresses.sdf.yml
│ │ │ ├── raw_customers.sdf.yml
│ │ │ ├── raw_inapp_events.sdf.yml
│ │ │ └── raw_marketing_campaign_events.sdf.yml
│ │ └── staging
│ │ │ ├── app_installs.sdf.yml
│ │ │ └── inapp_events.sdf.yml
│ ├── models
│ │ ├── analytics
│ │ │ ├── agg_installs_and_campaigns.sql
│ │ │ └── dim_marketing_campaigns.sql
│ │ ├── raw
│ │ │ ├── raw_addresses.sql
│ │ │ ├── raw_customers.sql
│ │ │ ├── raw_inapp_events.sql
│ │ │ └── raw_marketing_campaign_events.sql
│ │ └── staging
│ │ │ ├── app_installs.sql
│ │ │ ├── app_installs_v2.sql
│ │ │ ├── customers.sql
│ │ │ ├── inapp_events.sql
│ │ │ ├── marketing_campaigns.sql
│ │ │ └── stg_installs_per_campaign.sql
│ ├── reports
│ │ ├── README.txt
│ │ └── deprecated_table_reference.sql
│ ├── seeds
│ │ ├── csv
│ │ │ └── README.txt
│ │ └── parquet
│ │ │ ├── addresses.parquet
│ │ │ ├── customers.parquet
│ │ │ ├── inapp_events.parquet
│ │ │ └── marketing_campaign_events.parquet
│ └── workspace.sdf.yml
├── pii_saas_platform
│ ├── .gitignore
│ ├── checks
│ │ └── no_pii_in_external.sql
│ ├── classification
│ │ ├── taxonomy.sdf.yml
│ │ └── users.sdf.yml
│ ├── ddls
│ │ └── payment
│ │ │ └── public
│ │ │ ├── invoices.sql
│ │ │ ├── organizations.sql
│ │ │ └── users.sql
│ ├── models
│ │ ├── external
│ │ │ ├── invoice_stats.sql
│ │ │ └── org_invoice_stats.sql
│ │ └── internal
│ │ │ ├── avg_invoice_amt.sql
│ │ │ ├── invoice_payment_delay.sql
│ │ │ ├── mau_per_org.sql
│ │ │ ├── most_frequent_payer.sql
│ │ │ ├── total_revenue_per_org.sql
│ │ │ ├── users_per_domain.sql
│ │ │ └── users_per_org.sql
│ ├── reports
│ │ └── tables_with_pii.sql
│ └── workspace.sdf.yml
├── seeds
│ ├── models
│ │ └── french_customers.sql
│ ├── seeds
│ │ └── raw_customers.csv
│ └── workspace.sdf.yml
├── snapshots
│ ├── models
│ │ ├── test1
│ │ │ ├── a1.sdf.yml
│ │ │ ├── a1.sql
│ │ │ ├── a2.sdf.yml
│ │ │ ├── a2.sql
│ │ │ ├── b.sdf.yml
│ │ │ └── b.sql
│ │ ├── test2
│ │ │ ├── a1.sdf.yml
│ │ │ ├── a1.sql
│ │ │ ├── a2.sdf.yml
│ │ │ ├── a2.sql
│ │ │ ├── b.sdf.yml
│ │ │ └── b.sql
│ │ └── test3
│ │ │ ├── a1.sdf.yml
│ │ │ ├── a1.sql
│ │ │ ├── a2.sdf.yml
│ │ │ ├── a2.sql
│ │ │ ├── b.sdf.yml
│ │ │ └── b.sql
│ └── workspace.sdf.yml
└── tests
│ ├── .gitignore
│ ├── models
│ └── raw_inapp_events.sql
│ ├── seeds
│ └── inapp_events.csv
│ ├── src_metadata
│ └── raw_inapp_events.sdf.yml
│ └── workspace.sdf.yml
└── schemas
├── .gitkeep
└── sdf-definition-schema.json
/.editorconfig:
--------------------------------------------------------------------------------
1 | [*]
2 | insert_final_newline = true
3 |
--------------------------------------------------------------------------------
/benchmarks/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/benchmarks/.gitkeep
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/.gitignore:
--------------------------------------------------------------------------------
1 | sdftarget
2 | hits.parquet
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/hydrate.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -xeuo pipefail
3 |
4 | # Downloads a ~1GB ClickBench dataset in parquet format.
5 | #
6 | # The file is saved as hits.parquet in the same directory as this script,
7 | # regardless of the directory the script is invoked from.
8 |
9 | # Switch to the script's own directory. `dirname` is used instead of
10 | # "${BASH_SOURCE%/*}" because the latter leaves the value unchanged when the
11 | # path contains no slash (e.g. `bash hydrate.sh`), which would make `cd` fail.
12 | cd "$(dirname "${BASH_SOURCE[0]}")"
13 |
14 | # --fail makes curl exit non-zero on an HTTP error instead of saving the
15 | # server's error page as hits.parquet; -L follows redirects, -O keeps the
16 | # remote file name.
17 | curl --fail -LO https://cdn.sdf.com/data/clickbench/hits.parquet
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q01.sql:
--------------------------------------------------------------------------------
1 | SELECT COUNT(*) FROM hits;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q02.sql:
--------------------------------------------------------------------------------
1 | SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q03.sql:
--------------------------------------------------------------------------------
1 | SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q04.sql:
--------------------------------------------------------------------------------
1 | SELECT AVG("UserID") FROM hits;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q05.sql:
--------------------------------------------------------------------------------
1 | SELECT COUNT(DISTINCT "UserID") FROM hits;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q06.sql:
--------------------------------------------------------------------------------
1 | SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q07.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q08.sql:
--------------------------------------------------------------------------------
1 | SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q09.sql:
--------------------------------------------------------------------------------
1 | SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q10.sql:
--------------------------------------------------------------------------------
1 | SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q11.sql:
--------------------------------------------------------------------------------
1 | SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q12.sql:
--------------------------------------------------------------------------------
1 | SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q13.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q14.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q15.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q16.sql:
--------------------------------------------------------------------------------
1 | SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q17.sql:
--------------------------------------------------------------------------------
1 | SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q18.sql:
--------------------------------------------------------------------------------
1 | SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q19.sql:
--------------------------------------------------------------------------------
1 | SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q20.sql:
--------------------------------------------------------------------------------
1 | SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q21.sql:
--------------------------------------------------------------------------------
1 | SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q22.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q23.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q24.sql:
--------------------------------------------------------------------------------
1 | SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q25.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q26.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q27.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime"), "SearchPhrase" LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q28.sql:
--------------------------------------------------------------------------------
1 | SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q29.sql:
--------------------------------------------------------------------------------
1 | SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q31.sql:
--------------------------------------------------------------------------------
1 | SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q32.sql:
--------------------------------------------------------------------------------
1 | SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q33.sql:
--------------------------------------------------------------------------------
1 | SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q34.sql:
--------------------------------------------------------------------------------
1 | SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q35.sql:
--------------------------------------------------------------------------------
1 | SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q36.sql:
--------------------------------------------------------------------------------
1 | SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q37.sql:
--------------------------------------------------------------------------------
1 | SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q38.sql:
--------------------------------------------------------------------------------
1 | SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q39.sql:
--------------------------------------------------------------------------------
1 | SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC OFFSET 1000 LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q40.sql:
--------------------------------------------------------------------------------
1 | SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC OFFSET 1000 LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q41.sql:
--------------------------------------------------------------------------------
1 | SELECT "URLHash", CAST(CAST("EventDate" AS INT) AS DATE), COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", CAST(CAST("EventDate" AS INT) AS DATE) ORDER BY PageViews DESC OFFSET 100 LIMIT 10;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q42.sql:
--------------------------------------------------------------------------------
1 | SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= '2013-07-01' AND CAST(CAST("EventDate" AS INT) AS DATE) <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC OFFSET 1000 LIMIT 100;
2 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/queries/q43.sql:
--------------------------------------------------------------------------------
1 | -- Page views per minute (EventTime truncated to the minute, aliased M) for CounterID 62 on 2013-07-14 and 2013-07-15, restricted to non-refresh hits with DontCountHits = 0; ordered by minute, skipping 1000 rows and returning the next 10.
2 | -- M is already truncated to the minute, so ordering by M directly yields the same order as truncating it a second time; date bounds are typed DATE literals to avoid implicit varchar-to-date coercion.
3 | SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND CAST(CAST("EventDate" AS INT) AS DATE) >= DATE '2013-07-14' AND CAST(CAST("EventDate" AS INT) AS DATE) <= DATE '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY M OFFSET 1000 LIMIT 10;
4 |
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/sources/sources.sql:
--------------------------------------------------------------------------------
1 | -- Declares table hit_source with format PARQUET at location hits.parquet.
2 | CREATE TABLE hit_source WITH (format = 'PARQUET', LOCATION = 'hits.parquet');
--------------------------------------------------------------------------------
/benchmarks/trino/clickbench/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | edition: "1.3"
3 | name: "clickbench"
4 | defaults:
5 | dialect: trino
6 | description: >
7 | The ClickBench benchmark is designed to evaluate the performance of database systems using a dataset and queries derived from the real-world use cases of ClickHouse,
8 | a leading analytical database. This benchmark aims to measure how well different database systems handle large-scale analytical workloads.
9 |
10 | To run the benchmark:
11 | 1. Run the included hydrate.sh script, which downloads the relevant data
12 | 2. To execute all queries: `sdf run --no-cache`
13 | includes:
14 | - path: sources
15 | - path: queries
16 | - path: hits.parquet
17 | type: resource
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/.gitignore:
--------------------------------------------------------------------------------
1 | /imdb_data.zip
2 | /imdb_data
3 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/hydrate.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -xeuo pipefail
3 |
4 | # Downloads the ~1.2GB IMDB dataset as a zip archive into this script's
5 | # directory and extracts it there.
6 |
7 | # Resolve the script's own directory. dirname yields "." when the script is
8 | # invoked without a path component (e.g. `bash hydrate.sh`); a plain
9 | # "${BASH_SOURCE%/*}" would expand to the file name in that case and cd would fail.
10 | cd "$(dirname "${BASH_SOURCE[0]}")"
11 |
12 | # -f: exit non-zero on HTTP errors instead of saving an error page as the archive.
13 | curl -fLO https://cdn.sdf.com/data/imdb/imdb_data.zip
14 |
15 | # -o: overwrite existing files without prompting so re-runs stay non-interactive.
16 | unzip -o imdb_data.zip
17 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_01a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(t.title) AS movie_title,
3 | MIN(t.production_year) AS movie_year
4 | FROM company_type AS ct,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | info_type AS it,
6 | movie_companies AS mc,
7 | movie_info_idx AS mi_idx,
8 | title AS t
9 | WHERE ct.kind = 'production companies'  -- row filters
10 | AND it.info = 'top 250 rank'
11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%'
12 | AND (mc.note LIKE '%(co-production)%'
13 | OR mc.note LIKE '%(presents)%')
14 | AND ct.id = mc.company_type_id  -- join conditions start here
15 | AND t.id = mc.movie_id
16 | AND t.id = mi_idx.movie_id
17 | AND mc.movie_id = mi_idx.movie_id  -- logically implied by the two t.id equalities above
18 | AND it.id = mi_idx.info_type_id;
19 |
20 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_01b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(t.title) AS movie_title,
3 | MIN(t.production_year) AS movie_year
4 | FROM company_type AS ct,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | info_type AS it,
6 | movie_companies AS mc,
7 | movie_info_idx AS mi_idx,
8 | title AS t
9 | WHERE ct.kind = 'production companies'  -- row filters
10 | AND it.info = 'bottom 10 rank'
11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%'
12 | AND t.production_year BETWEEN 2005 AND 2010  -- BETWEEN is inclusive on both ends
13 | AND ct.id = mc.company_type_id  -- join conditions start here
14 | AND t.id = mc.movie_id
15 | AND t.id = mi_idx.movie_id
16 | AND mc.movie_id = mi_idx.movie_id  -- logically implied by the two t.id equalities above
17 | AND it.id = mi_idx.info_type_id;
18 |
19 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_01c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(t.title) AS movie_title,
3 | MIN(t.production_year) AS movie_year
4 | FROM company_type AS ct,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | info_type AS it,
6 | movie_companies AS mc,
7 | movie_info_idx AS mi_idx,
8 | title AS t
9 | WHERE ct.kind = 'production companies'  -- row filters
10 | AND it.info = 'top 250 rank'
11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%'
12 | AND (mc.note LIKE '%(co-production)%')  -- parentheses wrap a single predicate here
13 | AND t.production_year >2010
14 | AND ct.id = mc.company_type_id  -- join conditions start here
15 | AND t.id = mc.movie_id
16 | AND t.id = mi_idx.movie_id
17 | AND mc.movie_id = mi_idx.movie_id  -- logically implied by the two t.id equalities above
18 | AND it.id = mi_idx.info_type_id;
19 |
20 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_01d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mc.note) AS production_note,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(t.title) AS movie_title,
3 | MIN(t.production_year) AS movie_year
4 | FROM company_type AS ct,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | info_type AS it,
6 | movie_companies AS mc,
7 | movie_info_idx AS mi_idx,
8 | title AS t
9 | WHERE ct.kind = 'production companies'  -- row filters
10 | AND it.info = 'bottom 10 rank'
11 | AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%'
12 | AND t.production_year >2000
13 | AND ct.id = mc.company_type_id  -- join conditions start here
14 | AND t.id = mc.movie_id
15 | AND t.id = mi_idx.movie_id
16 | AND mc.movie_id = mi_idx.movie_id  -- logically implied by the two t.id equalities above
17 | AND it.id = mi_idx.info_type_id;
18 |
19 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_02a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM company_name AS cn,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | keyword AS k,
4 | movie_companies AS mc,
5 | movie_keyword AS mk,
6 | title AS t
7 | WHERE cn.country_code ='[de]'  -- row filters; the brackets are part of the compared literal
8 | AND k.keyword ='character-name-in-title'
9 | AND cn.id = mc.company_id  -- join conditions start here
10 | AND mc.movie_id = t.id
11 | AND t.id = mk.movie_id
12 | AND mk.keyword_id = k.id
13 | AND mc.movie_id = mk.movie_id;  -- logically implied by mc.movie_id = t.id and t.id = mk.movie_id
14 |
15 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_02b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM company_name AS cn,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | keyword AS k,
4 | movie_companies AS mc,
5 | movie_keyword AS mk,
6 | title AS t
7 | WHERE cn.country_code ='[nl]'  -- row filters; the brackets are part of the compared literal
8 | AND k.keyword ='character-name-in-title'
9 | AND cn.id = mc.company_id  -- join conditions start here
10 | AND mc.movie_id = t.id
11 | AND t.id = mk.movie_id
12 | AND mk.keyword_id = k.id
13 | AND mc.movie_id = mk.movie_id;  -- logically implied by mc.movie_id = t.id and t.id = mk.movie_id
14 |
15 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_02c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM company_name AS cn,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | keyword AS k,
4 | movie_companies AS mc,
5 | movie_keyword AS mk,
6 | title AS t
7 | WHERE cn.country_code ='[sm]'  -- row filters; the brackets are part of the compared literal
8 | AND k.keyword ='character-name-in-title'
9 | AND cn.id = mc.company_id  -- join conditions start here
10 | AND mc.movie_id = t.id
11 | AND t.id = mk.movie_id
12 | AND mk.keyword_id = k.id
13 | AND mc.movie_id = mk.movie_id;  -- logically implied by mc.movie_id = t.id and t.id = mk.movie_id
14 |
15 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_02d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM company_name AS cn,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | keyword AS k,
4 | movie_companies AS mc,
5 | movie_keyword AS mk,
6 | title AS t
7 | WHERE cn.country_code ='[us]'  -- row filters; the brackets are part of the compared literal
8 | AND k.keyword ='character-name-in-title'
9 | AND cn.id = mc.company_id  -- join conditions start here
10 | AND mc.movie_id = t.id
11 | AND t.id = mk.movie_id
12 | AND mk.keyword_id = k.id
13 | AND mc.movie_id = mk.movie_id;  -- logically implied by mc.movie_id = t.id and t.id = mk.movie_id
14 |
15 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_03a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM keyword AS k,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | movie_info AS mi,
4 | movie_keyword AS mk,
5 | title AS t
6 | WHERE k.keyword LIKE '%sequel%'  -- substring match anywhere in the keyword
7 | AND mi.info IN ('Sweden',
8 | 'Norway',
9 | 'Germany',
10 | 'Denmark',
11 | 'Swedish',
12 | 'Denish',  -- NOTE(review): looks like a misspelling of 'Danish'; changing it alters which rows match, so confirm intent first
13 | 'Norwegian',
14 | 'German')
15 | AND t.production_year > 2005
16 | AND t.id = mi.movie_id  -- join conditions start here
17 | AND t.id = mk.movie_id
18 | AND mk.movie_id = mi.movie_id  -- logically implied by the two t.id equalities above
19 | AND k.id = mk.keyword_id;
20 |
21 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_03b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM keyword AS k,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | movie_info AS mi,
4 | movie_keyword AS mk,
5 | title AS t
6 | WHERE k.keyword LIKE '%sequel%'  -- substring match anywhere in the keyword
7 | AND mi.info IN ('Bulgaria')  -- single-element IN list, equivalent to an equality test
8 | AND t.production_year > 2010
9 | AND t.id = mi.movie_id  -- join conditions start here
10 | AND t.id = mk.movie_id
11 | AND mk.movie_id = mi.movie_id  -- logically implied by the two t.id equalities above
12 | AND k.id = mk.keyword_id;
13 |
14 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_03c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS movie_title  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM keyword AS k,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | movie_info AS mi,
4 | movie_keyword AS mk,
5 | title AS t
6 | WHERE k.keyword LIKE '%sequel%'  -- substring match anywhere in the keyword
7 | AND mi.info IN ('Sweden',
8 | 'Norway',
9 | 'Germany',
10 | 'Denmark',
11 | 'Swedish',
12 | 'Denish',  -- NOTE(review): looks like a misspelling of 'Danish'; changing it alters which rows match, so confirm intent first
13 | 'Norwegian',
14 | 'German',
15 | 'USA',
16 | 'American')
17 | AND t.production_year > 1990
18 | AND t.id = mi.movie_id  -- join conditions start here
19 | AND t.id = mk.movie_id
20 | AND mk.movie_id = mi.movie_id  -- logically implied by the two t.id equalities above
21 | AND k.id = mk.keyword_id;
22 |
23 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_04a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS movie_title
3 | FROM info_type AS it,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | keyword AS k,
5 | movie_info_idx AS mi_idx,
6 | movie_keyword AS mk,
7 | title AS t
8 | WHERE it.info ='rating'  -- row filters
9 | AND k.keyword LIKE '%sequel%'
10 | AND mi_idx.info > '5.0'  -- compared against a string literal; presumably a lexicographic comparison on a text column - confirm schema
11 | AND t.production_year > 2005
12 | AND t.id = mi_idx.movie_id  -- join conditions start here
13 | AND t.id = mk.movie_id
14 | AND mk.movie_id = mi_idx.movie_id  -- logically implied by the two t.id equalities above
15 | AND k.id = mk.keyword_id
16 | AND it.id = mi_idx.info_type_id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_04b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS movie_title
3 | FROM info_type AS it,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | keyword AS k,
5 | movie_info_idx AS mi_idx,
6 | movie_keyword AS mk,
7 | title AS t
8 | WHERE it.info ='rating'  -- row filters
9 | AND k.keyword LIKE '%sequel%'
10 | AND mi_idx.info > '9.0'  -- compared against a string literal; presumably a lexicographic comparison on a text column - confirm schema
11 | AND t.production_year > 2010
12 | AND t.id = mi_idx.movie_id  -- join conditions start here
13 | AND t.id = mk.movie_id
14 | AND mk.movie_id = mi_idx.movie_id  -- logically implied by the two t.id equalities above
15 | AND k.id = mk.keyword_id
16 | AND it.id = mi_idx.info_type_id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_04c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS movie_title
3 | FROM info_type AS it,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | keyword AS k,
5 | movie_info_idx AS mi_idx,
6 | movie_keyword AS mk,
7 | title AS t
8 | WHERE it.info ='rating'  -- row filters
9 | AND k.keyword LIKE '%sequel%'
10 | AND mi_idx.info > '2.0'  -- compared against a string literal; presumably a lexicographic comparison on a text column - confirm schema
11 | AND t.production_year > 1990
12 | AND t.id = mi_idx.movie_id  -- join conditions start here
13 | AND t.id = mk.movie_id
14 | AND mk.movie_id = mi_idx.movie_id  -- logically implied by the two t.id equalities above
15 | AND k.id = mk.keyword_id
16 | AND it.id = mi_idx.info_type_id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_05a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS typical_european_movie  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM company_type AS ct,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | info_type AS it,  -- no filter is applied to it; it participates only through the join on mi.info_type_id
4 | movie_companies AS mc,
5 | movie_info AS mi,
6 | title AS t
7 | WHERE ct.kind = 'production companies'  -- row filters
8 | AND mc.note LIKE '%(theatrical)%'  -- both substrings must occur in the same note
9 | AND mc.note LIKE '%(France)%'
10 | AND mi.info IN ('Sweden',
11 | 'Norway',
12 | 'Germany',
13 | 'Denmark',
14 | 'Swedish',
15 | 'Denish',  -- NOTE(review): looks like a misspelling of 'Danish'; changing it alters which rows match, so confirm intent first
16 | 'Norwegian',
17 | 'German')
18 | AND t.production_year > 2005
19 | AND t.id = mi.movie_id  -- join conditions start here
20 | AND t.id = mc.movie_id
21 | AND mc.movie_id = mi.movie_id  -- logically implied by the two t.id equalities above
22 | AND ct.id = mc.company_type_id
23 | AND it.id = mi.info_type_id;
24 |
25 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_05b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS american_vhs_movie  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM company_type AS ct,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | info_type AS it,  -- no filter is applied to it; it participates only through the join on mi.info_type_id
4 | movie_companies AS mc,
5 | movie_info AS mi,
6 | title AS t
7 | WHERE ct.kind = 'production companies'  -- row filters
8 | AND mc.note LIKE '%(VHS)%'  -- all three substrings must occur in the same note
9 | AND mc.note LIKE '%(USA)%'
10 | AND mc.note LIKE '%(1994)%'
11 | AND mi.info IN ('USA',
12 | 'America')
13 | AND t.production_year > 2010
14 | AND t.id = mi.movie_id  -- join conditions start here
15 | AND t.id = mc.movie_id
16 | AND mc.movie_id = mi.movie_id  -- logically implied by the two t.id equalities above
17 | AND ct.id = mc.company_type_id
18 | AND it.id = mi.info_type_id;
19 |
20 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_05c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS american_movie  -- single-row result: smallest t.title among rows passing every predicate below
2 | FROM company_type AS ct,  -- comma-separated FROM list: every join condition lives in the WHERE clause
3 | info_type AS it,  -- no filter is applied to it; it participates only through the join on mi.info_type_id
4 | movie_companies AS mc,
5 | movie_info AS mi,
6 | title AS t
7 | WHERE ct.kind = 'production companies'  -- row filters
8 | AND mc.note NOT LIKE '%(TV)%'
9 | AND mc.note LIKE '%(USA)%'
10 | AND mi.info IN ('Sweden',
11 | 'Norway',
12 | 'Germany',
13 | 'Denmark',
14 | 'Swedish',
15 | 'Denish',  -- NOTE(review): looks like a misspelling of 'Danish'; changing it alters which rows match, so confirm intent first
16 | 'Norwegian',
17 | 'German',
18 | 'USA',
19 | 'American')
20 | AND t.production_year > 1990
21 | AND t.id = mi.movie_id  -- join conditions start here
22 | AND t.id = mc.movie_id
23 | AND mc.movie_id = mi.movie_id  -- logically implied by the two t.id equalities above
24 | AND ct.id = mc.company_type_id
25 | AND it.id = mi.info_type_id;
26 |
27 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_06a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(n.name) AS actor_name,
3 | MIN(t.title) AS marvel_movie
4 | FROM cast_info AS ci,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | keyword AS k,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE k.keyword = 'marvel-cinematic-universe'  -- row filters
10 | AND n.name LIKE '%Downey%Robert%'  -- 'Downey' must appear before 'Robert' within the name
11 | AND t.production_year > 2010
12 | AND k.id = mk.keyword_id  -- join conditions start here
13 | AND t.id = mk.movie_id
14 | AND t.id = ci.movie_id
15 | AND ci.movie_id = mk.movie_id  -- logically implied by the two t.id equalities above
16 | AND n.id = ci.person_id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_06b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(n.name) AS actor_name,
3 | MIN(t.title) AS hero_movie
4 | FROM cast_info AS ci,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | keyword AS k,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE k.keyword IN ('superhero',  -- row filters: keyword must equal one of the listed values
10 | 'sequel',
11 | 'second-part',
12 | 'marvel-comics',
13 | 'based-on-comic',
14 | 'tv-special',
15 | 'fight',
16 | 'violence')
17 | AND n.name LIKE '%Downey%Robert%'  -- 'Downey' must appear before 'Robert' within the name
18 | AND t.production_year > 2014
19 | AND k.id = mk.keyword_id  -- join conditions start here
20 | AND t.id = mk.movie_id
21 | AND t.id = ci.movie_id
22 | AND ci.movie_id = mk.movie_id  -- logically implied by the two t.id equalities above
23 | AND n.id = ci.person_id;
24 |
25 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_06c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(n.name) AS actor_name,
3 | MIN(t.title) AS marvel_movie
4 | FROM cast_info AS ci,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | keyword AS k,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE k.keyword = 'marvel-cinematic-universe'  -- row filters
10 | AND n.name LIKE '%Downey%Robert%'  -- 'Downey' must appear before 'Robert' within the name
11 | AND t.production_year > 2014
12 | AND k.id = mk.keyword_id  -- join conditions start here
13 | AND t.id = mk.movie_id
14 | AND t.id = ci.movie_id
15 | AND ci.movie_id = mk.movie_id  -- logically implied by the two t.id equalities above
16 | AND n.id = ci.person_id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_06d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(n.name) AS actor_name,
3 | MIN(t.title) AS hero_movie
4 | FROM cast_info AS ci,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | keyword AS k,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE k.keyword IN ('superhero',  -- row filters: keyword must equal one of the listed values
10 | 'sequel',
11 | 'second-part',
12 | 'marvel-comics',
13 | 'based-on-comic',
14 | 'tv-special',
15 | 'fight',
16 | 'violence')
17 | AND n.name LIKE '%Downey%Robert%'  -- 'Downey' must appear before 'Robert' within the name
18 | AND t.production_year > 2000
19 | AND k.id = mk.keyword_id  -- join conditions start here
20 | AND t.id = mk.movie_id
21 | AND t.id = ci.movie_id
22 | AND ci.movie_id = mk.movie_id  -- logically implied by the two t.id equalities above
23 | AND n.id = ci.person_id;
24 |
25 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_06e.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(n.name) AS actor_name,
3 | MIN(t.title) AS marvel_movie
4 | FROM cast_info AS ci,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | keyword AS k,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE k.keyword = 'marvel-cinematic-universe'  -- row filters
10 | AND n.name LIKE '%Downey%Robert%'  -- 'Downey' must appear before 'Robert' within the name
11 | AND t.production_year > 2000
12 | AND k.id = mk.keyword_id  -- join conditions start here
13 | AND t.id = mk.movie_id
14 | AND t.id = ci.movie_id
15 | AND ci.movie_id = mk.movie_id  -- logically implied by the two t.id equalities above
16 | AND n.id = ci.person_id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_06f.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(k.keyword) AS movie_keyword,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(n.name) AS actor_name,
3 | MIN(t.title) AS hero_movie
4 | FROM cast_info AS ci,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | keyword AS k,
6 | movie_keyword AS mk,
7 | name AS n,  -- no filter is applied to n; it participates only through the join on ci.person_id
8 | title AS t
9 | WHERE k.keyword IN ('superhero',  -- row filters: keyword must equal one of the listed values
10 | 'sequel',
11 | 'second-part',
12 | 'marvel-comics',
13 | 'based-on-comic',
14 | 'tv-special',
15 | 'fight',
16 | 'violence')
17 | AND t.production_year > 2000
18 | AND k.id = mk.keyword_id  -- join conditions start here
19 | AND t.id = mk.movie_id
20 | AND t.id = ci.movie_id
21 | AND ci.movie_id = mk.movie_id  -- logically implied by the two t.id equalities above
22 | AND n.id = ci.person_id;
23 |
24 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_07a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS of_person,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS biography_movie
3 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | info_type AS it,
6 | link_type AS lt,
7 | movie_link AS ml,
8 | name AS n,
9 | person_info AS pi,
10 | title AS t
11 | WHERE an.name LIKE '%a%'  -- row filters; this LIKE pattern is written with a lowercase 'a'
12 | AND it.info ='mini biography'
13 | AND lt.link ='features'
14 | AND n.name_pcode_cf BETWEEN 'A' AND 'F'  -- inclusive string range; under ordinary string ordering, values longer than 'F' that start with 'F' sort after the upper bound
15 | AND (n.gender='m'
16 | OR (n.gender = 'f'
17 | AND n.name LIKE 'B%'))  -- gender 'f' rows qualify only when the name starts with 'B'
18 | AND pi.note ='Volker Boehm'
19 | AND t.production_year BETWEEN 1980 AND 1995  -- BETWEEN is inclusive on both ends
20 | AND n.id = an.person_id  -- join conditions start here
21 | AND n.id = pi.person_id
22 | AND ci.person_id = n.id
23 | AND t.id = ci.movie_id
24 | AND ml.linked_movie_id = t.id
25 | AND lt.id = ml.link_type_id
26 | AND it.id = pi.info_type_id
27 | AND pi.person_id = an.person_id  -- this and the next two person_id equalities are logically implied by the n.id equalities above
28 | AND pi.person_id = ci.person_id
29 | AND an.person_id = ci.person_id
30 | AND ci.movie_id = ml.linked_movie_id;  -- logically implied by t.id = ci.movie_id and ml.linked_movie_id = t.id
31 |
32 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_07b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS of_person,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS biography_movie
3 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | info_type AS it,
6 | link_type AS lt,
7 | movie_link AS ml,
8 | name AS n,
9 | person_info AS pi,
10 | title AS t
11 | WHERE an.name LIKE '%a%'  -- row filters; this LIKE pattern is written with a lowercase 'a'
12 | AND it.info ='mini biography'
13 | AND lt.link ='features'
14 | AND n.name_pcode_cf LIKE 'D%'  -- prefix match
15 | AND n.gender='m'
16 | AND pi.note ='Volker Boehm'
17 | AND t.production_year BETWEEN 1980 AND 1984  -- BETWEEN is inclusive on both ends
18 | AND n.id = an.person_id  -- join conditions start here
19 | AND n.id = pi.person_id
20 | AND ci.person_id = n.id
21 | AND t.id = ci.movie_id
22 | AND ml.linked_movie_id = t.id
23 | AND lt.id = ml.link_type_id
24 | AND it.id = pi.info_type_id
25 | AND pi.person_id = an.person_id  -- this and the next two person_id equalities are logically implied by the n.id equalities above
26 | AND pi.person_id = ci.person_id
27 | AND an.person_id = ci.person_id
28 | AND ci.movie_id = ml.linked_movie_id;  -- logically implied by t.id = ci.movie_id and ml.linked_movie_id = t.id
29 |
30 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_07c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS cast_member_name,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(pi.info) AS cast_member_info
3 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | info_type AS it,
6 | link_type AS lt,
7 | movie_link AS ml,
8 | name AS n,
9 | person_info AS pi,
10 | title AS t  -- t contributes no output column; it is used for the production_year filter and the joins
11 | WHERE an.name IS NOT NULL  -- already implied by the LIKE disjunction below, which is never true for NULL
12 | AND (an.name LIKE '%a%'
13 | OR an.name LIKE 'A%')
14 | AND it.info ='mini biography'
15 | AND lt.link IN ('references',
16 | 'referenced in',
17 | 'features',
18 | 'featured in')
19 | AND n.name_pcode_cf BETWEEN 'A' AND 'F'  -- inclusive string range; under ordinary string ordering, values longer than 'F' that start with 'F' sort after the upper bound
20 | AND (n.gender='m'
21 | OR (n.gender = 'f'
22 | AND n.name LIKE 'A%'))  -- gender 'f' rows qualify only when the name starts with 'A'
23 | AND pi.note IS NOT NULL
24 | AND t.production_year BETWEEN 1980 AND 2010  -- BETWEEN is inclusive on both ends
25 | AND n.id = an.person_id  -- join conditions start here
26 | AND n.id = pi.person_id
27 | AND ci.person_id = n.id
28 | AND t.id = ci.movie_id
29 | AND ml.linked_movie_id = t.id
30 | AND lt.id = ml.link_type_id
31 | AND it.id = pi.info_type_id
32 | AND pi.person_id = an.person_id  -- this and the next two person_id equalities are logically implied by the n.id equalities above
33 | AND pi.person_id = ci.person_id
34 | AND an.person_id = ci.person_id
35 | AND ci.movie_id = ml.linked_movie_id;  -- logically implied by t.id = ci.movie_id and ml.linked_movie_id = t.id
36 |
37 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_08a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an1.name) AS actress_pseudonym,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS japanese_movie_dubbed
3 | FROM aka_name AS an1,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | company_name AS cn,
6 | movie_companies AS mc,
7 | name AS n1,
8 | role_type AS rt,
9 | title AS t
10 | WHERE ci.note ='(voice: English version)'  -- row filters
11 | AND cn.country_code ='[jp]'
12 | AND mc.note LIKE '%(Japan)%'
13 | AND mc.note NOT LIKE '%(USA)%'
14 | AND n1.name LIKE '%Yo%'  -- name must contain 'Yo' and must not contain 'Yu'
15 | AND n1.name NOT LIKE '%Yu%'
16 | AND rt.role ='actress'
17 | AND an1.person_id = n1.id  -- join conditions start here
18 | AND n1.id = ci.person_id
19 | AND ci.movie_id = t.id
20 | AND t.id = mc.movie_id
21 | AND mc.company_id = cn.id
22 | AND ci.role_id = rt.id
23 | AND an1.person_id = ci.person_id  -- logically implied by the two n1.id equalities above
24 | AND ci.movie_id = mc.movie_id;  -- logically implied by the two t.id equalities above
25 |
26 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_08b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS acress_pseudonym,  -- NOTE(review): alias reads 'acress', likely meant 'actress'; renaming would change the output column name, so confirm before touching
2 | MIN(t.title) AS japanese_anime_movie  -- each MIN() aggregates independently, so the two outputs may come from different rows
3 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | company_name AS cn,
6 | movie_companies AS mc,
7 | name AS n,
8 | role_type AS rt,
9 | title AS t
10 | WHERE ci.note ='(voice: English version)'  -- row filters
11 | AND cn.country_code ='[jp]'
12 | AND mc.note LIKE '%(Japan)%'
13 | AND mc.note NOT LIKE '%(USA)%'
14 | AND (mc.note LIKE '%(2006)%'
15 | OR mc.note LIKE '%(2007)%')
16 | AND n.name LIKE '%Yo%'  -- name must contain 'Yo' and must not contain 'Yu'
17 | AND n.name NOT LIKE '%Yu%'
18 | AND rt.role ='actress'
19 | AND t.production_year BETWEEN 2006 AND 2007  -- BETWEEN is inclusive on both ends
20 | AND (t.title LIKE 'One Piece%'  -- title must start with one of the two prefixes
21 | OR t.title LIKE 'Dragon Ball Z%')
22 | AND an.person_id = n.id  -- join conditions start here
23 | AND n.id = ci.person_id
24 | AND ci.movie_id = t.id
25 | AND t.id = mc.movie_id
26 | AND mc.company_id = cn.id
27 | AND ci.role_id = rt.id
28 | AND an.person_id = ci.person_id  -- logically implied by the two n.id equalities above
29 | AND ci.movie_id = mc.movie_id;  -- logically implied by the two t.id equalities above
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_08c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(a1.name) AS writer_pseudo_name,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS movie_title
3 | FROM aka_name AS a1,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | company_name AS cn,
6 | movie_companies AS mc,
7 | name AS n1,
8 | role_type AS rt,
9 | title AS t
10 | WHERE cn.country_code ='[us]'  -- the only two row filters; everything below is a join condition
11 | AND rt.role ='writer'
12 | AND a1.person_id = n1.id  -- join conditions start here
13 | AND n1.id = ci.person_id
14 | AND ci.movie_id = t.id
15 | AND t.id = mc.movie_id
16 | AND mc.company_id = cn.id
17 | AND ci.role_id = rt.id
18 | AND a1.person_id = ci.person_id  -- logically implied by the two n1.id equalities above
19 | AND ci.movie_id = mc.movie_id;  -- logically implied by the two t.id equalities above
20 |
21 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_08d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an1.name) AS costume_designer_pseudo,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS movie_with_costumes
3 | FROM aka_name AS an1,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | company_name AS cn,
6 | movie_companies AS mc,
7 | name AS n1,
8 | role_type AS rt,
9 | title AS t
10 | WHERE cn.country_code ='[us]'  -- the only two row filters; everything below is a join condition
11 | AND rt.role ='costume designer'
12 | AND an1.person_id = n1.id  -- join conditions start here
13 | AND n1.id = ci.person_id
14 | AND ci.movie_id = t.id
15 | AND t.id = mc.movie_id
16 | AND mc.company_id = cn.id
17 | AND ci.role_id = rt.id
18 | AND an1.person_id = ci.person_id  -- logically implied by the two n1.id equalities above
19 | AND ci.movie_id = mc.movie_id;  -- logically implied by the two t.id equalities above
20 |
21 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_09a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name,  -- each MIN() aggregates independently over the filtered join, so the three outputs may come from different rows
2 | MIN(chn.name) AS character_name,
3 | MIN(t.title) AS movie
4 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
5 | char_name AS chn,
6 | cast_info AS ci,
7 | company_name AS cn,
8 | movie_companies AS mc,
9 | name AS n,
10 | role_type AS rt,
11 | title AS t
12 | WHERE ci.note IN ('(voice)',  -- row filters: exact match against one of four note strings
13 | '(voice: Japanese version)',
14 | '(voice) (uncredited)',
15 | '(voice: English version)')
16 | AND cn.country_code ='[us]'
17 | AND mc.note IS NOT NULL  -- already implied by the LIKE disjunction below, which is never true for NULL
18 | AND (mc.note LIKE '%(USA)%'
19 | OR mc.note LIKE '%(worldwide)%')
20 | AND n.gender ='f'
21 | AND n.name LIKE '%Ang%'
22 | AND rt.role ='actress'
23 | AND t.production_year BETWEEN 2005 AND 2015  -- BETWEEN is inclusive on both ends
24 | AND ci.movie_id = t.id  -- join conditions start here
25 | AND t.id = mc.movie_id
26 | AND ci.movie_id = mc.movie_id  -- logically implied by the two t.id equalities above
27 | AND mc.company_id = cn.id
28 | AND ci.role_id = rt.id
29 | AND n.id = ci.person_id
30 | AND chn.id = ci.person_role_id
31 | AND an.person_id = n.id
32 | AND an.person_id = ci.person_id;  -- logically implied by n.id = ci.person_id and an.person_id = n.id
33 |
34 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_09b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name,  -- each MIN() aggregates independently over the filtered join, so the four outputs may come from different rows
2 | MIN(chn.name) AS voiced_character,
3 | MIN(n.name) AS voicing_actress,
4 | MIN(t.title) AS american_movie
5 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
6 | char_name AS chn,
7 | cast_info AS ci,
8 | company_name AS cn,
9 | movie_companies AS mc,
10 | name AS n,
11 | role_type AS rt,
12 | title AS t
13 | WHERE ci.note = '(voice)'  -- row filters
14 | AND cn.country_code ='[us]'
15 | AND mc.note LIKE '%(200%)%'  -- the inner % is a wildcard too: matches '(200', any characters, then ')'
16 | AND (mc.note LIKE '%(USA)%'
17 | OR mc.note LIKE '%(worldwide)%')
18 | AND n.gender ='f'
19 | AND n.name LIKE '%Angel%'
20 | AND rt.role ='actress'
21 | AND t.production_year BETWEEN 2007 AND 2010  -- BETWEEN is inclusive on both ends
22 | AND ci.movie_id = t.id  -- join conditions start here
23 | AND t.id = mc.movie_id
24 | AND ci.movie_id = mc.movie_id  -- logically implied by the two t.id equalities above
25 | AND mc.company_id = cn.id
26 | AND ci.role_id = rt.id
27 | AND n.id = ci.person_id
28 | AND chn.id = ci.person_role_id
29 | AND an.person_id = n.id
30 | AND an.person_id = ci.person_id;  -- logically implied by n.id = ci.person_id and an.person_id = n.id
31 |
32 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_09c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name,  -- each MIN() aggregates independently over the filtered join, so the four outputs may come from different rows
2 | MIN(chn.name) AS voiced_character_name,
3 | MIN(n.name) AS voicing_actress,
4 | MIN(t.title) AS american_movie
5 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
6 | char_name AS chn,
7 | cast_info AS ci,
8 | company_name AS cn,
9 | movie_companies AS mc,  -- no filter is applied to mc; it links t to cn through the joins
10 | name AS n,
11 | role_type AS rt,
12 | title AS t
13 | WHERE ci.note IN ('(voice)',  -- row filters: exact match against one of four note strings
14 | '(voice: Japanese version)',
15 | '(voice) (uncredited)',
16 | '(voice: English version)')
17 | AND cn.country_code ='[us]'
18 | AND n.gender ='f'
19 | AND n.name LIKE '%An%'
20 | AND rt.role ='actress'
21 | AND ci.movie_id = t.id  -- join conditions start here
22 | AND t.id = mc.movie_id
23 | AND ci.movie_id = mc.movie_id  -- logically implied by the two t.id equalities above
24 | AND mc.company_id = cn.id
25 | AND ci.role_id = rt.id
26 | AND n.id = ci.person_id
27 | AND chn.id = ci.person_role_id
28 | AND an.person_id = n.id
29 | AND an.person_id = ci.person_id;  -- logically implied by n.id = ci.person_id and an.person_id = n.id
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_09d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS alternative_name,  -- each MIN() aggregates independently over the filtered join, so the four outputs may come from different rows
2 | MIN(chn.name) AS voiced_char_name,
3 | MIN(n.name) AS voicing_actress,
4 | MIN(t.title) AS american_movie
5 | FROM aka_name AS an,  -- comma-separated FROM list: every join condition lives in the WHERE clause
6 | char_name AS chn,
7 | cast_info AS ci,
8 | company_name AS cn,
9 | movie_companies AS mc,  -- no filter is applied to mc; it links t to cn through the joins
10 | name AS n,
11 | role_type AS rt,
12 | title AS t
13 | WHERE ci.note IN ('(voice)',  -- row filters: exact match against one of four note strings
14 | '(voice: Japanese version)',
15 | '(voice) (uncredited)',
16 | '(voice: English version)')
17 | AND cn.country_code ='[us]'
18 | AND n.gender ='f'
19 | AND rt.role ='actress'
20 | AND ci.movie_id = t.id  -- join conditions start here
21 | AND t.id = mc.movie_id
22 | AND ci.movie_id = mc.movie_id  -- logically implied by the two t.id equalities above
23 | AND mc.company_id = cn.id
24 | AND ci.role_id = rt.id
25 | AND n.id = ci.person_id
26 | AND chn.id = ci.person_role_id
27 | AND an.person_id = n.id
28 | AND an.person_id = ci.person_id;  -- logically implied by n.id = ci.person_id and an.person_id = n.id
29 |
30 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_10a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS uncredited_voiced_character,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS russian_movie
3 | FROM char_name AS chn,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | company_name AS cn,
6 | company_type AS ct,  -- no filter is applied to ct; it participates only through the join on mc.company_type_id
7 | movie_companies AS mc,
8 | role_type AS rt,
9 | title AS t
10 | WHERE ci.note LIKE '%(voice)%'  -- both substrings must occur in the same note
11 | AND ci.note LIKE '%(uncredited)%'
12 | AND cn.country_code = '[ru]'
13 | AND rt.role = 'actor'
14 | AND t.production_year > 2005
15 | AND t.id = mc.movie_id  -- join conditions start here
16 | AND t.id = ci.movie_id
17 | AND ci.movie_id = mc.movie_id  -- logically implied by the two t.id equalities above
18 | AND chn.id = ci.person_role_id
19 | AND rt.id = ci.role_id
20 | AND cn.id = mc.company_id
21 | AND ct.id = mc.company_type_id;
22 |
23 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_10b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character,  -- each MIN() aggregates independently over the filtered join, so the two outputs may come from different rows
2 | MIN(t.title) AS russian_mov_with_actor_producer
3 | FROM char_name AS chn,  -- comma-separated FROM list: every join condition lives in the WHERE clause
4 | cast_info AS ci,
5 | company_name AS cn,
6 | company_type AS ct,  -- no filter is applied to ct; it participates only through the join on mc.company_type_id
7 | movie_companies AS mc,
8 | role_type AS rt,
9 | title AS t
10 | WHERE ci.note LIKE '%(producer)%'  -- row filters
11 | AND cn.country_code = '[ru]'
12 | AND rt.role = 'actor'
13 | AND t.production_year > 2010
14 | AND t.id = mc.movie_id  -- join conditions start here
15 | AND t.id = ci.movie_id
16 | AND ci.movie_id = mc.movie_id  -- logically implied by the two t.id equalities above
17 | AND chn.id = ci.person_role_id
18 | AND rt.id = ci.role_id
19 | AND cn.id = mc.company_id
20 | AND ct.id = mc.company_type_id;
21 |
22 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_10c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(chn.name) AS character, -- Minimum character name and title over cast rows whose note contains '(producer)', for titles after 1990 linked to a '[us]' company.
2 | MIN(t.title) AS movie_with_american_producer
3 | FROM char_name AS chn,
4 | cast_info AS ci,
5 | company_name AS cn,
6 | company_type AS ct,
7 | movie_companies AS mc,
8 | role_type AS rt,
9 | title AS t
10 | WHERE ci.note LIKE '%(producer)%'
11 | AND cn.country_code = '[us]'
12 | AND t.production_year > 1990
13 | AND t.id = mc.movie_id
14 | AND t.id = ci.movie_id
15 | AND ci.movie_id = mc.movie_id -- already implied by the two t.id equalities above; kept as an explicit extra join edge
16 | AND chn.id = ci.person_role_id
17 | AND rt.id = ci.role_id -- unlike the sibling 10a/10b queries there is no rt.role filter here; rt is join-only
18 | AND cn.id = mc.company_id
19 | AND ct.id = mc.company_type_id; -- ct is likewise join-only (no filter, nothing selected)
20 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_11a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, -- Minimum company name, link type and title for 1950-2000 titles with keyword 'sequel', a '%follow%' link, and a non-'[pl]' production company named like Film/Warner.
2 | MIN(lt.link) AS movie_link_type,
3 | MIN(t.title) AS non_polish_sequel_movie
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | keyword AS k,
7 | link_type AS lt,
8 | movie_companies AS mc,
9 | movie_keyword AS mk,
10 | movie_link AS ml,
11 | title AS t
12 | WHERE cn.country_code !='[pl]' -- a NULL country_code does not satisfy != and is therefore filtered out as well
13 | AND (cn.name LIKE '%Film%'
14 | OR cn.name LIKE '%Warner%')
15 | AND ct.kind ='production companies'
16 | AND k.keyword ='sequel'
17 | AND lt.link LIKE '%follow%'
18 | AND mc.note IS NULL
19 | AND t.production_year BETWEEN 1950 AND 2000
20 | AND lt.id = ml.link_type_id
21 | AND ml.movie_id = t.id
22 | AND t.id = mk.movie_id
23 | AND mk.keyword_id = k.id
24 | AND t.id = mc.movie_id
25 | AND mc.company_type_id = ct.id
26 | AND mc.company_id = cn.id
27 | AND ml.movie_id = mk.movie_id -- lines 27-29 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
28 | AND ml.movie_id = mc.movie_id
29 | AND mk.movie_id = mc.movie_id;
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_11b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, -- Minimum company name, link type and title for 1998 titles containing 'Money' with keyword 'sequel', a '%follows%' link, and a non-'[pl]' production company named like Film/Warner.
2 | MIN(lt.link) AS movie_link_type,
3 | MIN(t.title) AS sequel_movie
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | keyword AS k,
7 | link_type AS lt,
8 | movie_companies AS mc,
9 | movie_keyword AS mk,
10 | movie_link AS ml,
11 | title AS t
12 | WHERE cn.country_code !='[pl]' -- a NULL country_code does not satisfy != and is therefore filtered out as well
13 | AND (cn.name LIKE '%Film%'
14 | OR cn.name LIKE '%Warner%')
15 | AND ct.kind ='production companies'
16 | AND k.keyword ='sequel'
17 | AND lt.link LIKE '%follows%'
18 | AND mc.note IS NULL
19 | AND t.production_year = 1998
20 | AND t.title LIKE '%Money%'
21 | AND lt.id = ml.link_type_id
22 | AND ml.movie_id = t.id
23 | AND t.id = mk.movie_id
24 | AND mk.keyword_id = k.id
25 | AND t.id = mc.movie_id
26 | AND mc.company_type_id = ct.id
27 | AND mc.company_id = cn.id
28 | AND ml.movie_id = mk.movie_id -- lines 28-30 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
29 | AND ml.movie_id = mc.movie_id
30 | AND mk.movie_id = mc.movie_id;
31 |
32 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_11c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, -- Minimum company name, company note and title for post-1950 linked titles with one of three keywords, credited with a note to a non-'[pl]' Fox-named company in a non-production role.
2 | MIN(mc.note) AS production_note,
3 | MIN(t.title) AS movie_based_on_book
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | keyword AS k,
7 | link_type AS lt,
8 | movie_companies AS mc,
9 | movie_keyword AS mk,
10 | movie_link AS ml,
11 | title AS t
12 | WHERE cn.country_code !='[pl]' -- a NULL country_code does not satisfy != and is therefore filtered out as well
13 | AND (cn.name LIKE '20th Century Fox%'
14 | OR cn.name LIKE 'Twentieth Century Fox%')
15 | AND ct.kind != 'production companies'
16 | AND ct.kind IS NOT NULL -- logically redundant: the != on the previous line already rejects NULL kinds
17 | AND k.keyword IN ('sequel',
18 | 'revenge',
19 | 'based-on-novel')
20 | AND mc.note IS NOT NULL
21 | AND t.production_year > 1950
22 | AND lt.id = ml.link_type_id -- lt is join-only here: no filter on it and nothing selected from it
23 | AND ml.movie_id = t.id
24 | AND t.id = mk.movie_id
25 | AND mk.keyword_id = k.id
26 | AND t.id = mc.movie_id
27 | AND mc.company_type_id = ct.id
28 | AND mc.company_id = cn.id
29 | AND ml.movie_id = mk.movie_id -- lines 29-31 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
30 | AND ml.movie_id = mc.movie_id
31 | AND mk.movie_id = mc.movie_id;
32 |
33 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_11d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS from_company, -- Minimum company name, company note and title for post-1950 linked titles with one of three keywords, credited with a note to any non-'[pl]' company in a non-production role.
2 | MIN(mc.note) AS production_note,
3 | MIN(t.title) AS movie_based_on_book
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | keyword AS k,
7 | link_type AS lt,
8 | movie_companies AS mc,
9 | movie_keyword AS mk,
10 | movie_link AS ml,
11 | title AS t
12 | WHERE cn.country_code !='[pl]' -- a NULL country_code does not satisfy != and is therefore filtered out as well
13 | AND ct.kind != 'production companies'
14 | AND ct.kind IS NOT NULL -- logically redundant: the != on the previous line already rejects NULL kinds
15 | AND k.keyword IN ('sequel',
16 | 'revenge',
17 | 'based-on-novel')
18 | AND mc.note IS NOT NULL
19 | AND t.production_year > 1950
20 | AND lt.id = ml.link_type_id -- lt is join-only here: no filter on it and nothing selected from it
21 | AND ml.movie_id = t.id
22 | AND t.id = mk.movie_id
23 | AND mk.keyword_id = k.id
24 | AND t.id = mc.movie_id
25 | AND mc.company_type_id = ct.id
26 | AND mc.company_id = cn.id
27 | AND ml.movie_id = mk.movie_id -- lines 27-29 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
28 | AND ml.movie_id = mc.movie_id
29 | AND mk.movie_id = mc.movie_id;
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_12a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, -- Minimum company name, rating info and title for 2005-2008 titles with genre Drama/Horror, rating info above '8.0', and a '[us]' production company.
2 | MIN(mi_idx.info) AS rating,
3 | MIN(t.title) AS drama_horror_movie
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it1,
7 | info_type AS it2,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | movie_info_idx AS mi_idx,
11 | title AS t
12 | WHERE cn.country_code = '[us]'
13 | AND ct.kind = 'production companies'
14 | AND it1.info = 'genres' -- it1 classifies the mi rows (see line 22)
15 | AND it2.info = 'rating' -- it2 classifies the mi_idx rows (see line 23)
16 | AND mi.info IN ('Drama',
17 | 'Horror')
18 | AND mi_idx.info > '8.0' -- NOTE(review): compared against a string literal, so presumably a lexicographic comparison; confirm the column type
19 | AND t.production_year BETWEEN 2005 AND 2008
20 | AND t.id = mi.movie_id
21 | AND t.id = mi_idx.movie_id
22 | AND mi.info_type_id = it1.id
23 | AND mi_idx.info_type_id = it2.id
24 | AND t.id = mc.movie_id
25 | AND ct.id = mc.company_type_id
26 | AND cn.id = mc.company_id
27 | AND mc.movie_id = mi.movie_id -- lines 27-29 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
28 | AND mc.movie_id = mi_idx.movie_id
29 | AND mi.movie_id = mi_idx.movie_id;
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_12b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS budget, -- Minimum budget info and title for post-2000 titles named 'Birdemic%' or '%Movie%' that have a 'bottom 10 rank' entry and a '[us]' production company or distributor.
2 | MIN(t.title) AS unsuccsessful_movie -- NOTE(review): alias spelling ('unsuccsessful') is part of the output column name; left as-is
3 | FROM company_name AS cn,
4 | company_type AS ct,
5 | info_type AS it1,
6 | info_type AS it2,
7 | movie_companies AS mc,
8 | movie_info AS mi,
9 | movie_info_idx AS mi_idx,
10 | title AS t
11 | WHERE cn.country_code ='[us]'
12 | AND ct.kind IS NOT NULL -- logically redundant: the equality tests on the next two lines already reject NULL kinds
13 | AND (ct.kind ='production companies'
14 | OR ct.kind = 'distributors')
15 | AND it1.info ='budget' -- it1 classifies the mi rows (see line 22)
16 | AND it2.info ='bottom 10 rank' -- it2 classifies the mi_idx rows (see line 23); mi_idx is used only as an existence filter, nothing is selected from it
17 | AND t.production_year >2000
18 | AND (t.title LIKE 'Birdemic%'
19 | OR t.title LIKE '%Movie%')
20 | AND t.id = mi.movie_id
21 | AND t.id = mi_idx.movie_id
22 | AND mi.info_type_id = it1.id
23 | AND mi_idx.info_type_id = it2.id
24 | AND t.id = mc.movie_id
25 | AND ct.id = mc.company_type_id
26 | AND cn.id = mc.company_id
27 | AND mc.movie_id = mi.movie_id -- lines 27-29 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
28 | AND mc.movie_id = mi_idx.movie_id
29 | AND mi.movie_id = mi_idx.movie_id;
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_12c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS movie_company, -- Minimum company name, rating info and title for 2000-2010 titles in four genres with rating info above '7.0' and a '[us]' production company.
2 | MIN(mi_idx.info) AS rating,
3 | MIN(t.title) AS mainstream_movie
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it1,
7 | info_type AS it2,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | movie_info_idx AS mi_idx,
11 | title AS t
12 | WHERE cn.country_code = '[us]'
13 | AND ct.kind = 'production companies'
14 | AND it1.info = 'genres' -- it1 classifies the mi rows (see line 24)
15 | AND it2.info = 'rating' -- it2 classifies the mi_idx rows (see line 25)
16 | AND mi.info IN ('Drama',
17 | 'Horror',
18 | 'Western',
19 | 'Family')
20 | AND mi_idx.info > '7.0' -- NOTE(review): compared against a string literal, so presumably a lexicographic comparison; confirm the column type
21 | AND t.production_year BETWEEN 2000 AND 2010
22 | AND t.id = mi.movie_id
23 | AND t.id = mi_idx.movie_id
24 | AND mi.info_type_id = it1.id
25 | AND mi_idx.info_type_id = it2.id
26 | AND t.id = mc.movie_id
27 | AND ct.id = mc.company_type_id
28 | AND cn.id = mc.company_id
29 | AND mc.movie_id = mi.movie_id -- lines 29-31 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
30 | AND mc.movie_id = mi_idx.movie_id
31 | AND mi.movie_id = mi_idx.movie_id;
32 |
33 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_13a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, -- Minimum release-date info, rating info and title for titles of kind 'movie' that have a '[de]' production company.
2 | MIN(miidx.info) AS rating,
3 | MIN(t.title) AS german_movie
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it,
7 | info_type AS it2,
8 | kind_type AS kt,
9 | movie_companies AS mc,
10 | movie_info AS mi,
11 | movie_info_idx AS miidx,
12 | title AS t
13 | WHERE cn.country_code ='[de]'
14 | AND ct.kind ='production companies'
15 | AND it.info ='rating' -- it classifies the miidx rows (see line 25)
16 | AND it2.info ='release dates' -- it2 classifies the mi rows (see line 19)
17 | AND kt.kind ='movie'
18 | AND mi.movie_id = t.id
19 | AND it2.id = mi.info_type_id
20 | AND kt.id = t.kind_id
21 | AND mc.movie_id = t.id
22 | AND cn.id = mc.company_id
23 | AND ct.id = mc.company_type_id
24 | AND miidx.movie_id = t.id
25 | AND it.id = miidx.info_type_id
26 | AND mi.movie_id = miidx.movie_id -- lines 26-28 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
27 | AND mi.movie_id = mc.movie_id
28 | AND miidx.movie_id = mc.movie_id;
29 |
30 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_13b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, -- Minimum company name, rating info and title for 'movie' titles containing 'Champion' or 'Loser' that have release-date info and a '[us]' production company.
2 | MIN(miidx.info) AS rating,
3 | MIN(t.title) AS movie_about_winning
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it,
7 | info_type AS it2,
8 | kind_type AS kt,
9 | movie_companies AS mc,
10 | movie_info AS mi,
11 | movie_info_idx AS miidx,
12 | title AS t
13 | WHERE cn.country_code ='[us]'
14 | AND ct.kind ='production companies'
15 | AND it.info ='rating' -- it classifies the miidx rows (see line 28)
16 | AND it2.info ='release dates' -- it2 classifies the mi rows (see line 22); mi is an existence filter only, nothing is selected from it
17 | AND kt.kind ='movie'
18 | AND t.title != '' -- logically redundant: any title matching either LIKE pattern below is non-empty
19 | AND (t.title LIKE '%Champion%'
20 | OR t.title LIKE '%Loser%')
21 | AND mi.movie_id = t.id
22 | AND it2.id = mi.info_type_id
23 | AND kt.id = t.kind_id
24 | AND mc.movie_id = t.id
25 | AND cn.id = mc.company_id
26 | AND ct.id = mc.company_type_id
27 | AND miidx.movie_id = t.id
28 | AND it.id = miidx.info_type_id
29 | AND mi.movie_id = miidx.movie_id -- lines 29-31 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
30 | AND mi.movie_id = mc.movie_id
31 | AND miidx.movie_id = mc.movie_id;
32 |
33 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_13c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, -- Minimum company name, rating info and title for 'movie' titles starting with 'Champion' or 'Loser' that have release-date info and a '[us]' production company.
2 | MIN(miidx.info) AS rating,
3 | MIN(t.title) AS movie_about_winning
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it,
7 | info_type AS it2,
8 | kind_type AS kt,
9 | movie_companies AS mc,
10 | movie_info AS mi,
11 | movie_info_idx AS miidx,
12 | title AS t
13 | WHERE cn.country_code ='[us]'
14 | AND ct.kind ='production companies'
15 | AND it.info ='rating' -- it classifies the miidx rows (see line 28)
16 | AND it2.info ='release dates' -- it2 classifies the mi rows (see line 22); mi is an existence filter only, nothing is selected from it
17 | AND kt.kind ='movie'
18 | AND t.title != '' -- logically redundant: any title matching either LIKE pattern below is non-empty
19 | AND (t.title LIKE 'Champion%'
20 | OR t.title LIKE 'Loser%')
21 | AND mi.movie_id = t.id
22 | AND it2.id = mi.info_type_id
23 | AND kt.id = t.kind_id
24 | AND mc.movie_id = t.id
25 | AND cn.id = mc.company_id
26 | AND ct.id = mc.company_type_id
27 | AND miidx.movie_id = t.id
28 | AND it.id = miidx.info_type_id
29 | AND mi.movie_id = miidx.movie_id -- lines 29-31 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
30 | AND mi.movie_id = mc.movie_id
31 | AND miidx.movie_id = mc.movie_id;
32 |
33 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_13d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS producing_company, -- Minimum company name, rating info and title for titles of kind 'movie' that have release-date info and a '[us]' production company.
2 | MIN(miidx.info) AS rating,
3 | MIN(t.title) AS movie
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it,
7 | info_type AS it2,
8 | kind_type AS kt,
9 | movie_companies AS mc,
10 | movie_info AS mi,
11 | movie_info_idx AS miidx,
12 | title AS t
13 | WHERE cn.country_code ='[us]'
14 | AND ct.kind ='production companies'
15 | AND it.info ='rating' -- it classifies the miidx rows (see line 25)
16 | AND it2.info ='release dates' -- it2 classifies the mi rows (see line 19); mi is an existence filter only, nothing is selected from it
17 | AND kt.kind ='movie'
18 | AND mi.movie_id = t.id
19 | AND it2.id = mi.info_type_id
20 | AND kt.id = t.kind_id
21 | AND mc.movie_id = t.id
22 | AND cn.id = mc.company_id
23 | AND ct.id = mc.company_type_id
24 | AND miidx.movie_id = t.id
25 | AND it.id = miidx.info_type_id
26 | AND mi.movie_id = miidx.movie_id -- lines 26-28 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
27 | AND mi.movie_id = mc.movie_id
28 | AND miidx.movie_id = mc.movie_id;
29 |
30 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_14a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, -- Minimum rating info and title for post-2010 'movie' titles with one of four violence-related keywords, a listed country value, and rating info below '8.5'.
2 | MIN(t.title) AS northern_dark_movie
3 | FROM info_type AS it1,
4 | info_type AS it2,
5 | keyword AS k,
6 | kind_type AS kt,
7 | movie_info AS mi,
8 | movie_info_idx AS mi_idx,
9 | movie_keyword AS mk,
10 | title AS t
11 | WHERE it1.info = 'countries' -- it1 classifies the mi rows (see line 38)
12 | AND it2.info = 'rating' -- it2 classifies the mi_idx rows (see line 39)
13 | AND k.keyword IN ('murder',
14 | 'murder-in-title',
15 | 'blood',
16 | 'violence')
17 | AND kt.kind = 'movie'
18 | AND mi.info IN ('Sweden',
19 | 'Norway',
20 | 'Germany',
21 | 'Denmark',
22 | 'Swedish',
23 | 'Denish', -- NOTE(review): looks like a misspelling of 'Danish'; changing it would change the predicate, so left as-is pending confirmation
24 | 'Norwegian',
25 | 'German',
26 | 'USA',
27 | 'American')
28 | AND mi_idx.info < '8.5' -- NOTE(review): compared against a string literal, so presumably a lexicographic comparison; confirm the column type
29 | AND t.production_year > 2010
30 | AND kt.id = t.kind_id
31 | AND t.id = mi.movie_id
32 | AND t.id = mk.movie_id
33 | AND t.id = mi_idx.movie_id
34 | AND mk.movie_id = mi.movie_id -- lines 34-36 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
35 | AND mk.movie_id = mi_idx.movie_id
36 | AND mi.movie_id = mi_idx.movie_id
37 | AND k.id = mk.keyword_id
38 | AND it1.id = mi.info_type_id
39 | AND it2.id = mi_idx.info_type_id;
40 |
41 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_14b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi_idx.info) AS rating, -- Minimum rating info and title for post-2010 'movie' titles containing murder/Murder/Mord, with a murder keyword, a listed country value, and rating info above '6.0'.
2 | MIN(t.title) AS western_dark_production
3 | FROM info_type AS it1,
4 | info_type AS it2,
5 | keyword AS k,
6 | kind_type AS kt,
7 | movie_info AS mi,
8 | movie_info_idx AS mi_idx,
9 | movie_keyword AS mk,
10 | title AS t
11 | WHERE it1.info = 'countries' -- it1 classifies the mi rows (see line 39)
12 | AND it2.info = 'rating' -- it2 classifies the mi_idx rows (see line 40)
13 | AND k.keyword IN ('murder',
14 | 'murder-in-title')
15 | AND kt.kind = 'movie'
16 | AND mi.info IN ('Sweden',
17 | 'Norway',
18 | 'Germany',
19 | 'Denmark',
20 | 'Swedish',
21 | 'Denish', -- NOTE(review): looks like a misspelling of 'Danish'; changing it would change the predicate, so left as-is pending confirmation
22 | 'Norwegian',
23 | 'German',
24 | 'USA',
25 | 'American')
26 | AND mi_idx.info > '6.0' -- NOTE(review): compared against a string literal, so presumably a lexicographic comparison; confirm the column type
27 | AND t.production_year > 2010
28 | AND (t.title LIKE '%murder%' -- both capitalisations are listed separately, which suggests LIKE is expected to be case-sensitive here
29 | OR t.title LIKE '%Murder%'
30 | OR t.title LIKE '%Mord%')
31 | AND kt.id = t.kind_id
32 | AND t.id = mi.movie_id
33 | AND t.id = mk.movie_id
34 | AND t.id = mi_idx.movie_id
35 | AND mk.movie_id = mi.movie_id -- lines 35-37 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
36 | AND mk.movie_id = mi_idx.movie_id
37 | AND mi.movie_id = mi_idx.movie_id
38 | AND k.id = mk.keyword_id
39 | AND it1.id = mi.info_type_id
40 | AND it2.id = mi_idx.info_type_id;
41 |
42 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_15a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, -- Minimum release-date info and title for post-2000 titles with an 'internet' release note matching 'USA:% 200%' and a '[us]' company whose note matches '(200...)' and '(worldwide)'.
2 | MIN(t.title) AS internet_movie
3 | FROM aka_title AS at,
4 | company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it1,
7 | keyword AS k,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | movie_keyword AS mk,
11 | title AS t
12 | WHERE cn.country_code = '[us]'
13 | AND it1.info = 'release dates'
14 | AND mc.note LIKE '%(200%)%'
15 | AND mc.note LIKE '%(worldwide)%'
16 | AND mi.note LIKE '%internet%'
17 | AND mi.info LIKE 'USA:% 200%'
18 | AND t.production_year > 2000
19 | AND t.id = at.movie_id -- at is join-only in this query: no filter on it and nothing selected from it
20 | AND t.id = mi.movie_id
21 | AND t.id = mk.movie_id
22 | AND t.id = mc.movie_id
23 | AND mk.movie_id = mi.movie_id -- lines 23-28 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
24 | AND mk.movie_id = mc.movie_id
25 | AND mk.movie_id = at.movie_id
26 | AND mi.movie_id = mc.movie_id
27 | AND mi.movie_id = at.movie_id
28 | AND mc.movie_id = at.movie_id
29 | AND k.id = mk.keyword_id -- k is join-only (no keyword filter)
30 | AND it1.id = mi.info_type_id
31 | AND cn.id = mc.company_id
32 | AND ct.id = mc.company_type_id; -- ct is join-only (no kind filter)
33 |
34 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_15b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, -- Minimum release-date info and title for 2005-2010 titles with an 'internet' release note matching 'USA:% 200%' and a '[us]' company named 'YouTube' whose note matches '(200...)' and '(worldwide)'.
2 | MIN(t.title) AS youtube_movie
3 | FROM aka_title AS at,
4 | company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it1,
7 | keyword AS k,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | movie_keyword AS mk,
11 | title AS t
12 | WHERE cn.country_code = '[us]'
13 | AND cn.name = 'YouTube'
14 | AND it1.info = 'release dates'
15 | AND mc.note LIKE '%(200%)%'
16 | AND mc.note LIKE '%(worldwide)%'
17 | AND mi.note LIKE '%internet%'
18 | AND mi.info LIKE 'USA:% 200%'
19 | AND t.production_year BETWEEN 2005 AND 2010
20 | AND t.id = at.movie_id -- at is join-only in this query: no filter on it and nothing selected from it
21 | AND t.id = mi.movie_id
22 | AND t.id = mk.movie_id
23 | AND t.id = mc.movie_id
24 | AND mk.movie_id = mi.movie_id -- lines 24-29 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
25 | AND mk.movie_id = mc.movie_id
26 | AND mk.movie_id = at.movie_id
27 | AND mi.movie_id = mc.movie_id
28 | AND mi.movie_id = at.movie_id
29 | AND mc.movie_id = at.movie_id
30 | AND k.id = mk.keyword_id -- k is join-only (no keyword filter)
31 | AND it1.id = mi.info_type_id
32 | AND cn.id = mc.company_id
33 | AND ct.id = mc.company_type_id; -- ct is join-only (no kind filter)
34 |
35 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_15c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS release_date, -- Minimum release-date info and title for post-1990 titles with an 'internet' release note matching 'USA:% 199%' or 'USA:% 200%' and a '[us]' company.
2 | MIN(t.title) AS modern_american_internet_movie
3 | FROM aka_title AS at,
4 | company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it1,
7 | keyword AS k,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | movie_keyword AS mk,
11 | title AS t
12 | WHERE cn.country_code = '[us]'
13 | AND it1.info = 'release dates'
14 | AND mi.note LIKE '%internet%'
15 | AND mi.info IS NOT NULL -- logically redundant: the LIKE tests on the next two lines already reject NULL info
16 | AND (mi.info LIKE 'USA:% 199%'
17 | OR mi.info LIKE 'USA:% 200%')
18 | AND t.production_year > 1990
19 | AND t.id = at.movie_id -- at is join-only in this query: no filter on it and nothing selected from it
20 | AND t.id = mi.movie_id
21 | AND t.id = mk.movie_id
22 | AND t.id = mc.movie_id
23 | AND mk.movie_id = mi.movie_id -- lines 23-28 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
24 | AND mk.movie_id = mc.movie_id
25 | AND mk.movie_id = at.movie_id
26 | AND mi.movie_id = mc.movie_id
27 | AND mi.movie_id = at.movie_id
28 | AND mc.movie_id = at.movie_id
29 | AND k.id = mk.keyword_id -- k is join-only (no keyword filter)
30 | AND it1.id = mi.info_type_id
31 | AND cn.id = mc.company_id
32 | AND ct.id = mc.company_type_id; -- ct is join-only (no kind filter)
33 |
34 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_15d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(at.title) AS aka_title, -- Minimum alternate title and title for post-1990 titles that have an 'internet' release-date note and a '[us]' company.
2 | MIN(t.title) AS internet_movie_title
3 | FROM aka_title AS at,
4 | company_name AS cn,
5 | company_type AS ct,
6 | info_type AS it1,
7 | keyword AS k,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | movie_keyword AS mk,
11 | title AS t
12 | WHERE cn.country_code = '[us]'
13 | AND it1.info = 'release dates'
14 | AND mi.note LIKE '%internet%'
15 | AND t.production_year > 1990
16 | AND t.id = at.movie_id
17 | AND t.id = mi.movie_id
18 | AND t.id = mk.movie_id
19 | AND t.id = mc.movie_id
20 | AND mk.movie_id = mi.movie_id -- lines 20-25 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
21 | AND mk.movie_id = mc.movie_id
22 | AND mk.movie_id = at.movie_id
23 | AND mi.movie_id = mc.movie_id
24 | AND mi.movie_id = at.movie_id
25 | AND mc.movie_id = at.movie_id
26 | AND k.id = mk.keyword_id -- k is join-only (no keyword filter)
27 | AND it1.id = mi.info_type_id
28 | AND cn.id = mc.company_id
29 | AND ct.id = mc.company_type_id; -- ct is join-only (no kind filter)
30 |
31 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_16a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, -- Minimum alternate person name and title for cast of titles with episode_nr in [50, 100), keyword 'character-name-in-title', and a '[us]' company.
2 | MIN(t.title) AS series_named_after_char
3 | FROM aka_name AS an,
4 | cast_info AS ci,
5 | company_name AS cn,
6 | keyword AS k,
7 | movie_companies AS mc,
8 | movie_keyword AS mk,
9 | name AS n,
10 | title AS t
11 | WHERE cn.country_code ='[us]'
12 | AND k.keyword ='character-name-in-title'
13 | AND t.episode_nr >= 50
14 | AND t.episode_nr < 100
15 | AND an.person_id = n.id -- n is join-only: it links an to ci and is neither filtered nor selected
16 | AND n.id = ci.person_id
17 | AND ci.movie_id = t.id
18 | AND t.id = mk.movie_id
19 | AND mk.keyword_id = k.id
20 | AND t.id = mc.movie_id
21 | AND mc.company_id = cn.id
22 | AND an.person_id = ci.person_id -- lines 22-25 are implied by the equalities above; they add explicit pairwise join edges
23 | AND ci.movie_id = mc.movie_id
24 | AND ci.movie_id = mk.movie_id
25 | AND mc.movie_id = mk.movie_id;
26 |
27 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_16b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, -- Minimum alternate person name and title for cast of titles with keyword 'character-name-in-title' and a '[us]' company (no episode_nr restriction).
2 | MIN(t.title) AS series_named_after_char
3 | FROM aka_name AS an,
4 | cast_info AS ci,
5 | company_name AS cn,
6 | keyword AS k,
7 | movie_companies AS mc,
8 | movie_keyword AS mk,
9 | name AS n,
10 | title AS t
11 | WHERE cn.country_code ='[us]'
12 | AND k.keyword ='character-name-in-title'
13 | AND an.person_id = n.id -- n is join-only: it links an to ci and is neither filtered nor selected
14 | AND n.id = ci.person_id
15 | AND ci.movie_id = t.id
16 | AND t.id = mk.movie_id
17 | AND mk.keyword_id = k.id
18 | AND t.id = mc.movie_id
19 | AND mc.company_id = cn.id
20 | AND an.person_id = ci.person_id -- lines 20-23 are implied by the equalities above; they add explicit pairwise join edges
21 | AND ci.movie_id = mc.movie_id
22 | AND ci.movie_id = mk.movie_id
23 | AND mc.movie_id = mk.movie_id;
24 |
25 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_16c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, -- Minimum alternate person name and title for cast of titles with episode_nr below 100, keyword 'character-name-in-title', and a '[us]' company.
2 | MIN(t.title) AS series_named_after_char
3 | FROM aka_name AS an,
4 | cast_info AS ci,
5 | company_name AS cn,
6 | keyword AS k,
7 | movie_companies AS mc,
8 | movie_keyword AS mk,
9 | name AS n,
10 | title AS t
11 | WHERE cn.country_code ='[us]'
12 | AND k.keyword ='character-name-in-title'
13 | AND t.episode_nr < 100 -- a NULL episode_nr does not satisfy this comparison, so such titles are excluded
14 | AND an.person_id = n.id -- n is join-only: it links an to ci and is neither filtered nor selected
15 | AND n.id = ci.person_id
16 | AND ci.movie_id = t.id
17 | AND t.id = mk.movie_id
18 | AND mk.keyword_id = k.id
19 | AND t.id = mc.movie_id
20 | AND mc.company_id = cn.id
21 | AND an.person_id = ci.person_id -- lines 21-24 are implied by the equalities above; they add explicit pairwise join edges
22 | AND ci.movie_id = mc.movie_id
23 | AND ci.movie_id = mk.movie_id
24 | AND mc.movie_id = mk.movie_id;
25 |
26 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_16d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(an.name) AS cool_actor_pseudonym, -- Minimum alternate person name and title for cast of titles with episode_nr in [5, 100), keyword 'character-name-in-title', and a '[us]' company.
2 | MIN(t.title) AS series_named_after_char
3 | FROM aka_name AS an,
4 | cast_info AS ci,
5 | company_name AS cn,
6 | keyword AS k,
7 | movie_companies AS mc,
8 | movie_keyword AS mk,
9 | name AS n,
10 | title AS t
11 | WHERE cn.country_code ='[us]'
12 | AND k.keyword ='character-name-in-title'
13 | AND t.episode_nr >= 5
14 | AND t.episode_nr < 100
15 | AND an.person_id = n.id -- n is join-only: it links an to ci and is neither filtered nor selected
16 | AND n.id = ci.person_id
17 | AND ci.movie_id = t.id
18 | AND t.id = mk.movie_id
19 | AND mk.keyword_id = k.id
20 | AND t.id = mc.movie_id
21 | AND mc.company_id = cn.id
22 | AND an.person_id = ci.person_id -- lines 22-25 are implied by the equalities above; they add explicit pairwise join edges
23 | AND ci.movie_id = mc.movie_id
24 | AND ci.movie_id = mk.movie_id
25 | AND mc.movie_id = mk.movie_id;
26 |
27 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_17a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, -- Minimum person name starting with 'B' among cast of titles with keyword 'character-name-in-title' and a '[us]' company.
2 | MIN(n.name) AS a1 -- same expression as the first column, so both output columns carry the same value
3 | FROM cast_info AS ci,
4 | company_name AS cn,
5 | keyword AS k,
6 | movie_companies AS mc,
7 | movie_keyword AS mk,
8 | name AS n,
9 | title AS t
10 | WHERE cn.country_code ='[us]'
11 | AND k.keyword ='character-name-in-title'
12 | AND n.name LIKE 'B%'
13 | AND n.id = ci.person_id
14 | AND ci.movie_id = t.id -- t is join-only: no title column is filtered or selected
15 | AND t.id = mk.movie_id
16 | AND mk.keyword_id = k.id
17 | AND t.id = mc.movie_id
18 | AND mc.company_id = cn.id
19 | AND ci.movie_id = mc.movie_id -- lines 19-21 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
20 | AND ci.movie_id = mk.movie_id
21 | AND mc.movie_id = mk.movie_id;
22 |
23 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_17b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie, -- Minimum person name starting with 'Z' among cast of titles with keyword 'character-name-in-title' that have any company row.
2 | MIN(n.name) AS a1 -- same expression as the first column, so both output columns carry the same value
3 | FROM cast_info AS ci,
4 | company_name AS cn,
5 | keyword AS k,
6 | movie_companies AS mc,
7 | movie_keyword AS mk,
8 | name AS n,
9 | title AS t
10 | WHERE k.keyword ='character-name-in-title'
11 | AND n.name LIKE 'Z%'
12 | AND n.id = ci.person_id
13 | AND ci.movie_id = t.id -- t is join-only: no title column is filtered or selected
14 | AND t.id = mk.movie_id
15 | AND mk.keyword_id = k.id
16 | AND t.id = mc.movie_id
17 | AND mc.company_id = cn.id -- cn is join-only here (no country filter)
18 | AND ci.movie_id = mc.movie_id -- lines 18-20 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
19 | AND ci.movie_id = mk.movie_id
20 | AND mc.movie_id = mk.movie_id;
21 |
22 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_17c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie, -- Minimum person name starting with 'X' among cast of titles with keyword 'character-name-in-title' that have any company row.
2 | MIN(n.name) AS a1 -- same expression as the first column, so both output columns carry the same value
3 | FROM cast_info AS ci,
4 | company_name AS cn,
5 | keyword AS k,
6 | movie_companies AS mc,
7 | movie_keyword AS mk,
8 | name AS n,
9 | title AS t
10 | WHERE k.keyword ='character-name-in-title'
11 | AND n.name LIKE 'X%'
12 | AND n.id = ci.person_id
13 | AND ci.movie_id = t.id -- t is join-only: no title column is filtered or selected
14 | AND t.id = mk.movie_id
15 | AND mk.keyword_id = k.id
16 | AND t.id = mc.movie_id
17 | AND mc.company_id = cn.id -- cn is join-only here (no country filter)
18 | AND ci.movie_id = mc.movie_id -- lines 18-20 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
19 | AND ci.movie_id = mk.movie_id
20 | AND mc.movie_id = mk.movie_id;
21 |
22 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_17d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie -- Minimum person name containing 'Bert' among cast of titles with keyword 'character-name-in-title' that have any company row.
2 | FROM cast_info AS ci,
3 | company_name AS cn,
4 | keyword AS k,
5 | movie_companies AS mc,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE k.keyword ='character-name-in-title'
10 | AND n.name LIKE '%Bert%'
11 | AND n.id = ci.person_id
12 | AND ci.movie_id = t.id -- t is join-only: no title column is filtered or selected
13 | AND t.id = mk.movie_id
14 | AND mk.keyword_id = k.id
15 | AND t.id = mc.movie_id
16 | AND mc.company_id = cn.id -- cn is join-only here (no country filter)
17 | AND ci.movie_id = mc.movie_id -- lines 17-19 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
18 | AND ci.movie_id = mk.movie_id
19 | AND mc.movie_id = mk.movie_id;
20 |
21 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_17e.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie -- Minimum person name (no name filter) among cast of titles with keyword 'character-name-in-title' and a '[us]' company.
2 | FROM cast_info AS ci,
3 | company_name AS cn,
4 | keyword AS k,
5 | movie_companies AS mc,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE cn.country_code ='[us]'
10 | AND k.keyword ='character-name-in-title'
11 | AND n.id = ci.person_id
12 | AND ci.movie_id = t.id -- t is join-only: no title column is filtered or selected
13 | AND t.id = mk.movie_id
14 | AND mk.keyword_id = k.id
15 | AND t.id = mc.movie_id
16 | AND mc.company_id = cn.id
17 | AND ci.movie_id = mc.movie_id -- lines 17-19 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
18 | AND ci.movie_id = mk.movie_id
19 | AND mc.movie_id = mk.movie_id;
20 |
21 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_17f.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS member_in_charnamed_movie -- Minimum person name containing 'B' among cast of titles with keyword 'character-name-in-title' that have any company row.
2 | FROM cast_info AS ci,
3 | company_name AS cn,
4 | keyword AS k,
5 | movie_companies AS mc,
6 | movie_keyword AS mk,
7 | name AS n,
8 | title AS t
9 | WHERE k.keyword ='character-name-in-title'
10 | AND n.name LIKE '%B%'
11 | AND n.id = ci.person_id
12 | AND ci.movie_id = t.id -- t is join-only: no title column is filtered or selected
13 | AND t.id = mk.movie_id
14 | AND mk.keyword_id = k.id
15 | AND t.id = mc.movie_id
16 | AND mc.company_id = cn.id -- cn is join-only here (no country filter)
17 | AND ci.movie_id = mc.movie_id -- lines 17-19 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
18 | AND ci.movie_id = mk.movie_id
19 | AND mc.movie_id = mk.movie_id;
20 |
21 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_18a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, -- Minimum budget info, votes info and title for titles whose cast includes a male person named like '%Tim%' credited exactly as '(producer)' or '(executive producer)'.
2 | MIN(mi_idx.info) AS movie_votes,
3 | MIN(t.title) AS movie_title
4 | FROM cast_info AS ci,
5 | info_type AS it1,
6 | info_type AS it2,
7 | movie_info AS mi,
8 | movie_info_idx AS mi_idx,
9 | name AS n,
10 | title AS t
11 | WHERE ci.note IN ('(producer)', -- exact-match list, not a LIKE pattern
12 | '(executive producer)')
13 | AND it1.info = 'budget' -- it1 classifies the mi rows (see line 24)
14 | AND it2.info = 'votes' -- it2 classifies the mi_idx rows (see line 25)
15 | AND n.gender = 'm'
16 | AND n.name LIKE '%Tim%'
17 | AND t.id = mi.movie_id
18 | AND t.id = mi_idx.movie_id
19 | AND t.id = ci.movie_id
20 | AND ci.movie_id = mi.movie_id -- lines 20-22 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
21 | AND ci.movie_id = mi_idx.movie_id
22 | AND mi.movie_id = mi_idx.movie_id
23 | AND n.id = ci.person_id
24 | AND it1.id = mi.info_type_id
25 | AND it2.id = mi_idx.info_type_id;
26 |
27 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_18b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, -- NOTE(review): alias says budget, but it1.info = 'genres' below, so this column holds a genre value ('Horror' or 'Thriller')
2 | MIN(mi_idx.info) AS movie_votes, -- NOTE(review): alias says votes, but it2.info = 'rating' below, so this column holds rating info
3 | MIN(t.title) AS movie_title -- overall: 2008-2014 Horror/Thriller titles with rating info above '8.0' and a female cast member credited with one of the writer notes
4 | FROM cast_info AS ci,
5 | info_type AS it1,
6 | info_type AS it2,
7 | movie_info AS mi,
8 | movie_info_idx AS mi_idx,
9 | name AS n,
10 | title AS t
11 | WHERE ci.note IN ('(writer)', -- exact-match list, not a LIKE pattern
12 | '(head writer)',
13 | '(written by)',
14 | '(story)',
15 | '(story editor)')
16 | AND it1.info = 'genres'
17 | AND it2.info = 'rating'
18 | AND mi.info IN ('Horror',
19 | 'Thriller')
20 | AND mi.note IS NULL
21 | AND mi_idx.info > '8.0' -- NOTE(review): compared against a string literal, so presumably a lexicographic comparison; confirm the column type
22 | AND n.gender IS NOT NULL -- logically redundant: the equality on the next line already rejects NULL gender
23 | AND n.gender = 'f'
24 | AND t.production_year BETWEEN 2008 AND 2014
25 | AND t.id = mi.movie_id
26 | AND t.id = mi_idx.movie_id
27 | AND t.id = ci.movie_id
28 | AND ci.movie_id = mi.movie_id -- lines 28-30 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
29 | AND ci.movie_id = mi_idx.movie_id
30 | AND mi.movie_id = mi_idx.movie_id
31 | AND n.id = ci.person_id
32 | AND it1.id = mi.info_type_id
33 | AND it2.id = mi_idx.info_type_id;
34 |
35 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_18c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(mi.info) AS movie_budget, -- NOTE(review): alias says budget, but it1.info = 'genres' below, so this column holds one of the listed genre values
2 | MIN(mi_idx.info) AS movie_votes,
3 | MIN(t.title) AS movie_title -- overall: titles in one of six genres that have votes info and a male cast member credited with one of the writer notes
4 | FROM cast_info AS ci,
5 | info_type AS it1,
6 | info_type AS it2,
7 | movie_info AS mi,
8 | movie_info_idx AS mi_idx,
9 | name AS n,
10 | title AS t
11 | WHERE ci.note IN ('(writer)', -- exact-match list, not a LIKE pattern
12 | '(head writer)',
13 | '(written by)',
14 | '(story)',
15 | '(story editor)')
16 | AND it1.info = 'genres'
17 | AND it2.info = 'votes'
18 | AND mi.info IN ('Horror',
19 | 'Action',
20 | 'Sci-Fi',
21 | 'Thriller',
22 | 'Crime',
23 | 'War')
24 | AND n.gender = 'm'
25 | AND t.id = mi.movie_id
26 | AND t.id = mi_idx.movie_id
27 | AND t.id = ci.movie_id
28 | AND ci.movie_id = mi.movie_id -- lines 28-30 are implied by the t.id equalities above; they add explicit pairwise join edges on movie_id
29 | AND ci.movie_id = mi_idx.movie_id
30 | AND mi.movie_id = mi_idx.movie_id
31 | AND n.id = ci.person_id
32 | AND it1.id = mi.info_type_id
33 | AND it2.id = mi_idx.info_type_id;
34 |
35 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_19b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress,
2 | MIN(t.title) AS kung_fu_panda
3 | FROM aka_name AS an,
4 | char_name AS chn,
5 | cast_info AS ci,
6 | company_name AS cn,
7 | info_type AS it,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | name AS n,
11 | role_type AS rt,
12 | title AS t
13 | WHERE ci.note = '(voice)'
14 | AND cn.country_code ='[us]'
15 | AND it.info = 'release dates'
16 | AND mc.note LIKE '%(200%)%'
17 | AND (mc.note LIKE '%(USA)%'
18 | OR mc.note LIKE '%(worldwide)%')
19 | AND mi.info IS NOT NULL
20 | AND (mi.info LIKE 'Japan:%2007%'
21 | OR mi.info LIKE 'USA:%2008%')
22 | AND n.gender ='f'
23 | AND n.name LIKE '%Angel%'
24 | AND rt.role ='actress'
25 | AND t.production_year BETWEEN 2007 AND 2008
26 | AND t.title LIKE '%Kung%Fu%Panda%'
27 | AND t.id = mi.movie_id
28 | AND t.id = mc.movie_id
29 | AND t.id = ci.movie_id
30 | AND mc.movie_id = ci.movie_id
31 | AND mc.movie_id = mi.movie_id
32 | AND mi.movie_id = ci.movie_id
33 | AND cn.id = mc.company_id
34 | AND it.id = mi.info_type_id
35 | AND n.id = ci.person_id
36 | AND rt.id = ci.role_id
37 | AND n.id = an.person_id
38 | AND ci.person_id = an.person_id
39 | AND chn.id = ci.person_role_id;
40 |
41 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_19c.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress,
2 | MIN(t.title) AS jap_engl_voiced_movie
3 | FROM aka_name AS an,
4 | char_name AS chn,
5 | cast_info AS ci,
6 | company_name AS cn,
7 | info_type AS it,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | name AS n,
11 | role_type AS rt,
12 | title AS t
13 | WHERE ci.note IN ('(voice)',
14 | '(voice: Japanese version)',
15 | '(voice) (uncredited)',
16 | '(voice: English version)')
17 | AND cn.country_code ='[us]'
18 | AND it.info = 'release dates'
19 | AND mi.info IS NOT NULL
20 | AND (mi.info LIKE 'Japan:%200%'
21 | OR mi.info LIKE 'USA:%200%')
22 | AND n.gender ='f'
23 | AND n.name LIKE '%An%'
24 | AND rt.role ='actress'
25 | AND t.production_year > 2000
26 | AND t.id = mi.movie_id
27 | AND t.id = mc.movie_id
28 | AND t.id = ci.movie_id
29 | AND mc.movie_id = ci.movie_id
30 | AND mc.movie_id = mi.movie_id
31 | AND mi.movie_id = ci.movie_id
32 | AND cn.id = mc.company_id
33 | AND it.id = mi.info_type_id
34 | AND n.id = ci.person_id
35 | AND rt.id = ci.role_id
36 | AND n.id = an.person_id
37 | AND ci.person_id = an.person_id
38 | AND chn.id = ci.person_role_id;
39 |
40 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_19d.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(n.name) AS voicing_actress,
2 | MIN(t.title) AS jap_engl_voiced_movie
3 | FROM aka_name AS an,
4 | char_name AS chn,
5 | cast_info AS ci,
6 | company_name AS cn,
7 | info_type AS it,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | name AS n,
11 | role_type AS rt,
12 | title AS t
13 | WHERE ci.note IN ('(voice)',
14 | '(voice: Japanese version)',
15 | '(voice) (uncredited)',
16 | '(voice: English version)')
17 | AND cn.country_code ='[us]'
18 | AND it.info = 'release dates'
19 | AND n.gender ='f'
20 | AND rt.role ='actress'
21 | AND t.production_year > 2000
22 | AND t.id = mi.movie_id
23 | AND t.id = mc.movie_id
24 | AND t.id = ci.movie_id
25 | AND mc.movie_id = ci.movie_id
26 | AND mc.movie_id = mi.movie_id
27 | AND mi.movie_id = ci.movie_id
28 | AND cn.id = mc.company_id
29 | AND it.id = mi.info_type_id
30 | AND n.id = ci.person_id
31 | AND rt.id = ci.role_id
32 | AND n.id = an.person_id
33 | AND ci.person_id = an.person_id
34 | AND chn.id = ci.person_role_id;
35 |
36 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_20a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS complete_downey_ironman_movie
2 | FROM complete_cast AS cc,
3 | comp_cast_type AS cct1,
4 | comp_cast_type AS cct2,
5 | char_name AS chn,
6 | cast_info AS ci,
7 | keyword AS k,
8 | kind_type AS kt,
9 | movie_keyword AS mk,
10 | name AS n,
11 | title AS t
12 | WHERE cct1.kind = 'cast'
13 | AND cct2.kind LIKE '%complete%'
14 | AND chn.name NOT LIKE '%Sherlock%'
15 | AND (chn.name LIKE '%Tony%Stark%'
16 | OR chn.name LIKE '%Iron%Man%')
17 | AND k.keyword IN ('superhero',
18 | 'sequel',
19 | 'second-part',
20 | 'marvel-comics',
21 | 'based-on-comic',
22 | 'tv-special',
23 | 'fight',
24 | 'violence')
25 | AND kt.kind = 'movie'
26 | AND t.production_year > 1950
27 | AND kt.id = t.kind_id
28 | AND t.id = mk.movie_id
29 | AND t.id = ci.movie_id
30 | AND t.id = cc.movie_id
31 | AND mk.movie_id = ci.movie_id
32 | AND mk.movie_id = cc.movie_id
33 | AND ci.movie_id = cc.movie_id
34 | AND chn.id = ci.person_role_id
35 | AND n.id = ci.person_id
36 | AND k.id = mk.keyword_id
37 | AND cct1.id = cc.subject_id
38 | AND cct2.id = cc.status_id;
39 |
40 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_20b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(t.title) AS complete_downey_ironman_movie
2 | FROM complete_cast AS cc,
3 | comp_cast_type AS cct1,
4 | comp_cast_type AS cct2,
5 | char_name AS chn,
6 | cast_info AS ci,
7 | keyword AS k,
8 | kind_type AS kt,
9 | movie_keyword AS mk,
10 | name AS n,
11 | title AS t
12 | WHERE cct1.kind = 'cast'
13 | AND cct2.kind LIKE '%complete%'
14 | AND chn.name NOT LIKE '%Sherlock%'
15 | AND (chn.name LIKE '%Tony%Stark%'
16 | OR chn.name LIKE '%Iron%Man%')
17 | AND k.keyword IN ('superhero',
18 | 'sequel',
19 | 'second-part',
20 | 'marvel-comics',
21 | 'based-on-comic',
22 | 'tv-special',
23 | 'fight',
24 | 'violence')
25 | AND kt.kind = 'movie'
26 | AND n.name LIKE '%Downey%Robert%'
27 | AND t.production_year > 2000
28 | AND kt.id = t.kind_id
29 | AND t.id = mk.movie_id
30 | AND t.id = ci.movie_id
31 | AND t.id = cc.movie_id
32 | AND mk.movie_id = ci.movie_id
33 | AND mk.movie_id = cc.movie_id
34 | AND ci.movie_id = cc.movie_id
35 | AND chn.id = ci.person_role_id
36 | AND n.id = ci.person_id
37 | AND k.id = mk.keyword_id
38 | AND cct1.id = cc.subject_id
39 | AND cct2.id = cc.status_id;
40 |
41 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_21b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(cn.name) AS company_name,
2 | MIN(lt.link) AS link_type,
3 | MIN(t.title) AS german_follow_up
4 | FROM company_name AS cn,
5 | company_type AS ct,
6 | keyword AS k,
7 | link_type AS lt,
8 | movie_companies AS mc,
9 | movie_info AS mi,
10 | movie_keyword AS mk,
11 | movie_link AS ml,
12 | title AS t
13 | WHERE cn.country_code !='[pl]'
14 | AND (cn.name LIKE '%Film%'
15 | OR cn.name LIKE '%Warner%')
16 | AND ct.kind ='production companies'
17 | AND k.keyword ='sequel'
18 | AND lt.link LIKE '%follow%'
19 | AND mc.note IS NULL
20 | AND mi.info IN ('Germany',
21 | 'German')
22 | AND t.production_year BETWEEN 2000 AND 2010
23 | AND lt.id = ml.link_type_id
24 | AND ml.movie_id = t.id
25 | AND t.id = mk.movie_id
26 | AND mk.keyword_id = k.id
27 | AND t.id = mc.movie_id
28 | AND mc.company_type_id = ct.id
29 | AND mc.company_id = cn.id
30 | AND mi.movie_id = t.id
31 | AND ml.movie_id = mk.movie_id
32 | AND ml.movie_id = mc.movie_id
33 | AND mk.movie_id = mc.movie_id
34 | AND ml.movie_id = mi.movie_id
35 | AND mk.movie_id = mi.movie_id
36 | AND mc.movie_id = mi.movie_id;
37 |
38 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_23a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind,
2 | MIN(t.title) AS complete_us_internet_movie
3 | FROM complete_cast AS cc,
4 | comp_cast_type AS cct1,
5 | company_name AS cn,
6 | company_type AS ct,
7 | info_type AS it1,
8 | keyword AS k,
9 | kind_type AS kt,
10 | movie_companies AS mc,
11 | movie_info AS mi,
12 | movie_keyword AS mk,
13 | title AS t
14 | WHERE cct1.kind = 'complete+verified'
15 | AND cn.country_code = '[us]'
16 | AND it1.info = 'release dates'
17 | AND kt.kind IN ('movie')
18 | AND mi.note LIKE '%internet%'
19 | AND mi.info IS NOT NULL
20 | AND (mi.info LIKE 'USA:% 199%'
21 | OR mi.info LIKE 'USA:% 200%')
22 | AND t.production_year > 2000
23 | AND kt.id = t.kind_id
24 | AND t.id = mi.movie_id
25 | AND t.id = mk.movie_id
26 | AND t.id = mc.movie_id
27 | AND t.id = cc.movie_id
28 | AND mk.movie_id = mi.movie_id
29 | AND mk.movie_id = mc.movie_id
30 | AND mk.movie_id = cc.movie_id
31 | AND mi.movie_id = mc.movie_id
32 | AND mi.movie_id = cc.movie_id
33 | AND mc.movie_id = cc.movie_id
34 | AND k.id = mk.keyword_id
35 | AND it1.id = mi.info_type_id
36 | AND cn.id = mc.company_id
37 | AND ct.id = mc.company_type_id
38 | AND cct1.id = cc.status_id;
39 |
40 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_23b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(kt.kind) AS movie_kind,
2 | MIN(t.title) AS complete_nerdy_internet_movie
3 | FROM complete_cast AS cc,
4 | comp_cast_type AS cct1,
5 | company_name AS cn,
6 | company_type AS ct,
7 | info_type AS it1,
8 | keyword AS k,
9 | kind_type AS kt,
10 | movie_companies AS mc,
11 | movie_info AS mi,
12 | movie_keyword AS mk,
13 | title AS t
14 | WHERE cct1.kind = 'complete+verified'
15 | AND cn.country_code = '[us]'
16 | AND it1.info = 'release dates'
17 | AND k.keyword IN ('nerd',
18 | 'loner',
19 | 'alienation',
20 | 'dignity')
21 | AND kt.kind IN ('movie')
22 | AND mi.note LIKE '%internet%'
23 | AND mi.info LIKE 'USA:% 200%'
24 | AND t.production_year > 2000
25 | AND kt.id = t.kind_id
26 | AND t.id = mi.movie_id
27 | AND t.id = mk.movie_id
28 | AND t.id = mc.movie_id
29 | AND t.id = cc.movie_id
30 | AND mk.movie_id = mi.movie_id
31 | AND mk.movie_id = mc.movie_id
32 | AND mk.movie_id = cc.movie_id
33 | AND mi.movie_id = mc.movie_id
34 | AND mi.movie_id = cc.movie_id
35 | AND mc.movie_id = cc.movie_id
36 | AND k.id = mk.keyword_id
37 | AND it1.id = mi.info_type_id
38 | AND cn.id = mc.company_id
39 | AND ct.id = mc.company_type_id
40 | AND cct1.id = cc.status_id;
41 |
42 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_32a.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(lt.link) AS link_type,
2 | MIN(t1.title) AS first_movie,
3 | MIN(t2.title) AS second_movie
4 | FROM keyword AS k,
5 | link_type AS lt,
6 | movie_keyword AS mk,
7 | movie_link AS ml,
8 | title AS t1,
9 | title AS t2
10 | WHERE k.keyword ='10,000-mile-club'
11 | AND mk.keyword_id = k.id
12 | AND t1.id = mk.movie_id
13 | AND ml.movie_id = t1.id
14 | AND ml.linked_movie_id = t2.id
15 | AND lt.id = ml.link_type_id
16 | AND mk.movie_id = t1.id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/queries/_32b.sql:
--------------------------------------------------------------------------------
1 | SELECT MIN(lt.link) AS link_type,
2 | MIN(t1.title) AS first_movie,
3 | MIN(t2.title) AS second_movie
4 | FROM keyword AS k,
5 | link_type AS lt,
6 | movie_keyword AS mk,
7 | movie_link AS ml,
8 | title AS t1,
9 | title AS t2
10 | WHERE k.keyword ='character-name-in-title'
11 | AND mk.keyword_id = k.id
12 | AND t1.id = mk.movie_id
13 | AND ml.movie_id = t1.id
14 | AND ml.linked_movie_id = t2.id
15 | AND lt.id = ml.link_type_id
16 | AND mk.movie_id = t1.id;
17 |
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/imdb/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | edition: "1.3"
3 | name: "imdb"
4 | defaults:
5 | dialect: trino
6 | description: >
7 | This queryset is based on the various tables made public by IMDB. Similar to the TPC-H benchmark,
8 | it is designed to evaluate analytical database performance. It is also known as the Join Order Benchmark (JOB).
9 |
10 | To run the benchmark:
11 | 1. Run the included hydrate.sh script which downloads relevant data
12 | 2. To execute all queries: `sdf run --no-cache`
13 | includes:
14 | - path: queries/
15 | - path: sources
16 | - path: imdb_data
17 | type: resource
18 |
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/.gitignore:
--------------------------------------------------------------------------------
1 | /tpch_pd_scale_10.zip
2 | /tpch_pd_scale_10
3 |
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/hydrate.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -xeuo pipefail
3 |
4 | # Downloads the TPC-H scale-10 dataset in zipped format and unpacks it next to this script.
5 |
6 | cd "$(dirname "${BASH_SOURCE[0]}")"  # dirname also works when invoked as `bash hydrate.sh` (path without a slash)
7 | curl -LO https://cdn.sdf.com/data/tpch/tpch_pd_scale_10.zip
8 | unzip tpch_pd_scale_10.zip
9 |
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q1.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_returnflag,
3 | l_linestatus,
4 | sum(l_quantity) as sum_qty,
5 | sum(l_extendedprice) as sum_base_price,
6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
8 | avg(l_quantity) as avg_qty,
9 | avg(l_extendedprice) as avg_price,
10 | avg(l_discount) as avg_disc,
11 | count(*) as count_order
12 | from
13 | lineitem
14 | where
15 | l_shipdate <= date '1998-09-02'
16 | group by
17 | l_returnflag,
18 | l_linestatus
19 | order by
20 | l_returnflag,
21 | l_linestatus;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q10.sql:
--------------------------------------------------------------------------------
1 | select
2 | c_custkey,
3 | c_name,
4 | sum(l_extendedprice * (1 - l_discount)) as revenue,
5 | c_acctbal,
6 | n_name,
7 | c_address,
8 | c_phone,
9 | c_comment
10 | from
11 | customer,
12 | orders,
13 | lineitem,
14 | nation
15 | where
16 | c_custkey = o_custkey
17 | and l_orderkey = o_orderkey
18 | and o_orderdate >= date '1993-10-01'
19 | and o_orderdate < date '1994-01-01'
20 | and l_returnflag = 'R'
21 | and c_nationkey = n_nationkey
22 | group by
23 | c_custkey,
24 | c_name,
25 | c_acctbal,
26 | c_phone,
27 | n_name,
28 | c_address,
29 | c_comment
30 | order by
31 | revenue desc;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q11.sql:
--------------------------------------------------------------------------------
1 | select -- Per part supplied from GERMANY: total stock value (supply cost * available quantity), keeping only parts above a fraction of the overall GERMANY total.
2 | ps_partkey,
3 | sum(ps_supplycost * ps_availqty) as value
4 | from
5 | partsupp,
6 | supplier,
7 | nation
8 | where
9 | ps_suppkey = s_suppkey
10 | and s_nationkey = n_nationkey
11 | and n_name = 'GERMANY'
12 | group by
13 | ps_partkey
14 | having
15 | sum(ps_supplycost * ps_availqty) > ( -- threshold: scalar subquery over the same three-table join and filter
16 | select
17 | sum(ps_supplycost * ps_availqty) * 0.0001 -- NOTE(review): the TPC-H spec defines this fraction as 0.0001 / SF; the data directory is named tpch_pd_scale_10, so 0.00001 may be intended — confirm
18 | from
19 | partsupp,
20 | supplier,
21 | nation
22 | where
23 | ps_suppkey = s_suppkey
24 | and s_nationkey = n_nationkey
25 | and n_name = 'GERMANY'
26 | )
27 | order by
28 | value desc;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q12.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_shipmode,
3 | sum(case
4 | when o_orderpriority = '1-URGENT'
5 | or o_orderpriority = '2-HIGH'
6 | then 1
7 | else 0
8 | end) as high_line_count,
9 | sum(case
10 | when o_orderpriority <> '1-URGENT'
11 | and o_orderpriority <> '2-HIGH'
12 | then 1
13 | else 0
14 | end) as low_line_count
15 | from
16 | lineitem
17 | join
18 | orders
19 | on
20 | l_orderkey = o_orderkey
21 | where
22 | l_shipmode in ('MAIL', 'SHIP')
23 | and l_commitdate < l_receiptdate
24 | and l_shipdate < l_commitdate
25 | and l_receiptdate >= date '1994-01-01'
26 | and l_receiptdate < date '1995-01-01'
27 | group by
28 | l_shipmode
29 | order by
30 | l_shipmode;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q13.sql:
--------------------------------------------------------------------------------
1 | select
2 | c_count,
3 | count(*) as custdist
4 | from
5 | (
6 | select
7 | c_custkey,
8 | count(o_orderkey)
9 | from
10 | customer left outer join orders on
11 | c_custkey = o_custkey
12 | and o_comment not like '%special%requests%'
13 | group by
14 | c_custkey
15 | ) as c_orders (c_custkey, c_count)
16 | group by
17 | c_count
18 | order by
19 | custdist desc,
20 | c_count desc;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q14.sql:
--------------------------------------------------------------------------------
1 | select
2 | 100.00 * sum(case
3 | when p_type like 'PROMO%'
4 | then l_extendedprice * (1 - l_discount)
5 | else 0
6 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
7 | from
8 | lineitem,
9 | part
10 | where
11 | l_partkey = p_partkey
12 | and l_shipdate >= date '1995-09-01'
13 | and l_shipdate < date '1995-10-01';
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q15.sql:
--------------------------------------------------------------------------------
1 | create view revenue0 as -- Per-supplier revenue for line items shipped in the three months starting 1996-01-01.
2 | select
3 | l_suppkey as supplier_no,
4 | sum(l_extendedprice * (1 - l_discount)) as total_revenue
5 | from
6 | lineitem
7 | where
8 | l_shipdate >= date '1996-01-01'
9 | and l_shipdate < date '1996-01-01' + interval '3' month
10 | group by
11 | l_suppkey;
12 | select -- Supplier(s) whose revenue equals the maximum total_revenue in revenue0.
13 | s_suppkey,
14 | s_name,
15 | s_address,
16 | s_phone,
17 | total_revenue
18 | from
19 | supplier,
20 | revenue0
21 | where
22 | s_suppkey = supplier_no
23 | and total_revenue = (
24 | select
25 | max(total_revenue)
26 | from
27 | revenue0
28 | )
29 | order by
30 | s_suppkey;
31 |
32 | drop view revenue0; -- NOTE(review): queries/revenue0.sql creates a table with this same name and definition — confirm the two do not collide when the workspace runs
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q16.sql:
--------------------------------------------------------------------------------
1 | select
2 | p_brand,
3 | p_type,
4 | p_size,
5 | count(distinct ps_suppkey) as supplier_cnt
6 | from
7 | partsupp,
8 | part
9 | where
10 | p_partkey = ps_partkey
11 | and p_brand <> 'Brand#45'
12 | and p_type not like 'MEDIUM POLISHED%'
13 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
14 | and ps_suppkey not in (
15 | select
16 | s_suppkey
17 | from
18 | supplier
19 | where
20 | s_comment like '%Customer%Complaints%'
21 | )
22 | group by
23 | p_brand,
24 | p_type,
25 | p_size
26 | order by
27 | supplier_cnt desc,
28 | p_brand,
29 | p_type,
30 | p_size;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q17.sql:
--------------------------------------------------------------------------------
1 | select
2 | sum(l_extendedprice) / 7.0 as avg_yearly
3 | from
4 | lineitem,
5 | part
6 | where
7 | p_partkey = l_partkey
8 | and p_brand = 'Brand#23'
9 | and p_container = 'MED BOX'
10 | and l_quantity < (
11 | select
12 | 0.2 * avg(l_quantity)
13 | from
14 | lineitem
15 | where
16 | l_partkey = p_partkey
17 | );
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q18.sql:
--------------------------------------------------------------------------------
1 | select
2 | c_name,
3 | c_custkey,
4 | o_orderkey,
5 | o_orderdate,
6 | o_totalprice,
7 | sum(l_quantity)
8 | from
9 | customer,
10 | orders,
11 | lineitem
12 | where
13 | o_orderkey in (
14 | select
15 | l_orderkey
16 | from
17 | lineitem
18 | group by
19 | l_orderkey
20 | having
21 | sum(l_quantity) > 300
22 | )
23 | and c_custkey = o_custkey
24 | and o_orderkey = l_orderkey
25 | group by
26 | c_name,
27 | c_custkey,
28 | o_orderkey,
29 | o_orderdate,
30 | o_totalprice
31 | order by
32 | o_totalprice desc,
33 | o_orderdate;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q2.sql:
--------------------------------------------------------------------------------
1 | select
2 | s_acctbal,
3 | s_name,
4 | n_name,
5 | p_partkey,
6 | p_mfgr,
7 | s_address,
8 | s_phone,
9 | s_comment
10 | from
11 | part,
12 | supplier,
13 | partsupp,
14 | nation,
15 | region
16 | where
17 | p_partkey = ps_partkey
18 | and s_suppkey = ps_suppkey
19 | and p_size = 15
20 | and p_type like '%BRASS'
21 | and s_nationkey = n_nationkey
22 | and n_regionkey = r_regionkey
23 | and r_name = 'EUROPE'
24 | and ps_supplycost = (
25 | select
26 | min(ps_supplycost)
27 | from
28 | partsupp,
29 | supplier,
30 | nation,
31 | region
32 | where
33 | p_partkey = ps_partkey
34 | and s_suppkey = ps_suppkey
35 | and s_nationkey = n_nationkey
36 | and n_regionkey = r_regionkey
37 | and r_name = 'EUROPE'
38 | )
39 | order by
40 | s_acctbal desc,
41 | n_name,
42 | s_name,
43 | p_partkey;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q20.sql:
--------------------------------------------------------------------------------
1 | select
2 | s_name,
3 | s_address
4 | from
5 | supplier,
6 | nation
7 | where
8 | s_suppkey in (
9 | select
10 | ps_suppkey
11 | from
12 | partsupp
13 | where
14 | ps_partkey in (
15 | select
16 | p_partkey
17 | from
18 | part
19 | where
20 | p_name like 'forest%'
21 | )
22 | and ps_availqty > (
23 | select
24 | 0.5 * sum(l_quantity)
25 | from
26 | lineitem
27 | where
28 | l_partkey = ps_partkey
29 | and l_suppkey = ps_suppkey
30 | and l_shipdate >= date '1994-01-01'
31 | and l_shipdate < date '1994-01-01' + interval '1' year
32 | )
33 | )
34 | and s_nationkey = n_nationkey
35 | and n_name = 'CANADA'
36 | order by
37 | s_name;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q21.sql:
--------------------------------------------------------------------------------
1 | select
2 | s_name,
3 | count(*) as numwait
4 | from
5 | supplier,
6 | lineitem l1,
7 | orders,
8 | nation
9 | where
10 | s_suppkey = l1.l_suppkey
11 | and o_orderkey = l1.l_orderkey
12 | and o_orderstatus = 'F'
13 | and l1.l_receiptdate > l1.l_commitdate
14 | and exists (
15 | select
16 | *
17 | from
18 | lineitem l2
19 | where
20 | l2.l_orderkey = l1.l_orderkey
21 | and l2.l_suppkey <> l1.l_suppkey
22 | )
23 | and not exists (
24 | select
25 | *
26 | from
27 | lineitem l3
28 | where
29 | l3.l_orderkey = l1.l_orderkey
30 | and l3.l_suppkey <> l1.l_suppkey
31 | and l3.l_receiptdate > l3.l_commitdate
32 | )
33 | and s_nationkey = n_nationkey
34 | and n_name = 'SAUDI ARABIA'
35 | group by
36 | s_name
37 | order by
38 | numwait desc,
39 | s_name;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q22.sql:
--------------------------------------------------------------------------------
1 | select
2 | cntrycode,
3 | count(*) as numcust,
4 | sum(c_acctbal) as totacctbal
5 | from
6 | (
7 | select
8 | substring(c_phone from 1 for 2) as cntrycode,
9 | c_acctbal
10 | from
11 | customer
12 | where
13 | substring(c_phone from 1 for 2) in
14 | ('13', '31', '23', '29', '30', '18', '17')
15 | and c_acctbal > (
16 | select
17 | avg(c_acctbal)
18 | from
19 | customer
20 | where
21 | c_acctbal > 0.00
22 | and substring(c_phone from 1 for 2) in
23 | ('13', '31', '23', '29', '30', '18', '17')
24 | )
25 | and not exists (
26 | select
27 | *
28 | from
29 | orders
30 | where
31 | o_custkey = c_custkey
32 | )
33 | ) as custsale
34 | group by
35 | cntrycode
36 | order by
37 | cntrycode;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q3.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_orderkey,
3 | sum(l_extendedprice * (1 - l_discount)) as revenue,
4 | o_orderdate,
5 | o_shippriority
6 | from
7 | customer,
8 | orders,
9 | lineitem
10 | where
11 | c_mktsegment = 'BUILDING'
12 | and c_custkey = o_custkey
13 | and l_orderkey = o_orderkey
14 | and o_orderdate < date '1995-03-15'
15 | and l_shipdate > date '1995-03-15'
16 | group by
17 | l_orderkey,
18 | o_orderdate,
19 | o_shippriority
20 | order by
21 | revenue desc,
22 | o_orderdate;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q4.sql:
--------------------------------------------------------------------------------
1 | select -- TPC-H Q4: per order priority, counts orders placed in the three months from 1993-07-01 that have at least one line item received after its commit date.
2 | o_orderpriority,
3 | count(*) as order_count
4 | from
5 | orders
6 | where
7 | o_orderdate >= date '1993-07-01' -- typed DATE literal, consistent with the upper bound on the next line
8 | and o_orderdate < date '1993-07-01' + interval '3' month
9 | and exists (
10 | select
11 | *
12 | from
13 | lineitem
14 | where
15 | l_orderkey = o_orderkey
16 | and l_commitdate < l_receiptdate
17 | )
18 | group by
19 | o_orderpriority
20 | order by
21 | o_orderpriority;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q5.sql:
--------------------------------------------------------------------------------
1 | select
2 | n_name,
3 | sum(l_extendedprice * (1 - l_discount)) as revenue
4 | from
5 | customer,
6 | orders,
7 | lineitem,
8 | supplier,
9 | nation,
10 | region
11 | where
12 | c_custkey = o_custkey
13 | and l_orderkey = o_orderkey
14 | and l_suppkey = s_suppkey
15 | and c_nationkey = s_nationkey
16 | and s_nationkey = n_nationkey
17 | and n_regionkey = r_regionkey
18 | and r_name = 'ASIA'
19 | and o_orderdate >= date '1994-01-01'
20 | and o_orderdate < date '1995-01-01'
21 | group by
22 | n_name
23 | order by
24 | revenue desc;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q6.sql:
--------------------------------------------------------------------------------
1 | select
2 | sum(l_extendedprice * l_discount) as revenue
3 | from
4 | lineitem
5 | where
6 | l_shipdate >= date '1994-01-01'
7 | and l_shipdate < date '1995-01-01'
8 | and l_discount between 0.06 - 0.01 and 0.06 + 0.01
9 | and l_quantity < 24;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q7.sql:
--------------------------------------------------------------------------------
1 | select
2 | supp_nation,
3 | cust_nation,
4 | l_year,
5 | sum(volume) as revenue
6 | from
7 | (
8 | select
9 | n1.n_name as supp_nation,
10 | n2.n_name as cust_nation,
11 | extract(year from l_shipdate) as l_year,
12 | l_extendedprice * (1 - l_discount) as volume
13 | from
14 | supplier,
15 | lineitem,
16 | orders,
17 | customer,
18 | nation n1,
19 | nation n2
20 | where
21 | s_suppkey = l_suppkey
22 | and o_orderkey = l_orderkey
23 | and c_custkey = o_custkey
24 | and s_nationkey = n1.n_nationkey
25 | and c_nationkey = n2.n_nationkey
26 | and (
27 | (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
28 | or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
29 | )
30 | and l_shipdate between date '1995-01-01' and date '1996-12-31'
31 | ) as shipping
32 | group by
33 | supp_nation,
34 | cust_nation,
35 | l_year
36 | order by
37 | supp_nation,
38 | cust_nation,
39 | l_year;
40 |
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q8.sql:
--------------------------------------------------------------------------------
1 | select
2 | o_year,
3 | sum(case
4 | when nation = 'BRAZIL' then volume
5 | else 0
6 | end) / sum(volume) as mkt_share
7 | from
8 | (
9 | select
10 | extract(year from o_orderdate) as o_year,
11 | l_extendedprice * (1 - l_discount) as volume,
12 | n2.n_name as nation
13 | from
14 | part,
15 | supplier,
16 | lineitem,
17 | orders,
18 | customer,
19 | nation n1,
20 | nation n2,
21 | region
22 | where
23 | p_partkey = l_partkey
24 | and s_suppkey = l_suppkey
25 | and l_orderkey = o_orderkey
26 | and o_custkey = c_custkey
27 | and c_nationkey = n1.n_nationkey
28 | and n1.n_regionkey = r_regionkey
29 | and r_name = 'AMERICA'
30 | and s_nationkey = n2.n_nationkey
31 | and o_orderdate between date '1995-01-01' and date '1996-12-31'
32 | and p_type = 'ECONOMY ANODIZED STEEL'
33 | ) as all_nations
34 | group by
35 | o_year
36 | order by
37 | o_year;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/q9.sql:
--------------------------------------------------------------------------------
1 | select
2 | nation,
3 | o_year,
4 | sum(amount) as sum_profit
5 | from
6 | (
7 | select
8 | n_name as nation,
9 | extract(year from o_orderdate) as o_year,
10 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
11 | from
12 | part,
13 | supplier,
14 | lineitem,
15 | partsupp,
16 | orders,
17 | nation
18 | where
19 | s_suppkey = l_suppkey
20 | and ps_suppkey = l_suppkey
21 | and ps_partkey = l_partkey
22 | and p_partkey = l_partkey
23 | and o_orderkey = l_orderkey
24 | and s_nationkey = n_nationkey
25 | and p_name like '%green%'
26 | ) as profit
27 | group by
28 | nation,
29 | o_year
30 | order by
31 | nation,
32 | o_year desc;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/queries/revenue0.sql:
--------------------------------------------------------------------------------
1 | create table revenue0 as
2 | select
3 | l_suppkey as supplier_no,
4 | sum(l_extendedprice * (1 - l_discount)) as total_revenue
5 | from
6 | lineitem
7 | where
8 | l_shipdate >= date '1996-01-01'
9 | and l_shipdate < date '1996-01-01' + interval '3' month
10 | group by
11 | l_suppkey;
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/customer.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE customer with (format='PARQUET', LOCATION='tpch_pd_scale_10/customer/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/lineitem.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE lineitem with (format='PARQUET', LOCATION='tpch_pd_scale_10/lineitem/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/nation.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE nation with (format='PARQUET', LOCATION='tpch_pd_scale_10/nation/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/orders.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE orders with (format='PARQUET', LOCATION='tpch_pd_scale_10/orders/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/part.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE part with (format='PARQUET', LOCATION='tpch_pd_scale_10/part/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/partsupp.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE partsupp with (format='PARQUET', LOCATION='tpch_pd_scale_10/partsupp/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/region.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE region with (format='PARQUET', LOCATION='tpch_pd_scale_10/region/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/sources/supplier.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE supplier with (format='PARQUET', LOCATION='tpch_pd_scale_10/supplier/');
--------------------------------------------------------------------------------
/benchmarks/trino/tpch/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | edition: "1.3"
3 | name: "tpch"
4 | defaults:
5 | dialect: trino
6 | description: >
7 | The TPC-H benchmark is a standard for measuring processing performance of analytical SQL engines.
8 |
9 | To run the benchmark:
10 | 1. Run the included hydrate.sh script which downloads relevant data
11 | 2. To execute all queries: `sdf run --no-cache`
12 | includes:
13 | - path: sources
14 | - path: queries
15 | - path: tpch_pd_scale_10
16 | type: resource
17 |
--------------------------------------------------------------------------------
/docs/assets/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/docs/assets/favicon.png
--------------------------------------------------------------------------------
/docs/cloud/authentication.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "CLI Authentication"
3 | ---
4 |
5 | ## Authenticating the CLI
6 |
7 | Before you can deploy your assets to the SDF Cloud, you'll first need to authenticate with the platform.
8 |
9 |
10 | SDF Cloud is currently only available on the **Plus** plan and above. If you'd like to get access, please [inquire](https://sdf.com/inquiries).
11 |
12 |
13 | Authenticating with your CLI is easy. Simply run the command below to begin:
14 |
15 | ```shell
16 | sdf auth login
17 | ```
18 |
19 | Next, your default browser will open up and prompt you to login. Currently, Google and Okta are supported authentication providers.
20 |
21 | Once you've logged in, you'll be redirected to a success page:
22 |
23 |
24 |
25 | Congratulations! You've successfully authenticated with the SDF Platform. You can now deploy, monitor, and interact with your assets on the platform.
26 |
--------------------------------------------------------------------------------
/docs/cloud/introduction.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Overview"
3 | description:
4 | "SDF Cloud is an integrated, auto-generated data catalog with column-level lineage, data classification, and more."
5 | ---
6 |
7 | SDF Cloud is the fastest way to understand your data warehouse at a glance. Code-driven assets are searchable and clear, driving
8 | data awareness across a whole organization. Visualize column level lineage, data classifications, and reports at a glance.
9 |
10 | Assets are generated automatically through static analysis, so there is no configuration, migration, or setup time. It. Just. Works.
11 |
12 |
13 |
19 |
20 |
--------------------------------------------------------------------------------
/docs/database/orchestration.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Orchestration"
3 | description:
4 | "Simple orchestration with SDF"
5 | ---
6 |
--------------------------------------------------------------------------------
/docs/database/supported-functions/trino/comparison-functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Comparison Functions"
3 | ---
4 |
5 |
6 | ## `greatest`
7 |
8 | Returns the largest of the provided values.
9 |
10 |
11 | *Examples:*
12 | ```sql examples.sql
13 | SELECT greatest(1, 2, 3) AS value; -- value '3'
14 | ```
15 | *Supported Signatures*
16 | ```sql
17 | function greatest($3, ...) returns $3
18 | ```
19 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#greatest)
20 |
21 | ## `least`
22 |
23 | Returns the smallest of the provided values.
24 |
25 |
26 | *Examples:*
27 | ```sql examples.sql
28 | SELECT LEAST(5,6,7,1,2,3,4) -- list of columns or values -- value '1'
29 | ```
30 | *Supported Signatures*
31 | ```sql
32 | function least($3, ...) returns $3
33 | ```
34 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#least)
35 |
36 |
37 |
--------------------------------------------------------------------------------
/docs/guide/advanced/telemetry.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Understanding Telemetry"
3 | ---
4 |
5 | SDF sends fully-anonymized telemetry to help us improve the tool by understanding how it is used. Telemetry is optional and can be disabled at any time.
6 |
7 |
8 | If you are willing and able to leave telemetry enabled, thank you! This will help us better understand how the CLI app is used, allowing us to improve your experience.
9 |
10 |
11 | ## What is tracked?
12 |
13 | The following information is included in the telemetry events:
14 |
15 | - Anonymous device ID
16 | - Commands executed
17 | - The start and end timestamps of a command execution
18 | - Platform information (device details, operating system, CPU architecture, number of CPUs, total/available memory, etc.)
19 | - Workspace metrics (dialect, number of models etc.)
20 | - Exit code and errors if there are any
21 |
22 | We use telemetry for aggregate analysis and do not tie telemetry events to a specific identity.
23 |
24 | ## Disabling telemetry
25 |
26 | You can disable telemetry by setting the environment variable `DISABLE_SDF_CLI_TELEMETRY=1` before running SDF.
27 |
--------------------------------------------------------------------------------
/docs/guide/data-quality/stats.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Stats"
3 | ---
4 |
5 | ## Understanding Stats
6 |
7 | **COMING SOON**
8 |
--------------------------------------------------------------------------------
/docs/guide/transformation/authentication.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'Overview'
3 | description: 'SDF has rich authentication capabilities'
4 | icon: "play"
5 | ---
6 |
7 | - where auth stuff is stored on different systems
8 | - how to login
9 | - how to logout
10 | - how to validate connections (auth status)
11 | - how to deal with multiple connections
12 |
--------------------------------------------------------------------------------
/docs/integrations/openai/ai-classification.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'AI Classification'
3 | description: 'Use your own OpenAI keypair with SDF to accelerate Data Classification'
4 | icon: "robot"
5 | ---
6 |
7 |
8 | Documentation is still in progress. Please check back later for a full guide on how to use OpenAI with SDF.
9 |
10 |
--------------------------------------------------------------------------------
/docs/linter/format.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Formatting"
3 | description:
4 | "Overview of Formatting"
5 | version: "Preview"
6 | ---
7 |
8 | ## SQL Formatting
9 | The SDF formatter is an extremely fast SQL formatter designed as a drop-in replacement for SQLFluff. It is available natively as part of the SDF CLI.
10 |
11 | ### `sdf format`
12 | The entrypoint for the formatter is `sdf format`.
13 | ```
14 | sdf format # Formats all files in the SDF Workspace
15 | sdf format /path/to/file.sql # Formats one specific file
16 | sdf format /path/to/dir/*.sql # Formats all files matching glob pattern
17 | ```
18 |
19 | ### Formatting Configuration
20 | You may set a formatting configuration for an SDF project.
21 |
22 | ```yml workspace.sdf.yml
23 | sdf-args:
24 | lint: >
25 | ...
26 | format: >
27 | -l line-length=80
28 | -l indent=2
29 | -l commas=leading
30 | ```
31 |
32 | ### Formatting Reference
33 | The following formatting options are supported.
34 |
35 | | Type | Configuration | Auto-Fix | Default |
36 | | ------- | -------------- | -------- | ---------- |
37 | | Layout | `line-length` | ✅ | `80` |
38 | | Layout | `indent` | ✅ | `2` |
39 | | Layout | `commas` | ✅ | `trailing` |
40 |
--------------------------------------------------------------------------------
/docs/linter/macros.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Macros (jinja)"
3 | description:
4 | "Integrating jinja macros into your linting configuration"
5 | version: "Preview"
6 | ---
7 |
8 | ## Jinja Templating
9 | SDF Lint and Format are guaranteed to be compatible with all elements in an SDF workspace,
10 | including jinja macros, and materialization libraries.
11 |
12 | Jinja does not materially impact the performance of SDF Lint, and lint violations are reported
13 | at the level of the source SQL code, not at the pre-processed (i.e. post-Jinja-expansion) level. This makes
14 | debugging easier.
15 |
16 | ### Configuration
17 | No additional configuration is needed for SDF workspaces using macros.
18 |
19 | ### Linting dbt Projects
20 | SDF lint is not (as of now) compatible with dbt projects. If you'd like that capability, let us know in
21 | our [community slack](https://sdf.com/join)!
22 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/approximate_aggregate_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Approximate Aggregate Functions"
3 | ---
4 |
5 | ## `approx_count_distinct`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function approx_count_distinct($1) returns bigint
10 | ```
11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_count_distinct)
12 |
13 | ## `approx_quantiles`
14 |
15 | *Supported Signatures*
16 | ```sql
17 | function approx_quantiles($1, bigint) returns array<$1>
18 | ```
19 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_quantiles)
20 |
21 | ## `approx_top_count`
22 |
23 | *Supported Signatures*
24 | ```sql
25 | function approx_top_count($1, bigint) returns array<struct<value $1, count bigint>>
26 | ```
27 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_top_count)
28 |
29 | ## `approx_top_sum`
30 |
31 | *Supported Signatures*
32 | ```sql
33 | function approx_top_sum($1, bigint, bigint) returns array<struct<value $1, sum bigint>>
34 | ```
35 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#approx_top_sum)
36 |
37 |
38 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/bit_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Bit Functions"
3 | ---
4 |
5 | ## `bit_count`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function bit_count $1($1) returns bigint
10 | where $1 in (int, bytes)
11 | ```
12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#bit_count)
13 |
14 |
15 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/debugging_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Debugging Functions"
3 | ---
4 |
5 | ## `error`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function error(string) returns string
10 | ```
11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#error)
12 |
13 |
14 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/other_expressions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Other Expressions"
3 | ---
4 |
5 | ## `if`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function if(bool, $1, $1) returns $1
10 | ```
11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#if)
12 |
13 | ## `reclassify`
14 |
15 | Changes the classification label of the first argument from the expected classifier specified by the second argument to the desired classifier specified by the third. The expected and desired classifiers must belong to the same classifier group. No other classifiers attached to the first argument are impacted. The function will emit a warning if the first argument does not have the expected classifier.
16 |
17 | *Examples:*
18 | ```sql examples.sql
19 | select reclassify(12345, 'pii.clear_text', 'pii.masked') as value; -- value '12345'
20 | ```
21 | *Supported Signatures*
22 | ```sql
23 | function reclassify($1, string, string) returns $1
24 | function reclassify($1, string) returns $1
25 | ```
26 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#reclassify)
27 |
28 |
29 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/search_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Search Functions"
3 | ---
4 |
5 | ## `search`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function search $1($1, string) returns bool
10 | where $1 in (string, json, array)
11 | ```
12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#search)
13 |
14 |
15 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/security_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Security Functions"
3 | ---
4 |
5 | ## `session_user`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function session_user() returns string
10 | ```
11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#session_user)
12 |
13 |
14 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/statistical_aggregate_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Statistical Aggregation Functions"
3 | ---
4 |
5 | ## `variance`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function variance $1($1) returns double
10 | where $1 in (int64, double)
11 | ```
12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#variance)
13 |
14 |
15 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/temporal_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Temporal Functions"
3 | ---
4 |
5 | ## `date_sub`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function date_sub $1($1, interval) returns $1
10 | where $1 in (date, timestamp)
11 | ```
12 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#date_sub)
13 |
14 |
15 |
--------------------------------------------------------------------------------
/docs/reference/bigquery/utility_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Utility Functions"
3 | ---
4 |
5 | ## `generate_uuid`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function generate_uuid() returns string
10 | ```
11 | [🔗 Official Documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#generate_uuid)
12 |
13 |
14 |
--------------------------------------------------------------------------------
/docs/reference/redshift/aggregate_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Aggregate Functions"
3 | ---
4 |
5 | ## `approx_count_distinct`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function approx_count_distinct(bigint) returns bigint
10 | ```
11 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/c_SQL_functions.html)
12 |
13 | ## `regexp_count`
14 |
15 | *Supported Signatures*
16 | ```sql
17 | function regexp_count(varchar, varchar) returns bigint
18 | function regexp_count(varchar, varchar, bigint) returns bigint
19 | function regexp_count(varchar, varchar, bigint, varchar) returns bigint
20 | ```
21 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/REGEXP_COUNT.html)
22 |
23 |
24 |
--------------------------------------------------------------------------------
/docs/reference/redshift/math_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Math Functions"
3 | ---
4 |
5 | ## `checksum`
6 |
7 | *Supported Signatures*
8 | ```sql
9 | function checksum(varchar) returns bigint
10 | ```
11 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/r_CHECKSUM.html)
12 |
13 | ## `is_integer`
14 |
15 | *Supported Signatures*
16 | ```sql
17 | function is_integer(super) returns boolean
18 | ```
19 | [🔗 Official Documentation](https://docs.aws.amazon.com/redshift/latest/dg/r_is_integer.html)
20 |
21 |
22 |
--------------------------------------------------------------------------------
/docs/reference/snowflake/account_usage_table_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Account Usage Table Functions"
3 | ---
4 |
5 |
6 | ## `tag_references_with_lineage`
7 |
8 | *Supported Signatures*
9 | ```sql
10 | function tag_references_with_lineage(string) returns struct<
11 | tag_database string,
12 | tag_schema string,
13 | tag_id bigint,
14 | tag_name string,
15 | tag_value string,
16 | level string,
17 | object_database string,
18 | object_schema string,
19 | object_id bigint,
20 | object_name string,
21 | object_deleted timestamp,
22 | domain string,
23 | column_id bigint,
24 | column_name string
25 | >
26 |
27 | ```
28 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/tag_references_with_lineage)
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/reference/snowflake/hash_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Hash Functions"
3 | ---
4 |
5 |
6 | ## `hash`
7 |
8 | hash value
9 | *Supported Signatures*
10 | ```sql
11 | function hash(...) returns decimal(19, 0)
12 | ```
13 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/hash)
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/reference/snowflake/metadata_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Metadata Functions"
3 | ---
4 |
5 |
6 | ## `generate_column_description`
7 |
8 | parse the result from INFER_SCHEMA and generate a copy-able column description
9 | *Supported Signatures*
10 | ```sql
11 | function generate_column_description(array, varchar) returns varchar
12 | ```
13 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/generate_column_description)
14 |
15 | ## `get_ddl`
16 |
17 | DDL used to create the object
18 | *Supported Signatures*
19 | ```sql
20 | function get_ddl(varchar, varchar) returns varchar
21 | function get_ddl(varchar, varchar, boolean) returns varchar
22 | function get_ddl(varchar, decimal(38, 0)) returns varchar
23 | function get_ddl(varchar, decimal(38, 0), boolean) returns varchar
24 | ```
25 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/get_ddl)
26 |
27 |
28 |
--------------------------------------------------------------------------------
/docs/reference/snowflake/vector_similarity_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Vector Similarity Functions"
3 | ---
4 |
5 |
6 | ## `vector_inner_product`
7 |
8 | Inner product between vectors
9 | *Supported Signatures*
10 | ```sql
11 | function vector_inner_product(array, array) returns double
12 | ```
13 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/vector_inner_product)
14 |
15 | ## `vector_l2_distance`
16 |
17 | L2 distance between vectors
18 | *Supported Signatures*
19 | ```sql
20 | function vector_l2_distance(array, array) returns double
21 | ```
22 | [🔗 Official Documentation](https://docs.snowflake.com/en/sql-reference/functions/vector_l2_distance)
23 |
24 |
25 |
--------------------------------------------------------------------------------
/docs/reference/trino/comparison_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Comparison Functions"
3 | ---
4 |
5 | ## `greatest`
6 |
7 | Returns the largest of the provided values.
8 |
9 |
10 | *Examples:*
11 | ```sql examples.sql
12 | SELECT greatest(1, 2, 3) AS value; -- value '3'
13 | ```
14 | *Supported Signatures*
15 | ```sql
16 | function greatest($3, ...) returns $3
17 | ```
18 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#greatest)
19 |
20 | ## `least`
21 |
22 | Returns the smallest of the provided values.
23 |
24 |
25 | *Examples:*
26 | ```sql examples.sql
27 | SELECT LEAST(5,6,7,1,2,3,4) -- list of columns or values -- value '1'
28 | ```
29 | *Supported Signatures*
30 | ```sql
31 | function least($3, ...) returns $3
32 | ```
33 | [🔗 Official Documentation](https://trino.io/docs/current/functions/comparison.html#least)
34 |
35 |
36 |
--------------------------------------------------------------------------------
/docs/reference/trino/hyperloglog_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Hyperloglog Functions"
3 | ---
4 |
5 | ## `approx_set`
6 |
7 | Returns the HyperLogLog sketch of the input data set of x. This data sketch underlies approx_distinct() and can be stored and used later by calling cardinality().
8 |
9 | *Supported Signatures*
10 | ```sql
11 | function approx_set(bigint) returns hyperloglog
12 | function approx_set(double) returns hyperloglog
13 | function approx_set(varchar) returns hyperloglog
14 | ```
15 | [🔗 Official Documentation](https://trino.io/docs/current/functions/hyperloglog.html#approx_set)
16 |
17 | ## `empty_approx_set`
18 |
19 | Returns an empty HyperLogLog.
20 |
21 | *Supported Signatures*
22 | ```sql
23 | function empty_approx_set() returns hyperloglog
24 | ```
25 | [🔗 Official Documentation](https://trino.io/docs/current/functions/hyperloglog.html#empty_approx_set)
26 |
27 | ## `merge`
28 |
29 | Returns the HyperLogLog of the aggregate union of the individual hll HyperLogLog structures.
30 |
31 | *Supported Signatures*
32 | ```sql
33 | function merge(hyperloglog) returns hyperloglog
34 | function merge(qdigest) returns qdigest
35 | function merge(tdigest) returns tdigest
36 | ```
37 | [🔗 Official Documentation](https://trino.io/docs/current/functions/hyperloglog.html#merge)
38 |
39 |
40 |
--------------------------------------------------------------------------------
/docs/reference/trino/lambda_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Lambda Functions"
3 | ---
4 |
5 | ## `reduce_agg`
6 |
7 | Reduces all input values into a single value. inputFunction will be invoked for each non-null input value. In addition to taking the input value, inputFunction takes the current state, initially initialState, and returns the new state. combineFunction will be invoked to combine two states into a new state. The final state is returned.
8 |
9 | *Supported Signatures*
10 | ```sql
11 | function reduce_agg($1, $10, function($10, $1, $10), function($10, $10, $10)) returns $10
12 | ```
13 | [🔗 Official Documentation](https://trino.io/docs/current/functions/aggregate.html#reduce_agg)
14 |
15 | ## `regexp_replace`
16 |
17 | Removes every instance of the substring matched by the regular expression pattern from string or, when a replacement is given, replaces each match with that replacement.
18 |
19 |
20 | *Examples:*
21 | ```sql examples.sql
22 | SELECT REGEXP_REPLACE('text foo another text', 'foo', 'bar') -- value 'text bar another text'
23 | ```
24 | *Supported Signatures*
25 | ```sql
26 | function regexp_replace(varchar, joniregexp) returns varchar
27 | function regexp_replace(varchar, joniregexp, varchar) returns varchar
28 | ```
29 | [🔗 Official Documentation](https://trino.io/docs/current/functions/regexp.html#regexp_replace)
30 |
31 |
32 |
--------------------------------------------------------------------------------
/docs/reference/trino/mongodb_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Mongodb Functions"
3 | ---
4 |
5 | ## `objectid`
6 |
7 | Creates a MongoDB ObjectId: a newly generated one when called with no argument, or one converted from the given string.
8 |
9 | *Supported Signatures*
10 | ```sql
11 | function objectid() returns objectid
12 | function objectid(varchar) returns objectid
13 | ```
14 | [🔗 Official Documentation](https://trino.io/docs/current/connector/mongodb.html#objectid)
15 |
16 | ## `objectid_timestamp`
17 |
18 | Extracts the TIMESTAMP WITH TIME ZONE from a given ObjectId.
19 |
20 | *Supported Signatures*
21 | ```sql
22 | function objectid_timestamp(objectid) returns timestamp
23 | ```
24 | [🔗 Official Documentation](https://trino.io/docs/current/functions/../connector/mongodb.html#objectid_timestamp)
25 |
26 | ## `timestamp_objectid`
27 |
28 | Creates an ObjectId from a TIMESTAMP WITH TIME ZONE.
29 |
30 | *Supported Signatures*
31 | ```sql
32 | function timestamp_objectid(timestamp(0)) returns objectid
33 | ```
34 | [🔗 Official Documentation](https://trino.io/docs/current/functions/../connector/mongodb.html#timestamp_objectid)
35 |
36 |
37 |
--------------------------------------------------------------------------------
/docs/reference/trino/t-digest_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "T-digest Functions"
3 | ---
4 |
5 | ## `tdigest_agg`
6 |
7 | Composes all input values of x into a tdigest. x can be of any numeric type.
8 |
9 | *Supported Signatures*
10 | ```sql
11 | function tdigest_agg(double) returns tdigest
12 | function tdigest_agg(double, double) returns tdigest
13 | ```
14 | [🔗 Official Documentation](https://trino.io/docs/current/functions/tdigest.html#tdigest_agg)
15 |
16 |
17 |
--------------------------------------------------------------------------------
/docs/reference/trino/uuid_functions.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Uuid Functions"
3 | ---
4 |
5 | ## `uuid`
6 |
7 | Returns a pseudo randomly generated UUID (type 4).
8 |
9 |
10 | *Examples:*
11 | ```sql examples.sql
12 | SELECT uuid() as value; -- value '22552909-5560-4234-a383-5a968a4e2a91'
13 | ```
14 | *Supported Signatures*
15 | ```sql
16 | function uuid() returns uuid
17 | ```
18 | *Note: uuid() is volatile, i.e. might return a
19 | different value for the same input.*
20 |
21 | [🔗 Official Documentation](https://trino.io/docs/current/functions/uuid.html#uuid)
22 |
23 |
24 |
--------------------------------------------------------------------------------
/docs/snippets/preview-warning.mdx:
--------------------------------------------------------------------------------
1 |
2 | {title} is currently only available in our preview release. Preview features are not recommended for production use. For more information on how to install or update to the Preview release channel, see [here](/introduction/install#release-tracks).
3 |
4 |
--------------------------------------------------------------------------------
/docs/tutorials/learn-more.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Learn more"
3 | ---
4 |
5 | ## Tutorials Summary
6 | We hope you enjoyed our series of tutorials and learned more about SDF.
7 |
8 | We learned how to:
9 | * [Create a model](/tutorials/creating-a-model)
10 | * [Debug with SDF](/tutorials/debugging)
11 | * [Deprecate a model](/tutorials/deprecating-a-model)
12 | * [Enrich your warehouse](/tutorials/enriching-your-warehouse)
13 | * [Ensure data quality](/tutorials/ensuring-data-quality)
14 |
15 |
16 | ## Quick Links
17 | There is much more that SDF can do!
18 |
19 |
20 | Connect to a project with Snowflake or Redshift
21 |
22 |
23 | Visualize your data warehouse with SDF cloud beta
24 |
25 |
26 | Learn about different materialization types
27 |
28 |
29 | Transform your warehouse dynamically using variables and macros
30 |
31 |
32 |
--------------------------------------------------------------------------------
/examples/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/.gitkeep
--------------------------------------------------------------------------------
/examples/bigquery_incremental/models/last_hn_timestamp.sql:
--------------------------------------------------------------------------------
1 | SELECT MAX(`timestamp`) as ts FROM bigquery-public-data.hacker_news.`full`
2 |
--------------------------------------------------------------------------------
/examples/bigquery_incremental/models/popular_articles.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | *
3 | FROM
4 | bigquery-public-data.hacker_news.`full`
5 | WHERE
6 | {% if builtin.is_incremental_mode %}
7 | -- Only fetch rows that are newer than the newest row in the previous materialization of this table
8 | `timestamp` >= (SELECT MAX(`timestamp`) FROM popular_articles)
9 | {% else %}
10 | `timestamp` >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
11 | {% endif %}
12 | AND title IS NOT NULL
13 | AND (dead IS NULL OR dead = FALSE)
14 | AND score > 100
15 |
--------------------------------------------------------------------------------
/examples/bigquery_incremental/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: bigquery_incremental
3 | edition: "1.3"
4 | description: >
5 | This workspace demonstrates incremental tables with BigQuery.
6 |
7 | Make sure to replace `` with your BigQuery project name.
8 |
9 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/bigquery/incremental-materialization
10 |
11 | defaults:
12 | dialect: bigquery
13 | preprocessor: jinja
14 | catalog:
15 |
16 | includes:
17 | - path: models
18 |
19 | integrations:
20 | - provider: bigquery
21 | type: database
22 | sources:
23 | - pattern: bigquery-public-data.*.*
24 | targets:
25 | - pattern: "*.pub.*"
26 |
--------------------------------------------------------------------------------
/examples/bigquery_starter/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/bigquery_starter/models/aggregate_orders.sql:
--------------------------------------------------------------------------------
1 | -- Per-customer totals: how many orders each customer placed and how much
2 | -- they spent (quantity * product price, summed over their orders).
3 | SELECT
4 |     ord.customer_id,
5 |     cust.customer_name,
6 |     COUNT(ord.order_id) AS total_orders,
7 |     SUM(ord.quantity * prod.price) AS total_spent
8 | FROM orders AS ord
9 | INNER JOIN customers AS cust
10 |     ON cust.customer_id = ord.customer_id
11 | INNER JOIN products AS prod
12 |     ON prod.product_id = ord.product_id
13 | GROUP BY
14 |     ord.customer_id,
15 |     cust.customer_name;
--------------------------------------------------------------------------------
/examples/bigquery_starter/models/customers_over_100.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | customer_id,
3 | customer_name,
4 | total_orders,
5 | total_spent
6 | FROM
7 | aggregate_orders
8 | WHERE
9 | total_spent > 100
10 | ORDER BY
11 | total_spent DESC;
--------------------------------------------------------------------------------
/examples/bigquery_starter/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: bigquery_starter
3 | edition: "1.3"
4 | description: >
5 | This is a starter workspace for BigQuery users. It includes a script to create a dataset and some initial tables with seed data, then one table and one view transforming
6 | the seed data.
7 |
8 | Make sure to set the empty `catalog` default below to your BigQuery project name.
9 |
10 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/bigquery/basic-materialization
11 |
12 | defaults:
13 | catalog:
14 | schema: sdf_ecommerce_example
15 | dialect: bigquery
16 |
17 | includes:
18 | - path: models
19 |
20 | integrations:
21 | - provider: bigquery
22 | type: database
23 | sources:
24 | - pattern: "*.sdf_ecommerce_example.*"
25 | targets:
26 | - pattern: "*.sdf_ecommerce_example.*"
27 |
28 |
--------------------------------------------------------------------------------
/examples/cybersyn_tech_innovation/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/cybersyn_tech_innovation/models/sdf_snowflake/cybersyn_tech_innovation/all_nvidia_patents.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Find all patents published in the last year where Nvidia is the designated assignee
3 | */
4 | WITH patents_in_last_year AS (
5 | SELECT
6 | patent_id,
7 | patent_type,
8 | invention_title
9 | FROM tech__innovation_essentials.cybersyn.uspto_patent_index AS patent
10 | WHERE patent.DOCUMENT_PUBLICATION_DATE > DATEADD(YEAR, -1, CURRENT_DATE())
11 | )
12 | SELECT
13 | patent.patent_id,
14 | patent.patent_type,
15 | patent.invention_title
16 | FROM tech__innovation_essentials.cybersyn.uspto_contributor_index AS contrib
17 | JOIN tech__innovation_essentials.cybersyn.uspto_patent_contributor_relationships AS rship
18 | ON (contrib.contributor_id = rship.contributor_id)
19 | JOIN patents_in_last_year AS patent
20 | ON (rship.patent_id = patent.patent_id)
21 | WHERE contrib.contributor_name ILIKE 'NVIDIA CORPORATION'
22 | AND rship.contribution_type = 'Assignee - United States Company Or Corporation'
--------------------------------------------------------------------------------
/examples/cybersyn_tech_innovation/models/sdf_snowflake/cybersyn_tech_innovation/funder_aggregates.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | FUNDER_NAME,
3 | COUNT(*) AS total_funders,
4 | COALESCE(SUM(WORKS_COUNT), 0) AS total_works_count,
5 | COALESCE(SUM(WORKS_CITED_BY_COUNT), 0) AS total_citations,
6 | COALESCE(SUM(GRANTS_COUNT), 0) AS total_grants,
7 | COUNT(DISTINCT FUNDER_COUNTRY_GEO_ID) AS unique_country_count,
8 | MIN(CREATED_DATE) AS first_created_date,
9 | MAX(UPDATED_DATE) AS last_updated_date
10 | FROM TECH__INNOVATION_ESSENTIALS.CYBERSYN.OPENALEX_FUNDERS_INDEX
11 | GROUP BY FUNDER_NAME
12 | ORDER BY total_citations DESC
--------------------------------------------------------------------------------
/examples/cybersyn_tech_innovation/models/sdf_snowflake/cybersyn_tech_innovation/most_starred_repos.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Pull the repos with the most stars in the past year
3 | */
4 | WITH latest_repo_name AS (
5 | SELECT
6 | repo_name,
7 | repo_id
8 | FROM tech__innovation_essentials.cybersyn.github_repos
9 | QUALIFY ROW_NUMBER() OVER (PARTITION BY repo_id ORDER BY first_seen DESC) = 1
10 | )
11 | SELECT
12 | repo.repo_name,
13 | repo.repo_id,
14 | SUM(stars.count) AS sum_stars
15 | FROM tech__innovation_essentials.cybersyn.github_stars AS stars
16 | JOIN latest_repo_name AS repo
17 | ON (repo.repo_id = stars.repo_id)
18 | WHERE stars.date >= DATEADD('day', -365, CURRENT_DATE)
19 | GROUP BY repo.repo_name, repo.repo_id
20 | ORDER BY sum_stars DESC NULLS LAST
21 | LIMIT 50
--------------------------------------------------------------------------------
/examples/github_analysis/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/github_analysis/checks/no_timezone_comparison.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | column_id,
3 | classifiers
4 | FROM
5 | sdf.information_schema.columns
6 | WHERE
7 | CONTAINS_ARRAY_VARCHAR(classifiers, 'DATE.') AND
8 | CARDINALITY(ARRAY_DISTINCT(classifiers)) > 1;
--------------------------------------------------------------------------------
/examples/github_analysis/classification/taxonomy.sdf.yml:
--------------------------------------------------------------------------------
1 | classifier:
2 | name: DATE
3 | labels:
4 | - name: UTC
5 | - name: PST
6 | - name: EST
--------------------------------------------------------------------------------
/examples/github_analysis/metadata/sdf_snowflake/stg/repo_event_aggregates.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: repo_event_aggregates
3 | description: >
4 | This table aggregates the number of events that have occurred for each repository.
5 | columns:
6 | - name: event_count
7 | description: The number of events that have occurred for a repository.
8 | tests:
9 | - expect: not_null()
--------------------------------------------------------------------------------
/examples/github_analysis/metadata/sdf_snowflake/stg/repo_stars.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: repo_stars
3 | description: >
4 | The number of stars per repository.
5 | columns:
6 | - name: total_stars
7 | description: The total number of stars that a repository has.
8 | tests:
9 | - expect: minimum(0)
10 |
--------------------------------------------------------------------------------
/examples/github_analysis/metadata/sdf_snowflake/stg/star_growth.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: star_growth
3 | description: >
4 | This table shows the number of stars that have been added to a repository over the course of all events on the repo.
5 | columns:
6 | - name: first_star_count
7 | description: The number of stars that a repository had before the first event recorded.
8 | tests:
9 | - expect: minimum(0)
10 | - name: last_star_count
11 | description: The number of stars that a repository had after the last event recorded.
12 | tests:
13 | - expect: minimum(0)
14 | - name: total_stars_acquired
15 | description: The total number of stars that a repository has acquired over the course of all events.
16 | tests:
17 | - expect: minimum(0)
18 | - name: star_growth
19 | description: The difference between the stars that a repository had after the last event recorded and the stars that a repository had before the first event recorded.
20 | tests:
21 | - expect: minimum(0)
22 |
--------------------------------------------------------------------------------
/examples/github_analysis/metadata/tech__innovation_essentials/cybersyn/github_repos.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: github_repos
3 | columns:
4 | - name: first_seen
5 | classifiers:
6 | - DATE.PST
7 | - name: last_seen
8 | classifiers:
9 | - DATE.PST
--------------------------------------------------------------------------------
/examples/github_analysis/metadata/tech__innovation_essentials/cybersyn/github_stars.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: github_stars
3 | columns:
4 | - name: date
5 | classifiers:
6 | - DATE.UTC
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/analysis/engagement_summary_by_repo.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | d.repo_name,
3 | re.actor_count,
4 | re.event_type_count,
5 | re.total_events,
6 | re.first_interaction,
7 | re.last_interaction
8 | FROM fct.repo_engagement re
9 | JOIN dim.repos d
10 | ON re.repo_id = d.repo_id
11 | ORDER BY re.total_events DESC, re.actor_count DESC;
12 |
13 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/analysis/event_dist_across_repo.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | d.repo_name,
3 | e.event_type,
4 | sum(e.event_count) AS total_events,
5 | min(e.first_event) AS first_event,
6 | max(e.last_event) AS last_event
7 | FROM fct.event_activity e
8 | JOIN dim.repos d
9 | ON e.repo_id = d.repo_id
10 | GROUP BY d.repo_name, e.event_type
11 | ORDER BY total_events DESC;
12 |
13 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/analysis/growth_repos.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | d.repo_name,
3 | e.actor_count,
4 | e.total_events,
5 | a.star_growth,
6 | a.total_stars
7 | FROM fct.repo_engagement e
8 | JOIN fct.repo_activity a
9 | ON e.repo_id = a.repo_id
10 | JOIN dim.repos d
11 | ON e.repo_id = d.repo_id
12 | WHERE a.star_growth < 10 -- Low star growth threshold
13 | ORDER BY
14 | e.actor_count DESC,
15 | e.total_events DESC
16 | ;
17 |
18 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/analysis/star_growth_by_repo.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | d.repo_name,
3 | t.year,
4 | t.month,
5 | SUM(f.star_growth) AS star_growth
6 | FROM fct.repo_activity f
7 | JOIN dim.repos d
8 | ON f.repo_id = d.repo_id
9 | JOIN dim.date t
10 | ON f.first_star_date = t.date
11 | GROUP BY d.repo_name, t.year, t.month
12 | ORDER BY t.year, t.month DESC;
13 |
14 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/dim/date.sql:
--------------------------------------------------------------------------------
1 | SELECT DISTINCT
2 | DATE_TRUNC('day', s.date) AS date,
3 | EXTRACT (year FROM s.date) AS year,
4 | EXTRACT (month FROM s.date) AS month,
5 | EXTRACT (day FROM s.date) AS day,
6 | EXTRACT (quarter FROM s.date) AS quarter
7 | FROM tech__innovation_essentials.cybersyn.github_stars s
8 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/dim/event_type.sql:
--------------------------------------------------------------------------------
1 | -- One row per GitHub event type with the number of events of that type.
2 | -- GROUP BY already yields a single row per type, so no DISTINCT is needed.
3 | SELECT
4 |     type AS event_type,
5 |     COUNT(*) AS event_count
6 | FROM tech__innovation_essentials.cybersyn.github_events
7 | WHERE type IS NOT NULL
8 | GROUP BY type
9 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/dim/repos.sql:
--------------------------------------------------------------------------------
1 | -- Repository dimension: repo attributes plus total event count and latest event time.
2 | -- stg.repo_event_aggregates is grouped by (repo_id, type), so it is rolled up to
3 | -- one row per repo before joining. Joining it directly would repeat each repo
4 | -- once per event type and expose a per-type count under the name event_count.
5 | WITH repo_event_totals AS (
6 |     SELECT
7 |         repo_id,
8 |         SUM(event_count) AS event_count
9 |     FROM stg.repo_event_aggregates
10 |     GROUP BY repo_id
11 | )
12 | SELECT
13 |     r.repo_id,
14 |     n.repo_name,
15 |     r.first_seen,
16 |     r.last_seen,
17 |     e.event_count,
18 |     l.last_event_timestamp
19 | FROM tech__innovation_essentials.cybersyn.github_repos r
20 | LEFT JOIN repo_event_totals e
21 |     ON r.repo_id = e.repo_id
22 | LEFT JOIN stg.latest_repo_events l
23 |     ON r.repo_id = l.repo_id
24 | LEFT JOIN stg.repo_names n
25 |     ON r.repo_id = n.repo_id
26 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/fct/event_activity.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | e.repo_id,
3 | e.type AS event_type,
4 | COUNT(e.id) AS event_count, -- Total count of events by type
5 | MIN(
6 | e.created_at_timestamp
7 | ) AS first_event, -- First occurrence of the event
8 | MAX(
9 | e.created_at_timestamp
10 | ) AS last_event -- Last occurrence of the event
11 | FROM tech__innovation_essentials.cybersyn.github_events e
12 | WHERE e.repo_id IS NOT NULL
13 | GROUP BY e.repo_id, e.type
14 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/fct/repo_activity.sql:
--------------------------------------------------------------------------------
1 | -- Star and event activity per row of stg.star_growth, labelled with the repo name.
2 | -- Event counts are rolled up to one row per repo BEFORE joining. Summing after
3 | -- the joins would count every stg.repo_event_aggregates row once for each matching
4 | -- dim.repos row, inflating total_events whenever dim.repos holds several rows per repo.
5 | WITH repo_event_totals AS (
6 |     SELECT
7 |         repo_id,
8 |         SUM(event_count) AS total_events
9 |     FROM stg.repo_event_aggregates
10 |     GROUP BY repo_id
11 | )
12 | SELECT
13 |     g.repo_id,
14 |     d.repo_name,
15 |     g.first_star_date,
16 |     g.last_star_date,
17 |     g.star_growth,
18 |     g.total_stars_acquired AS total_stars,
19 |     t.total_events,
20 |     -- NULLIF avoids division by zero when a repo shows no star growth
21 |     t.total_events / NULLIF(g.star_growth, 0) AS events_per_star -- Example metric
22 | FROM stg.star_growth g
23 | JOIN dim.repos d
24 |     ON g.repo_id = d.repo_id
25 | LEFT JOIN repo_event_totals t
26 |     ON g.repo_id = t.repo_id
27 | -- The GROUP BY collapses duplicate rows introduced by the dim.repos join.
28 | GROUP BY g.repo_id, d.repo_name, g.first_star_date, g.last_star_date, g.star_growth, g.total_stars_acquired, t.total_events
29 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/fct/repo_engagement.sql:
--------------------------------------------------------------------------------
1 | -- Engagement metrics at one row per repository.
2 | -- Grouping is by repo_id only. Also grouping by e.id / e.type / e.actor_id would
3 | -- make each group a single (event, type, actor) combination, so actor_count and
4 | -- event_type_count could never exceed 1, and models joining on repo_id would get
5 | -- one row per event instead of one row per repo.
6 | SELECT
7 |     e.repo_id,
8 |     COUNT(DISTINCT e.actor_id) AS actor_count, -- Number of distinct users who interacted
9 |     COUNT(DISTINCT e.type) AS event_type_count, -- Number of distinct event types
10 |     COUNT(e.id) AS total_events, -- Total number of events
11 |     MIN(e.created_at_timestamp) AS first_interaction, -- First interaction timestamp
12 |     MAX(e.created_at_timestamp) AS last_interaction -- Last interaction timestamp
13 | FROM tech__innovation_essentials.cybersyn.github_events e
14 | WHERE e.repo_id IS NOT NULL
15 | GROUP BY e.repo_id
16 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/stg/latest_repo_events.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | repo_id,
3 | max(created_at_timestamp) as last_event_timestamp
4 | FROM tech__innovation_essentials.cybersyn.github_events
5 | WHERE repo_id IS NOT NULL
6 | GROUP BY repo_id
7 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/stg/repo_event_aggregates.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | repo_id,
3 | type,
4 | count(*) as event_count
5 | FROM tech__innovation_essentials.cybersyn.github_events
6 | WHERE repo_id IS NOT NULL
7 | GROUP BY repo_id, type
8 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/stg/repo_names.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | repo_name,
3 | repo_id
4 | FROM tech__innovation_essentials.cybersyn.github_repos
5 | QUALIFY row_number() OVER (PARTITION BY repo_id ORDER BY first_seen DESC) = 1
6 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/stg/repo_stars.sql:
--------------------------------------------------------------------------------
1 | -- Total star count per repository; rows with a NULL count are excluded.
2 | SELECT
3 |     stars.repo_id,
4 |     SUM(stars.count) AS total_stars
5 | FROM tech__innovation_essentials.cybersyn.github_stars AS stars
6 | WHERE stars.count IS NOT NULL
7 | GROUP BY stars.repo_id
8 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/stg/star_dates.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | repo_id,
3 | MIN(date) AS first_star_date,
4 | MAX(date) AS last_star_date
5 | from tech__innovation_essentials.cybersyn.github_stars
6 | GROUP BY repo_id
7 |
--------------------------------------------------------------------------------
/examples/github_analysis/models/sdf_snowflake/stg/star_growth.sql:
--------------------------------------------------------------------------------
1 | -- Star statistics at one row per repository.
2 | -- stg.star_dates supplies each repo's first and last star date; the CASE sums
3 | -- pick out the star counts recorded on those two dates.
4 | -- Grouping is by repo_id only: with s.date in the GROUP BY every group would be a
5 | -- single (repo, day), so first_star_date and last_star_date would both equal that
6 | -- day and star_growth would be computed per day rather than per repository.
7 | SELECT
8 |     s.repo_id,
9 |     MIN(s.date) AS first_star_date,
10 |     MAX(s.date) AS last_star_date,
11 |     SUM(CASE WHEN s.date = d.first_star_date THEN s.count ELSE 0 END) AS first_star_count,
12 |     SUM(CASE WHEN s.date = d.last_star_date THEN s.count ELSE 0 END) AS last_star_count,
13 |     SUM(s.count) AS total_stars_acquired,
14 |     -- Growth = stars counted on the last date minus stars counted on the first date
15 |     (
16 |         SUM(CASE WHEN s.date = d.last_star_date THEN s.count ELSE 0 END)
17 |         - SUM(CASE WHEN s.date = d.first_star_date THEN s.count ELSE 0 END)
18 |     ) AS star_growth
19 | FROM tech__innovation_essentials.cybersyn.github_stars s
20 | JOIN stg.star_dates d
21 |     ON s.repo_id = d.repo_id
22 | GROUP BY s.repo_id
23 |
--------------------------------------------------------------------------------
/examples/hello/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/hello/models/main.sql:
--------------------------------------------------------------------------------
1 | select 'Hello World!' as message
2 |
--------------------------------------------------------------------------------
/examples/hello/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: hello
3 | edition: "1.3"
4 | description: "A minimal workspace"
5 |
6 | includes:
7 | - path: models
8 |
--------------------------------------------------------------------------------
/examples/hello_from_dbt/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/hello_from_dbt/models/main.sql:
--------------------------------------------------------------------------------
1 | select 'Hello World!' as message;
--------------------------------------------------------------------------------
/examples/hello_from_dbt/seeds/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/hello_from_dbt/seeds/.gitkeep
--------------------------------------------------------------------------------
/examples/hello_from_dbt/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: hello
3 | edition: "1.3"
4 |
5 | defaults:
6 | preprocessor: jinja
7 |
8 | includes:
9 | - path: models
10 | - path: seeds
11 | type: resource
12 |
--------------------------------------------------------------------------------
/examples/hello_with_pii/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/hello_with_pii/checks/code_check.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | DISTINCT c.table_name as "table_name",
3 | c.column_name as "column name",
4 | c.classifiers
5 | FROM
6 | sdf.information_schema.columns c
7 | WHERE
8 | CONTAINS_ARRAY_VARCHAR(c.classifiers, 'PII.name')
--------------------------------------------------------------------------------
/examples/hello_with_pii/models/main.sql:
--------------------------------------------------------------------------------
1 | select
2 | 'hello' as column_1,
3 | 'Jeffrey Walters' as column_2;
--------------------------------------------------------------------------------
/examples/hello_with_pii/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: hello
3 | edition: "1.3"
4 | defaults:
5 | dialect: trino
6 |
7 | includes:
8 | - path: models
9 | ---
10 | classifier:
11 | name: PII
12 | labels:
13 | - name: name
14 | ---
15 | table:
16 | name: main
17 | columns:
18 | - name: column_2
19 | # UNCOMMENT THE BELOW
20 | # classifiers:
21 | # - PII.name
22 |
--------------------------------------------------------------------------------
/examples/hello_world_s3/local/popdata.sql:
--------------------------------------------------------------------------------
1 | create table popdata with (
2 | format='csv',
3 | skip_header_line_count=1, -- skip the first line of the file
4 | location='local/pop.csv'
5 | );
6 |
--------------------------------------------------------------------------------
/examples/hello_world_s3/local/world_metrics.sql:
--------------------------------------------------------------------------------
1 | select
2 | sum(population) as world_pop,
3 | min(population) as smallest_country,
4 | max(population) as largest_country
5 | from popdata;
6 |
7 | select
8 | country,
9 | split(date, ' ')[1] as day,
10 | split_part(source, ' ', 2) as source,
11 | split_part(source, ' ', 3) as source3
12 | from popdata;
13 |
--------------------------------------------------------------------------------
/examples/hello_world_s3/remote/q1.sql:
--------------------------------------------------------------------------------
1 | -- Let's find the least densely populated country in 1999
2 | SELECT
3 | region_or_country,
4 | "ISO3_Alpha_code",
5 | "Population_Density_Per_Square_KM"
6 | FROM un_pop_data
7 | WHERE "Year" = 1999
8 | ORDER BY "Population_Density_Per_Square_KM"
9 | LIMIT 1;
--------------------------------------------------------------------------------
/examples/hello_world_s3/remote/un_pop_data.sql:
--------------------------------------------------------------------------------
1 | -- Creates a root table with an S3 location
2 | -- Note: Set aws Region
3 | create table un_pop_data WITH (
4 | FORMAT='CSV',
5 | skip_header_line_count=1,
6 | LOCATION='s3://sdfdatasets/hello-world/world_population_full.csv'
7 | );
--------------------------------------------------------------------------------
/examples/hello_world_s3/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: hello_world
3 | edition: "1.3"
4 | description: >
5 | Hello World! Let's analyze the world population.
6 |
7 | To build using the local population data (./local/pop.csv), run using the 'local' environment: 'sdf run -e local --show all'
8 |
9 | To pull data from s3 (s3://sdfdatasets), first authenticate SDF with a local AWS profile. This can be any AWS profile, as the S3 bucket is public.
10 | Use: `sdf auth login aws --profile <profile_name>`
11 |
12 | Next, run using the remote environment: 'sdf run -e remote --show all'.
13 |
14 | Type 'sdf compile' to view schema information for either environment.
15 |
16 | defaults:
17 | dialect: trino
18 |
19 | ---
20 | environment:
21 | name: local
22 |
23 | includes:
24 | - path: local
25 | type: model
26 | - path: local/pop.csv
27 | type: resource
28 | ---
29 | environment:
30 | name: remote
31 |
32 | includes:
33 | - path: remote
34 |
35 | integrations:
36 | - provider: s3
37 | type: data
38 | buckets:
39 | - uri: s3://sdfdatasets
40 | region: us-east-1
41 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/jaffle_shop/models/analytics/orders.sql:
--------------------------------------------------------------------------------
1 | {% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %}
2 |
3 | with orders as (
4 |
5 | select * from staging.stg_orders
6 |
7 | ),
8 |
9 | payments as (
10 |
11 | select * from staging.stg_payments
12 |
13 | ),
14 |
15 | order_payments as (
16 |
17 | select
18 | order_id,
19 |
20 | {% for payment_method in payment_methods -%}
21 | sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount,
22 | {% endfor -%}
23 |
24 | sum(amount) as total_amount
25 |
26 | from payments
27 |
28 | group by order_id
29 |
30 | ),
31 |
32 | final as (
33 |
34 | select
35 | orders.order_id,
36 | orders.customer_id,
37 | orders.order_date,
38 | orders.status,
39 |
40 | {% for payment_method in payment_methods -%}
41 |
42 | order_payments.{{ payment_method }}_amount,
43 |
44 | {% endfor -%}
45 |
46 | order_payments.total_amount as amount
47 |
48 | from orders
49 |
50 |
51 | left join order_payments
52 | on orders.order_id = order_payments.order_id
53 |
54 | )
55 |
56 | select * from final
--------------------------------------------------------------------------------
/examples/jaffle_shop/models/raw/seeds.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: raw_customers
3 | location: seeds/raw_customers.csv
4 | with-header: true
5 | file-format: csv
6 | ---
7 | table:
8 | name: raw_orders
9 | location: seeds/raw_orders.csv
10 | with-header: true
11 | file-format: csv
12 | ---
13 | table:
14 | name: raw_payments
15 | location: seeds/raw_payments.csv
16 | with-header: true
17 | file-format: csv
--------------------------------------------------------------------------------
/examples/jaffle_shop/models/staging/stg_customers.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 |
3 | select * from raw.raw_customers
4 |
5 | ),
6 |
7 | renamed as (
8 |
9 | select
10 | id as customer_id,
11 | first_name,
12 | last_name
13 |
14 | from source
15 |
16 | )
17 |
18 | select * from renamed;
--------------------------------------------------------------------------------
/examples/jaffle_shop/models/staging/stg_orders.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 |
3 | select * from raw.raw_orders
4 |
5 | ),
6 |
7 | renamed as (
8 |
9 | select
10 | id as order_id,
11 | user_id as customer_id,
12 | order_date,
13 | status
14 |
15 | from source
16 |
17 | )
18 |
19 | select * from renamed;
--------------------------------------------------------------------------------
/examples/jaffle_shop/models/staging/stg_payments.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 |
3 | select * from raw.raw_payments
4 |
5 | ),
6 |
7 | renamed as (
8 |
9 | select
10 | id as payment_id,
11 | order_id,
12 | payment_method,
13 |
14 | -- `amount` is currently stored in cents, so we convert it to dollars
15 | amount / 100 as amount
16 |
17 | from source
18 |
19 | )
20 |
21 | select * from renamed;
--------------------------------------------------------------------------------
/examples/jaffle_shop/models/staging/tests.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: stg_customers
3 | columns:
4 | - name: customer_id
5 | tests:
6 | - expect: unique()
7 | - expect: not_null()
8 | ---
9 | table:
10 | name: stg_orders
11 | columns:
12 | - name: order_id
13 | tests:
14 | - expect: unique()
15 | - expect: not_null()
16 | - name: status
17 | tests:
18 | - expect: in_accepted_values(['placed', 'shipped', 'completed', 'return_pending', 'returned'])
19 | ---
20 | table:
21 | name: stg_payments
22 | columns:
23 | - name: payment_id
24 | tests:
25 | - expect: unique()
26 | - expect: not_null()
27 | - name: payment_method
28 | tests:
29 | - expect: in_accepted_values(['credit_card', 'coupon', 'bank_transfer', 'gift_card'])
--------------------------------------------------------------------------------
/examples/jaffle_shop/seeds/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/jaffle_shop/seeds/.gitkeep
--------------------------------------------------------------------------------
/examples/jaffle_shop/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: jaffle_shop # The name of this workspace -- required
3 | edition: "1.3" # The edition of this workspace -- required
4 | description:
5 | This workspace models the DBT / DuckDB Jaffle Shop example project.
6 | The functionality of both DBT and DuckDB is encapsulated by SDF, as this project can be run entirely locally, with no external dependencies on data or compute providers.
7 |
8 | Try compiling first with `sdf compile`. This validates all SQL and dependencies are correct.
9 | Then, run everything locally with `sdf run`. This will run the entire DAG locally.
10 | Lastly, track lineage with `sdf lineage`. Specify the fully qualified name after the command to see lineage for a specific table or column.
11 |
12 | defaults:
13 | catalog: jaffle_shop
14 | schema: public
15 | preprocessor: jinja
16 | materialization: table
17 |
18 | includes:
19 | - path: models # The path to sql sources for this workspace -- at least one path is required
20 | index: schema-table-name # Infers the schema from the directory name, enabling faster compile and a more intuitive project structure.
21 | - path: seeds
22 | type: resource
23 | - path: models/staging
24 | index: schema-table-name
25 | defaults:
26 | materialization: view
27 |
--------------------------------------------------------------------------------
/examples/lineage/checks/check_sink_phone_is_pii.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | DISTINCT c.table_name as "table_name",
3 | c.column_name as "column name",
4 | c.classifiers
5 | FROM
6 | sdf.information_schema.columns c
7 | WHERE
8 | CONTAINS_ARRAY_VARCHAR(c.classifiers, '%DATA.pii%')
9 | and c.table_id like '%lineage.pub.sink'
--------------------------------------------------------------------------------
/examples/lineage/models/knis.sql:
--------------------------------------------------------------------------------
1 | select txn_date, sum(qty) as qty
2 | from middle
3 | group by txn_date
--------------------------------------------------------------------------------
/examples/lineage/models/middle.sql:
--------------------------------------------------------------------------------
1 | select user_id, max(phone) as phone, txn_date, sum(qty) as qty
2 | from source
3 | group by user_id, txn_date
--------------------------------------------------------------------------------
/examples/lineage/models/sink.sql:
--------------------------------------------------------------------------------
1 | select user_id as uid, phone, txn_date, qty
2 | from middle
3 | where qty > 180
--------------------------------------------------------------------------------
/examples/lineage/models/source.sql:
--------------------------------------------------------------------------------
1 | select column1 as user_id,
2 | column2 as phone,
3 | column3 as txn_date,
4 | column4 as qty from
5 | (VALUES
6 | (1, '555-1212', '2022-01-01', 100),
7 | (1, '555-1212', '2022-02-01', 50),
8 | (1, '555-1212', '2022-03-01', 75),
9 | (2, '444-1313', '2022-01-01', 200),
10 | (2, '444-1313', '2022-02-01', 100),
11 | (3, '333-1414', '2022-03-01', 300))
12 |
--------------------------------------------------------------------------------
/examples/lineage/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: lineage
3 | edition: "1.3"
4 | description: >
5 | Creates a three stage pipeline with one source ('source.sql') and two sinks, called 'sink.sql' and 'knis.sql'. It shows how to compute lineage.
6 |
7 | Type 'sdf build' to run the pipeline
8 |
9 | Type 'sdf compile' to view information on classifiers and schema
10 |
11 | Type 'sdf lineage' to view lineage in the cli
12 |
13 | Type 'sdf auth login' and 'sdf deploy' to deploy and view lineage on the sdf console
14 |
15 | Try removing the DATA.pii classifier on source.
16 |
17 | includes:
18 | - path: models
19 | - path: checks
20 |
21 | ---
22 | environment:
23 | name: trino
24 |
25 | defaults:
26 | dialect: trino
27 |
28 | includes:
29 | - type: model
30 | path: models/
31 | - type: check
32 | path: checks/
33 | ---
34 | classifier:
35 | name: DATA
36 |
37 | labels:
38 | - name: uid
39 | - name: pii
40 | ---
41 | table:
42 | name: source
43 |
44 | columns:
45 | - name: user_id
46 | classifiers:
47 | - DATA.uid
48 | - name: phone
49 | classifiers:
50 | - DATA.pii
51 |
--------------------------------------------------------------------------------
/examples/linter/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/linter/ddls/my_table.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: my_table
3 | location: seeds/my_table.csv
4 | columns:
5 | - name: num
6 | datatype: int
7 | - name: cool_col
8 | datatype: varchar
9 | - name: bool_col
10 | datatype: boolean
--------------------------------------------------------------------------------
/examples/linter/models/main.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | DISTINCT('Hello World!') as message,
3 | num / 100.0 AS "num%",
4 | num as "NUM",
5 | my_table.cool_col,
6 | NULL as null_col,
7 | true AS true_col
8 | from my_table
9 | WHERE bool_col = 'TRUE'
10 | Order By 1, cool_col
11 | ;
--------------------------------------------------------------------------------
/examples/linter/seeds/my_table.csv:
--------------------------------------------------------------------------------
1 | num,cool_col,bool_col
2 | 1,'cool_col_1',TRUE
3 | 2,'cool_col_2',TRUE
4 | 3,'cool_col_3',FALSE
5 | 4,'cool_col_4',TRUE
6 | 5,'cool_col_5',false
--------------------------------------------------------------------------------
/examples/linter/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: linter
3 | edition: "1.3"
4 | description: "An example workspace with linting and formatting issues"
5 |
6 | includes:
7 | - path: models
8 | - path: seeds
9 |
10 | defaults:
11 | dialect: snowflake
12 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/checks/README.txt:
--------------------------------------------------------------------------------
1 | This folder will contain static analysis checks against SDF's information schema
--------------------------------------------------------------------------------
/examples/moms_flower_shop/classifications/column_classifiers.sdf.yml:
--------------------------------------------------------------------------------
1 | classifier:
2 | name: EVENT
3 | labels:
4 | - name: inapp
5 | - name: marketing
6 |
7 | ---
8 | classifier:
9 | name: PII
10 | labels:
11 | - name: name
12 | - name: address
13 | - name: email
14 | - name: gender
15 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/classifications/table_classifiers.sdf.yml:
--------------------------------------------------------------------------------
1 | classifier:
2 | name: TABLE_STATUS
3 | labels:
4 | - name: deprecated
5 | - name: dev
6 | - name: public
7 | propagate: false
8 |
9 | ---
10 | classifier:
11 | name: RETENTION
12 | labels:
13 | - name: d7
14 | - name: d30
15 | - name: d90
16 | - name: d180
17 | - name: infinity
18 | propagate: false
19 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/metadata/analytics/agg_installs_and_campaigns.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: agg_installs_and_campaigns
3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>>
4 | # classifiers:
5 | # - RETENTION.infinity
6 | # <<<<<
--------------------------------------------------------------------------------
/examples/moms_flower_shop/metadata/analytics/dim_marketing_campaigns.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: dim_marketing_campaigns
3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>>
4 | # classifiers:
5 | # - RETENTION.infinity
6 | # <<<<<
--------------------------------------------------------------------------------
/examples/moms_flower_shop/metadata/raw/raw_addresses.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: raw_addresses
3 | description: >
4 | All relevant information related to street addresses known to mom's flower shop.
5 | This information comes from the user input into the mobile app.
6 |
7 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>>
8 | # classifiers:
9 | # - RETENTION.d7
10 | # <<<<<
11 |
12 | columns:
13 | - name: index
14 | description: Row number
15 |
16 | - name: address_id
17 | description: A unique identifier of an address
18 |
19 | - name: full_address
20 | description: The full address associated with the address_id
21 |
22 | - name: street_number
23 | description: The address street number associated with the address_id
24 |
25 | - name: street_name
26 | description: The address street name associated with the address_id
27 |
28 | - name: state
29 | description: The address US state associated with the address_id
30 |
31 | - name: city
32 | description: The address US city associated with the address_id
33 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/metadata/staging/app_installs.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: app_installs
3 | description: >
4 | This table is a staging table which adds campaign information
5 | to app install events
6 | # Uncomment here to add a "deprecated" classifier to the table
7 | # classifiers:
8 | # - TABLE_STATUS.deprecated
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/analytics/agg_installs_and_campaigns.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | -- install events data
3 | DATE_FORMAT(install_time, '%Y-%m-%d') AS install_date,
4 | campaign_name,
5 | platform,
6 | COUNT(DISTINCT customer_id) AS distinct_installs
7 | FROM staging.app_installs
8 | GROUP BY 1,2,3
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/raw/raw_addresses.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_addresses
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/addresses.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/raw/raw_customers.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_customers
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/customers.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/raw/raw_inapp_events.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_inapp_events
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/inapp_events.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/raw/raw_marketing_campaign_events.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_marketing_campaign_events
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/marketing_campaign_events.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/staging/app_installs.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | -- install events data
3 | COALESCE(m.event_id, i.event_id) AS event_id,
4 | i.customer_id,
5 | i.event_time AS install_time,
6 | i.platform,
7 |
8 | -- marketing campaign data - if it doesn't exist, then the install is organic
9 | COALESCE(m.campaign_id, -1) AS campaign_id,
10 | COALESCE(m.campaign_name, 'organic') AS campaign_name,
11 | COALESCE(m.c_name, 'organic') AS campaign_type
12 | FROM inapp_events i
13 | LEFT OUTER JOIN raw.raw_marketing_campaign_events m
14 | ON (i.event_id = m.event_id)
15 | WHERE event_name = 'install'
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/staging/app_installs_v2.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | DISTINCT
3 | -- install events data
4 | i.event_id,
5 | i.customer_id,
6 | i.event_time AS install_time,
7 | i.platform,
8 |
9 | -- marketing campaign data - if it doesn't exist, then the install is organic
10 | COALESCE(m.campaign_id, -1) AS campaign_id,
11 | COALESCE(m.campaign_name, 'organic') AS campaign_name,
12 | COALESCE(m.c_name, 'organic') AS campaign_type
13 | FROM inapp_events i
14 | LEFT OUTER JOIN raw.raw_marketing_campaign_events m
15 | ON (i.campaign_id = m.campaign_id)
16 | WHERE event_name = 'install'
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/staging/customers.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | c.id AS customer_id,
3 | c.first_name,
4 | c.last_name,
5 | c.first_name || ' ' || c.last_name AS full_name,
6 | c.email,
7 | c.gender,
8 |
9 | -- Marketing info
10 | i.campaign_id,
11 | i.campaign_name,
12 | i.campaign_type,
13 |
14 | -- Address info
15 | c.address_id,
16 | a.full_address,
17 | a.state
18 | FROM raw.raw_customers c
19 |
20 | LEFT OUTER JOIN app_installs_v2 i
21 | ON (c.id = i.customer_id)
22 |
23 | LEFT OUTER JOIN raw.raw_addresses a
24 | ON (c.address_id = a.address_id)
25 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/staging/inapp_events.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | event_id,
3 | customer_id,
4 | FROM_UNIXTIME(event_time/1000) AS event_time,
5 | event_name,
6 | event_value,
7 | additional_details,
8 | platform,
9 | campaign_id
10 | FROM raw.raw_inapp_events
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/staging/marketing_campaigns.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | campaign_id,
3 | campaign_name,
4 | SUBSTR(c_name, 1, LENGTH(c_name)-1) AS campaign_type,
5 | MIN(
6 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds
7 | ) AS start_time,
8 | MAX(
9 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds
10 | ) AS end_time,
11 | COUNT(event_time) AS campaign_duration,
12 | SUM(cost) AS total_campaign_spent,
13 | ARRAY_AGG(event_id) AS event_ids
14 | FROM raw.raw_marketing_campaign_events
15 | GROUP BY
16 | campaign_id,
17 | campaign_name,
18 | campaign_type
19 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/models/staging/stg_installs_per_campaign.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | campaign_id,
3 | COUNT(event_id) AS total_num_installs
4 | FROM app_installs
5 | GROUP BY 1
6 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop/reports/README.txt:
--------------------------------------------------------------------------------
1 | This folder will contain data warehouse reports based on SDF's information schema
--------------------------------------------------------------------------------
/examples/moms_flower_shop/seeds/csv/README.txt:
--------------------------------------------------------------------------------
1 | To explore the raw source files as CSVs, open the project on GitHub: (...)
--------------------------------------------------------------------------------
/examples/moms_flower_shop/seeds/parquet/addresses.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/addresses.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop/seeds/parquet/customers.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/customers.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop/seeds/parquet/inapp_events.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/inapp_events.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop/seeds/parquet/marketing_campaign_events.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop/seeds/parquet/marketing_campaign_events.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/checks/README.txt:
--------------------------------------------------------------------------------
1 | This folder will contain static analysis checks against SDF's information schema
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/checks/mixed_event_ids.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | DISTINCT c.table_name as "table_name",
3 | c.column_name as "column name",
4 | c.classifiers
5 | FROM
6 | sdf.information_schema.columns c
7 | WHERE
8 | -- more than one EVENT classifier is assigned
9 | CAST(c.classifiers AS VARCHAR) LIKE '%EVENT%EVENT%'
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/classifications/column_classifiers.sdf.yml:
--------------------------------------------------------------------------------
1 | classifier:
2 | name: EVENT
3 | labels:
4 | - name: inapp
5 | - name: marketing
6 |
7 | ---
8 | classifier:
9 | name: PII
10 | labels:
11 | - name: name
12 | - name: address
13 | - name: email
14 | - name: gender
15 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/classifications/table_classifiers.sdf.yml:
--------------------------------------------------------------------------------
1 | classifier:
2 | name: TABLE_STATUS
3 | labels:
4 | - name: deprecated
5 | - name: dev
6 | - name: public
7 | propagate: false
8 |
9 | ---
10 | classifier:
11 | name: RETENTION
12 | labels:
13 | - name: d7
14 | - name: d30
15 | - name: d90
16 | - name: d180
17 | - name: infinity
18 | propagate: false
19 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/metadata/analytics/agg_installs_and_campaigns.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: agg_installs_and_campaigns
3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>>
4 | classifiers:
5 | - RETENTION.infinity
6 | # <<<<<
7 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/metadata/analytics/dim_marketing_campaigns.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: dim_marketing_campaigns
3 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>>
4 | classifiers:
5 | - RETENTION.infinity
6 | # <<<<<
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/metadata/raw/raw_addresses.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: raw_addresses
3 | description: >
4 | All relevant information related to street addresses known to mom's flower shop.
5 | This information comes from the user input into the mobile app.
6 |
7 | # Uncomment below to begin the "Enriching Your Warehouse" Tutorial >>>>>
8 | classifiers:
9 | - RETENTION.d7
10 | # <<<<<
11 |
12 | columns:
13 | - name: index
14 | description: Row number
15 |
16 | - name: address_id
17 | description: A unique identifier of an address
18 |
19 | - name: full_address
20 | description: The full address associated with the address_id
21 |
22 | - name: street_number
23 | description: The address street number associated with the address_id
24 |
25 | - name: street_name
26 | description: The address street name associated with the address_id
27 |
28 | - name: state
29 | description: The address US state associated with the address_id
30 |
31 | - name: city
32 | description: The address US city associated with the address_id
33 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/metadata/staging/app_installs.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: app_installs
3 | description: >
4 | This table is a staging table which adds campaign information
5 | to app install events
6 | # Uncomment here to add a "deprecated" classifier to the table
7 | classifiers:
8 | - TABLE_STATUS.deprecated
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/metadata/staging/inapp_events.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: inapp_events
3 | tests:
4 | - expect: unique_columns(["event_id"])
5 | severity: error
6 | columns:
7 | - name: event_value
8 | tests:
9 | - expect: valid_scalar("""event_value >= 0""")
10 | severity: error
11 | - expect: minimum(0)
12 | severity: error
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/analytics/agg_installs_and_campaigns.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | -- install events data
3 | DATE_FORMAT(install_time, '%Y-%m-%d') AS install_date,
4 | campaign_name,
5 | platform,
6 | COUNT(DISTINCT customer_id) AS distinct_installs
7 | FROM staging.app_installs_v2
8 | GROUP BY 1,2,3
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/analytics/dim_marketing_campaigns.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | -- marketing campaigns dimensions
3 | m.campaign_id,
4 | m.campaign_name,
5 | -- metrics
6 | i.total_num_installs,
7 | total_campaign_spent /
8 | NULLIF(i.total_num_installs, 0) AS avg_customer_acquisition_cost,
9 | campaign_duration /
10 | NULLIF(i.total_num_installs, 0) AS install_duration_ratio
11 | FROM staging.marketing_campaigns m
12 | LEFT OUTER JOIN staging.stg_installs_per_campaign i
13 | ON (m.campaign_id = i.campaign_id)
14 | ORDER BY total_num_installs DESC NULLS LAST
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/raw/raw_addresses.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_addresses
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/addresses.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/raw/raw_customers.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_customers
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/customers.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/raw/raw_inapp_events.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_inapp_events
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/inapp_events.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/raw/raw_marketing_campaign_events.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_marketing_campaign_events
2 | WITH (FORMAT='PARQUET', LOCATION='seeds/parquet/marketing_campaign_events.parquet');
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/staging/app_installs.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | -- install events data
3 | COALESCE(m.event_id, i.event_id) AS event_id,
4 | i.customer_id,
5 | i.event_time AS install_time,
6 | i.platform,
7 |
8 | -- marketing campaign data - if it doesn't exist, then the install is organic
9 | COALESCE(m.campaign_id, -1) AS campaign_id,
10 | COALESCE(m.campaign_name, 'organic') AS campaign_name,
11 | COALESCE(m.c_name, 'organic') AS campaign_type
12 | FROM inapp_events i
13 | LEFT OUTER JOIN raw.raw_marketing_campaign_events m
14 | ON (i.event_id = m.event_id)
15 | WHERE event_name = 'install'
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/staging/app_installs_v2.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | DISTINCT
3 | -- install events data
4 | i.event_id,
5 | i.customer_id,
6 | i.event_time AS install_time,
7 | i.platform,
8 |
9 | -- marketing campaign data - if it doesn't exist, then the install is organic
10 | COALESCE(m.campaign_id, -1) AS campaign_id,
11 | COALESCE(m.campaign_name, 'organic') AS campaign_name,
12 | COALESCE(m.c_name, 'organic') AS campaign_type
13 | FROM inapp_events i
14 | LEFT OUTER JOIN raw.raw_marketing_campaign_events m
15 | ON (i.campaign_id = m.campaign_id)
16 | WHERE event_name = 'install'
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/staging/customers.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | c.id AS customer_id,
3 | c.first_name,
4 | c.last_name,
5 | c.first_name || ' ' || c.last_name AS full_name,
6 | c.email,
7 | c.gender,
8 |
9 | -- Marketing info
10 | i.campaign_id,
11 | i.campaign_name,
12 | i.campaign_type,
13 |
14 | -- Address info
15 | c.address_id,
16 | a.full_address,
17 | a.state
18 | FROM raw.raw_customers c
19 |
20 | LEFT OUTER JOIN app_installs_v2 i
21 | ON (c.id = i.customer_id)
22 |
23 | LEFT OUTER JOIN raw.raw_addresses a
24 | ON (c.address_id = a.address_id)
25 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/staging/inapp_events.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | event_id,
3 | customer_id,
4 | FROM_UNIXTIME(event_time/1000) AS event_time,
5 | event_name,
6 | event_value,
7 | additional_details,
8 | platform,
9 | campaign_id
10 | FROM raw.raw_inapp_events
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/staging/marketing_campaigns.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | campaign_id,
3 | campaign_name,
4 | SUBSTR(c_name, 1, LENGTH(c_name)-1) AS campaign_type,
5 | MIN(
6 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds
7 | ) AS start_time,
8 | MAX(
9 | FROM_UNIXTIME(event_time/1000) -- convert unixtime from milliseconds to seconds
10 | ) AS end_time,
11 | COUNT(event_time) AS campaign_duration,
12 | SUM(cost) AS total_campaign_spent,
13 | ARRAY_AGG(event_id) AS event_ids
14 | FROM raw.raw_marketing_campaign_events
15 | GROUP BY
16 | campaign_id,
17 | campaign_name,
18 | campaign_type
19 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/models/staging/stg_installs_per_campaign.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | campaign_id,
3 | COUNT(event_id) AS total_num_installs
4 | FROM app_installs_v2
5 | GROUP BY 1
6 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/reports/README.txt:
--------------------------------------------------------------------------------
1 | This folder will contain data warehouse reports based on SDF's information schema
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/reports/deprecated_table_reference.sql:
--------------------------------------------------------------------------------
1 | WITH
2 | deprecated_tables AS (
3 | SELECT
4 | table_id
5 | FROM sdf.information_schema.tables
6 | WHERE
7 | CONTAINS(classifiers, 'TABLE_STATUS.deprecated')
8 | )
9 |
10 | SELECT
11 | to_table_id AS table_id,
12 | from_table_id AS upstream_deprecated_table_id
13 | FROM sdf.information_schema.table_lineage
14 | WHERE from_table_id IN (SELECT table_id FROM deprecated_tables)
15 | AND to_table_id IS NOT NULL
16 |
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/seeds/csv/README.txt:
--------------------------------------------------------------------------------
1 | To explore the raw source files as CSVs, open the project on GitHub: (...)
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/seeds/parquet/addresses.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/addresses.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/seeds/parquet/customers.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/customers.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/seeds/parquet/inapp_events.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/inapp_events.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/seeds/parquet/marketing_campaign_events.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/examples/moms_flower_shop_completed/seeds/parquet/marketing_campaign_events.parquet
--------------------------------------------------------------------------------
/examples/moms_flower_shop_completed/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: moms_flower_shop
3 | edition: "1.3"
4 | description: >
5 | This workspace represents the data warehouse of mom's flower shop.
6 |
7 | It contains raw data regarding:
8 | 1. Customers
9 | 2. Marketing campaigns
10 | 3. Mobile in-app events
11 | 4. Street addresses
12 |
13 | That data is available in the seeds folder and is referenced in models/raw
14 | to be loaded and used by SDF. Data transformations are performed and additional
15 | models are available in the staging and analytics folders under the models folder.
16 |
17 | includes:
18 | - path: models
19 | type: model
20 | index: schema-table-name
21 | - path: seeds/parquet
22 | type: resource
23 | - path: metadata
24 | type: metadata
25 | index: schema-table-name
26 | - path: classifications
27 | type: metadata
28 | - path: reports
29 | type: report
30 | - path: checks
31 | type: check
32 |
33 | defaults:
34 | preprocessor: jinja
35 | ---
36 | environment:
37 | name: dev
38 | integrations:
39 | - provider: sdf
40 | type: database
41 | targets:
42 | - pattern: moms_flower_shop.*.*
43 | rename-as: moms_workshed.${1}.${2}
44 |
45 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/checks/no_pii_in_external.sql:
--------------------------------------------------------------------------------
1 | -- Check query: returns the table_id of every table in the `external` schema
2 | -- that has a column whose classifiers satisfy CONTAINS_ARRAY_VARCHAR(..., 'PII').
3 | SELECT DISTINCT
4 |     col.table_id
5 | FROM sdf.information_schema.columns AS col
6 | WHERE col.schema_name = 'external'
7 |     AND CONTAINS_ARRAY_VARCHAR(col.classifiers, 'PII');
8 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/classification/taxonomy.sdf.yml:
--------------------------------------------------------------------------------
1 | classifier:
2 | name: PII
3 | labels:
4 | - name: name
5 | - name: email
6 | - name: phone
--------------------------------------------------------------------------------
/examples/pii_saas_platform/classification/users.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: payment.public.users
3 | description: DDL for the users table
4 | columns:
5 | - name: name
6 | classifiers:
7 | - PII.name
8 | - name: email
9 | classifiers:
10 | - PII.email
11 | - name: phone
12 | classifiers:
13 | - PII.phone
--------------------------------------------------------------------------------
/examples/pii_saas_platform/ddls/payment/public/invoices.sql:
--------------------------------------------------------------------------------
1 | -- Invoices issued to organizations. payer_user_id is the column that links an
2 | -- invoice to a row in users (the models join on users.user_id = invoices.payer_user_id).
3 | CREATE TABLE invoices (
4 |     invoice_id INT,
5 |     organization_id INT,
6 |     amount DECIMAL(10, 2),
7 |     issued_date TIMESTAMP,
8 |     due_date TIMESTAMP,
9 |     payer_user_id INT,
10 |     paid_date TIMESTAMP,
11 |     status VARCHAR(50),
12 |     FOREIGN KEY (organization_id) REFERENCES organizations(organization_id),
13 |     -- The constraint must name a column of this table; invoices has no user_id column.
14 |     FOREIGN KEY (payer_user_id) REFERENCES users(user_id)
15 | );
--------------------------------------------------------------------------------
/examples/pii_saas_platform/ddls/payment/public/organizations.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE organizations (
2 | organization_id INT,
3 | name VARCHAR(255),
4 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
5 | );
6 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/ddls/payment/public/users.sql:
--------------------------------------------------------------------------------
1 | -- Users table; organization_id references organizations(organization_id).
2 | CREATE TABLE users (
3 | user_id INT,
4 | organization_id INT,
5 | name VARCHAR(255),
6 | email VARCHAR(255),
7 | phone VARCHAR(255),
8 | role VARCHAR(100),
9 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
10 | FOREIGN KEY (organization_id) REFERENCES organizations(organization_id)
11 | );
12 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/external/invoice_stats.sql:
--------------------------------------------------------------------------------
1 | -- WARNING: This query is an example of what NOT to do. It exposes sensitive user information.
2 | -- It selects users.name and users.email next to invoice data in a model that lives
3 | -- under models/external.
4 | SELECT
5 |     usr.user_id,
6 |     usr.name,
7 |     usr.email,
8 |     inv.invoice_id,
9 |     inv.amount,
10 |     inv.status
11 | FROM payment.public.users AS usr
12 | INNER JOIN payment.public.invoices AS inv
13 |     ON inv.payer_user_id = usr.user_id;
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/external/org_invoice_stats.sql:
--------------------------------------------------------------------------------
1 | -- Per-organization invoice statistics: invoice count, average invoice amount,
2 | -- and the percentage of invoices whose status is 'Paid'.
3 | SELECT
4 |     o.organization_id,
5 |     o.name AS organization_name,
6 |     COUNT(i.invoice_id) AS total_invoices,
7 |     AVG(i.amount) AS average_invoice_amount,
8 |     -- The LEFT JOIN keeps organizations that have no invoices, for which
9 |     -- COUNT(i.invoice_id) is 0. NULLIF turns that zero divisor into NULL so the
10 |     -- percentage is NULL for such organizations instead of a division-by-zero error.
11 |     SUM(CASE WHEN i.status = 'Paid' THEN 1 ELSE 0 END) / NULLIF(COUNT(i.invoice_id), 0) * 100 AS percent_invoices_paid
12 | FROM
13 |     payment.public.organizations o
14 | LEFT JOIN
15 |     payment.public.invoices i ON o.organization_id = i.organization_id
16 | GROUP BY
17 |     o.organization_id, o.name;
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/internal/avg_invoice_amt.sql:
--------------------------------------------------------------------------------
1 | -- Average of `amount` over all rows of payment.public.invoices.
2 | SELECT
3 |     AVG(inv.amount) AS average_invoice_amount
4 | FROM payment.public.invoices AS inv;
5 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/internal/invoice_payment_delay.sql:
--------------------------------------------------------------------------------
1 | -- For each invoice with status 'Paid' whose paid_date is after its due_date,
2 | -- the DATEDIFF in days from due_date to paid_date.
3 | SELECT
4 |     inv.invoice_id,
5 |     DATEDIFF(day, inv.due_date, inv.paid_date) AS delay_days
6 | FROM payment.public.invoices AS inv
7 | WHERE inv.paid_date > inv.due_date
8 |     AND inv.status = 'Paid';
9 |
10 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/internal/mau_per_org.sql:
--------------------------------------------------------------------------------
1 | -- Distinct user count per organization, bucketed by the month of users.created_at.
2 | SELECT
3 |     usr.organization_id,
4 |     DATE_TRUNC('MONTH', usr.created_at) AS month,
5 |     COUNT(DISTINCT usr.user_id) AS monthly_active_users
6 | FROM payment.public.users AS usr
7 | GROUP BY
8 |     usr.organization_id,
9 |     month;
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/internal/most_frequent_payer.sql:
--------------------------------------------------------------------------------
1 | -- Number of paid invoices per (organization, paying user), listed per organization
2 | -- with the highest payment counts first.
3 | SELECT
4 |     org.organization_id,
5 |     org.name AS organization_name,
6 |     usr.user_id,
7 |     usr.email,
8 |     COUNT(inv.invoice_id) AS payment_count
9 | FROM payment.public.invoices AS inv
10 | INNER JOIN payment.public.users AS usr
11 |     ON usr.user_id = inv.payer_user_id
12 | INNER JOIN payment.public.organizations AS org
13 |     ON org.organization_id = usr.organization_id
14 | -- Only invoices that have been paid are counted.
15 | WHERE inv.status = 'Paid'
16 | GROUP BY
17 |     org.organization_id,
18 |     org.name,
19 |     usr.user_id,
20 |     usr.email
21 | ORDER BY
22 |     org.organization_id,
23 |     payment_count DESC;
24 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/internal/total_revenue_per_org.sql:
--------------------------------------------------------------------------------
1 | -- Sum of `amount` over invoices with status 'Paid', per organization.
2 | SELECT
3 |     inv.organization_id,
4 |     SUM(inv.amount) AS total_revenue
5 | FROM payment.public.invoices AS inv
6 | WHERE inv.status = 'Paid'
7 | GROUP BY
8 |     inv.organization_id;
9 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/internal/users_per_domain.sql:
--------------------------------------------------------------------------------
1 | -- User count per organization and e-mail domain, where the domain is the second
2 | -- '@'-separated part of users.email.
3 | SELECT
4 |     org.organization_id,
5 |     org.name AS organization_name,
6 |     SPLIT_PART(usr.email, '@', 2) AS email_domain,
7 |     COUNT(*) AS user_count
8 | FROM payment.public.users AS usr
9 | INNER JOIN payment.public.organizations AS org
10 |     ON org.organization_id = usr.organization_id
11 | GROUP BY
12 |     org.organization_id,
13 |     org.name,
14 |     email_domain
15 | ORDER BY
16 |     org.organization_id,
17 |     user_count DESC;
18 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/models/internal/users_per_org.sql:
--------------------------------------------------------------------------------
1 | -- Number of users per organization (inner join: organizations without users are omitted).
2 | SELECT
3 |     org.organization_id,
4 |     org.name,
5 |     COUNT(usr.user_id) AS user_count
6 | FROM payment.public.organizations AS org
7 | INNER JOIN payment.public.users AS usr
8 |     ON usr.organization_id = org.organization_id
9 | GROUP BY
10 |     org.organization_id,
11 |     org.name;
12 |
--------------------------------------------------------------------------------
/examples/pii_saas_platform/reports/tables_with_pii.sql:
--------------------------------------------------------------------------------
1 | -- Report query: one row per table that has at least one column whose classifiers
2 | -- satisfy CONTAINS_ARRAY_VARCHAR(..., 'PII').
3 | SELECT
4 |     tbl.table_id,
5 |     tbl.description,
6 |     tbl.dialect
7 | FROM sdf.information_schema.tables AS tbl
8 | INNER JOIN sdf.information_schema.columns AS col
9 |     ON col.table_id = tbl.table_id
10 | WHERE CONTAINS_ARRAY_VARCHAR(col.classifiers, 'PII')
11 | GROUP BY
12 |     tbl.table_id,
13 |     tbl.description,
14 |     tbl.dialect;
--------------------------------------------------------------------------------
/examples/pii_saas_platform/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: pii_saas_platform # The name of this workspace -- required
3 | edition: "1.3" # The edition of this workspace -- required
4 | description: >
5 | This workspace mocks a Snowflake environment of users, organizations, and invoices and demonstrates code contracts & reports in action.
6 |
7 | Try `sdf compile`, `sdf test`, and `sdf report` to see the magic happen
8 |
9 | defaults:
10 | dialect: snowflake # The dialect of SQL used in this workspace, defaults to "trino"
11 |
12 | includes:
13 | - path: classification
14 | - path: checks
15 | type: check
16 | defaults:
17 | catalog: sdf
18 | schema: checks
19 | dialect: trino
20 | - path: reports
21 | type: report
22 | defaults:
23 | catalog: sdf
24 | schema: reports
25 | dialect: trino
26 | - path: models # The path to sql sources for this workspace -- at least one path is required
27 | defaults:
28 | catalog: transformations
29 | index: schema-table-name
30 | - path: ddls # The path to ddl sources for this workspace -- at least one path is required
31 | index: catalog-schema-table-name
32 |
--------------------------------------------------------------------------------
/examples/seeds/models/french_customers.sql:
--------------------------------------------------------------------------------
1 | select * from raw_customers
2 | where country = 'France'
3 |
--------------------------------------------------------------------------------
/examples/seeds/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: seeds
3 | edition: "1.3"
4 | description: >
5 | This workspace demonstrates a seed table: a table whose data is provided in a CSV file.
6 |
7 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/snowflake/seeds
8 |
9 | defaults:
10 | preprocessor: jinja
11 |
12 | ---
13 | environment:
14 | name: test
15 | includes:
16 | - path: seeds
17 | type: seed
18 | - path: models
19 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test1/a1.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: a1
3 | materialization: table
4 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test1/a1.sql:
--------------------------------------------------------------------------------
1 | select * from values
2 | (1, CAST('Jack' AS VARCHAR), '2022-01-01'),
3 | (2, 'Bob', '2022-01-01'),
4 | (3, 'Jane', '2022-01-01')
5 | as T(id, name, "event time")
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test1/a2.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: a2
3 | materialization: table
4 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test1/a2.sql:
--------------------------------------------------------------------------------
1 | select * from values
2 | (1, CAST('Jacob' AS VARCHAR), 10, '2022-01-02'),
3 | (3, 'Mary Jane', 20, '2022-01-01'),
4 | (44, 'Cloe', 30, '2022-01-02')
5 | as T(id, name, age, "event time")
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test1/b.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: b
3 | materialization: snapshot-table
4 | snapshot-options:
5 | strategy: timestamp
6 | unique-key: id
7 | updated-at: event time
8 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test1/b.sql:
--------------------------------------------------------------------------------
1 | {% if builtin.is_snapshot_mode %}
2 | select * from a2
3 | {% else %}
4 | select * from a1
5 | {% endif %}
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test2/a1.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: a1
3 | materialization: table
4 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test2/a1.sql:
--------------------------------------------------------------------------------
1 | select * from values
2 | (1, CAST('Jack' AS VARCHAR), '2022-01-01'),
3 | (2, 'Bob', '2022-01-01'),
4 | (3, 'Jane', '2022-01-01')
5 | as T(id, name, "event time")
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test2/a2.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: a2
3 | materialization: table
4 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test2/a2.sql:
--------------------------------------------------------------------------------
1 | select * from values
2 | (1, CAST('Jacob' AS VARCHAR), 10, '2022-01-02'),
3 | (3, 'Mary Jane', 20, '2022-01-01'),
4 | (44, 'Cloe', 30, '2022-01-02')
5 | as T(id, name, age, "event time")
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test2/b.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: b
3 | materialization: snapshot-table
4 | snapshot-options:
5 | strategy: check
6 | unique-key: id
7 | check-cols: all
8 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test2/b.sql:
--------------------------------------------------------------------------------
1 | {% if builtin.is_snapshot_mode %}
2 | select * from a2
3 | {% else %}
4 | select * from a1
5 | {% endif %}
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test3/a1.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: a1
3 | materialization: table
4 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test3/a1.sql:
--------------------------------------------------------------------------------
1 | select * from values
2 | (1, CAST('Jack' AS VARCHAR), '2022-01-01'),
3 | (2, 'Bob', '2022-01-01'),
4 | (3, 'Jane', '2022-01-01')
5 | as T(id, name, "event time")
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test3/a2.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: a2
3 | materialization: table
4 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test3/a2.sql:
--------------------------------------------------------------------------------
1 | select * from values
2 | (1, CAST('Jacob' AS VARCHAR), 10, '2022-01-02'),
3 | (3, 'Mary Jane', 20, '2022-01-01'),
4 | (44, 'Cloe', 30, '2022-01-02')
5 | as T(id, name, age, "event time")
6 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test3/b.sdf.yml:
--------------------------------------------------------------------------------
1 | table:
2 | name: b
3 | materialization: snapshot-table
4 | snapshot-options:
5 | strategy: check
6 | unique-key: id
7 | check-cols:
8 | !cols ['event time']
9 |
--------------------------------------------------------------------------------
/examples/snapshots/models/test3/b.sql:
--------------------------------------------------------------------------------
1 | {% if builtin.is_snapshot_mode %}
2 | select * from a2
3 | {% else %}
4 | select * from a1
5 | {% endif %}
6 |
--------------------------------------------------------------------------------
/examples/snapshots/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: snapshots
3 | edition: "1.3"
4 | description: >
5 | This workspace uses a simple scenario of one source table (A) and one derived table (B) to demonstrate snapshots.
6 | Running and compiling this workspace requires a connection to a Snowflake account.
7 | The provider in this workspace is using the `default` credentials. Run `sdf auth login snowflake` with no name provided to set these.
8 |
9 | To follow along with SDF's official guide: https://docs.sdf.com/integrations/snowflake/snapshots
10 |
11 | defaults:
12 | preprocessor: jinja
13 | ---
14 | environment:
15 | name: test1
16 | description: timestamp strategy
17 | includes:
18 | - path: models/test1
19 | ---
20 | environment:
21 | name: test2
22 | description: check strategy all columns
23 | includes:
24 | - path: models/test2
25 | ---
26 | environment:
27 | name: test3
28 | description: check strategy selected columns
29 | includes:
30 | - path: models/test3
31 |
--------------------------------------------------------------------------------
/examples/tests/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | #sdf gitignore
3 | /sdftarget
4 |
--------------------------------------------------------------------------------
/examples/tests/models/raw_inapp_events.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE raw_inapp_events
2 | WITH (FORMAT='CSV', skip_header_line_count=1, LOCATION='seeds/inapp_events.csv');
--------------------------------------------------------------------------------
/examples/tests/workspace.sdf.yml:
--------------------------------------------------------------------------------
1 | workspace:
2 | name: tests_workspace # The name of this workspace -- required
3 | edition: "1.3" # The edition of this workspace -- required
4 | description: >
5 | This workspace demonstrates how to use the SDF built-in tests library
6 |
7 | includes:
8 | - path: models # The path to sql models for this workspace -- at least one path is required
9 | - path: seeds # Where raw data is stored locally
10 | type: resource
11 | - path: src_metadata # Where table metadata and tests are stored
12 |
13 | defaults:
14 | preprocessor: jinja
15 |
--------------------------------------------------------------------------------
/schemas/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/schemas/.gitkeep
--------------------------------------------------------------------------------
/schemas/sdf-definition-schema.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sdf-labs/sdf-cli/0f2623776d779dcc07adfe6492765b5c77d3d211/schemas/sdf-definition-schema.json
--------------------------------------------------------------------------------