├── .dockerignore ├── .editorconfig ├── .git-blame-ignore-revs ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── documentation_request.yml │ └── feature_request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── get_docs_changes.yml │ ├── lint.yml │ ├── main.yml │ ├── test_common.yml │ ├── test_destinations_local.yml │ ├── test_destinations_remote.yml │ ├── test_docs_snippets.yml │ ├── test_plus.yml │ ├── test_sources_local.yml │ ├── test_tools_airflow.yml │ ├── test_tools_build_images.yml │ ├── test_tools_dbt_cloud.yml │ ├── test_tools_dbt_runner.yml │ ├── tools_deploy_docs.yml │ └── tools_deploy_notebooks.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.txt ├── Makefile ├── README.md ├── compiled_packages.txt ├── deploy └── dlt │ ├── Dockerfile │ ├── Dockerfile.airflow │ └── README.md ├── dlt ├── __init__.py ├── __main__.py ├── cli │ ├── __init__.py │ ├── _dlt.py │ ├── ai_command.py │ ├── command_wrappers.py │ ├── config_toml_writer.py │ ├── debug.py │ ├── deploy_command.py │ ├── deploy_command_helpers.py │ ├── docs_command.py │ ├── echo.py │ ├── exceptions.py │ ├── init_command.py │ ├── pipeline_command.py │ ├── pipeline_files.py │ ├── plugins.py │ ├── reference.py │ ├── requirements.py │ ├── source_detection.py │ ├── telemetry_command.py │ └── utils.py ├── common │ ├── __init__.py │ ├── arithmetics.py │ ├── configuration │ │ ├── __init__.py │ │ ├── accessors.py │ │ ├── const.py │ │ ├── container.py │ │ ├── exceptions.py │ │ ├── inject.py │ │ ├── plugins.py │ │ ├── providers │ │ │ ├── __init__.py │ │ │ ├── airflow.py │ │ │ ├── context.py │ │ │ ├── dictionary.py │ │ │ ├── doc.py │ │ │ ├── environ.py │ │ │ ├── google_secrets.py │ │ │ ├── provider.py │ │ │ ├── toml.py │ │ │ └── vault.py │ │ ├── resolve.py │ │ ├── specs │ │ │ ├── __init__.py │ │ │ ├── api_credentials.py │ │ │ ├── aws_credentials.py │ │ │ ├── azure_credentials.py │ │ │ ├── base_configuration.py │ │ │ ├── config_providers_context.py │ │ │ ├── config_section_context.py │ │ 
│ ├── connection_string_credentials.py │ │ │ ├── exceptions.py │ │ │ ├── gcp_credentials.py │ │ │ ├── known_sections.py │ │ │ ├── mixins.py │ │ │ ├── pluggable_run_context.py │ │ │ ├── runtime_configuration.py │ │ │ └── sftp_credentials.py │ │ └── utils.py │ ├── data_types │ │ ├── __init__.py │ │ ├── type_helpers.py │ │ └── typing.py │ ├── data_writers │ │ ├── __init__.py │ │ ├── buffered.py │ │ ├── configuration.py │ │ ├── escape.py │ │ ├── exceptions.py │ │ └── writers.py │ ├── destination │ │ ├── __init__.py │ │ ├── capabilities.py │ │ ├── client.py │ │ ├── configuration.py │ │ ├── dataset.py │ │ ├── exceptions.py │ │ ├── reference.py │ │ ├── typing.py │ │ └── utils.py │ ├── exceptions.py │ ├── git.py │ ├── incremental │ │ ├── __init__.py │ │ └── typing.py │ ├── json │ │ ├── __init__.py │ │ ├── _orjson.py │ │ └── _simplejson.py │ ├── jsonpath.py │ ├── known_env.py │ ├── libs │ │ ├── __init__.py │ │ ├── cryptography.py │ │ ├── deltalake.py │ │ ├── numpy.py │ │ ├── pandas.py │ │ ├── pandas_sql.py │ │ ├── pyarrow.py │ │ ├── pydantic.py │ │ ├── pyiceberg.py │ │ ├── sql_alchemy.py │ │ ├── sql_alchemy_compat.py │ │ ├── sql_alchemy_shims.py │ │ ├── sqlglot.py │ │ └── utils.py │ ├── logger.py │ ├── managed_thread_pool.py │ ├── metrics.py │ ├── normalizers │ │ ├── __init__.py │ │ ├── exceptions.py │ │ ├── json │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ ├── relational.py │ │ │ └── typing.py │ │ ├── naming │ │ │ ├── __init__.py │ │ │ ├── direct.py │ │ │ ├── duck_case.py │ │ │ ├── exceptions.py │ │ │ ├── naming.py │ │ │ ├── snake_case.py │ │ │ ├── sql_ci_v1.py │ │ │ └── sql_cs_v1.py │ │ ├── typing.py │ │ └── utils.py │ ├── pendulum.py │ ├── pipeline.py │ ├── reflection │ │ ├── __init__.py │ │ ├── exceptions.py │ │ ├── inspect.py │ │ ├── ref.py │ │ ├── spec.py │ │ └── utils.py │ ├── runners │ │ ├── __init__.py │ │ ├── configuration.py │ │ ├── pool_runner.py │ │ ├── runnable.py │ │ ├── stdout.py │ │ ├── synth_pickle.py │ │ ├── typing.py │ │ └── venv.py │ ├── runtime │ │ 
├── __init__.py │ │ ├── anon_tracker.py │ │ ├── collector.py │ │ ├── exceptions.py │ │ ├── exec_info.py │ │ ├── init.py │ │ ├── json_logging.py │ │ ├── run_context.py │ │ ├── sentry.py │ │ ├── signals.py │ │ ├── slack.py │ │ ├── telemetry.py │ │ └── typing.py │ ├── schema │ │ ├── __init__.py │ │ ├── configuration.py │ │ ├── detections.py │ │ ├── exceptions.py │ │ ├── migrations.py │ │ ├── normalizers.py │ │ ├── schema.py │ │ ├── typing.py │ │ └── utils.py │ ├── storages │ │ ├── __init__.py │ │ ├── configuration.py │ │ ├── data_item_storage.py │ │ ├── exceptions.py │ │ ├── file_storage.py │ │ ├── fsspec_filesystem.py │ │ ├── fsspecs │ │ │ ├── __init__.py │ │ │ └── google_drive.py │ │ ├── live_schema_storage.py │ │ ├── load_package.py │ │ ├── load_storage.py │ │ ├── normalize_storage.py │ │ ├── schema_storage.py │ │ ├── transactional_file.py │ │ └── versioned_storage.py │ ├── time.py │ ├── typing.py │ ├── utils.py │ ├── validation.py │ ├── versioned_state.py │ ├── warnings.py │ └── wei.py ├── destinations │ ├── __init__.py │ ├── adapters.py │ ├── configuration.py │ ├── dataset │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── exceptions.py │ │ ├── factory.py │ │ ├── relation.py │ │ └── utils.py │ ├── decorators.py │ ├── exceptions.py │ ├── fs_client.py │ ├── impl │ │ ├── __init__.py │ │ ├── athena │ │ │ ├── __init__.py │ │ │ ├── athena.py │ │ │ ├── athena_adapter.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ └── sql_client.py │ │ ├── bigquery │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── bigquery.py │ │ │ ├── bigquery_adapter.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── sql_client.py │ │ │ └── warnings.py │ │ ├── clickhouse │ │ │ ├── __init__.py │ │ │ ├── clickhouse.py │ │ │ ├── clickhouse_adapter.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── sql_client.py │ │ │ ├── typing.py │ │ │ └── utils.py │ │ ├── databricks │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── databricks.py │ │ │ ├── factory.py │ │ │ └── 
sql_client.py │ │ ├── destination │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── destination.py │ │ │ └── factory.py │ │ ├── dremio │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── dremio.py │ │ │ ├── factory.py │ │ │ ├── pydremio.py │ │ │ └── sql_client.py │ │ ├── duckdb │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── duck.py │ │ │ ├── exceptions.py │ │ │ ├── factory.py │ │ │ └── sql_client.py │ │ ├── dummy │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── dummy.py │ │ │ └── factory.py │ │ ├── filesystem │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── filesystem.py │ │ │ ├── sql_client.py │ │ │ └── typing.py │ │ ├── lancedb │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── exceptions.py │ │ │ ├── factory.py │ │ │ ├── jobs.py │ │ │ ├── lancedb_adapter.py │ │ │ ├── lancedb_client.py │ │ │ ├── schema.py │ │ │ ├── type_mapper.py │ │ │ ├── utils.py │ │ │ └── warnings.py │ │ ├── motherduck │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── motherduck.py │ │ │ └── sql_client.py │ │ ├── mssql │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── mssql.py │ │ │ └── sql_client.py │ │ ├── postgres │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── postgres.py │ │ │ ├── postgres_adapter.py │ │ │ └── sql_client.py │ │ ├── qdrant │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── exceptions.py │ │ │ ├── factory.py │ │ │ ├── qdrant_adapter.py │ │ │ ├── qdrant_job_client.py │ │ │ └── warnings.py │ │ ├── redshift │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ └── redshift.py │ │ ├── snowflake │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── snowflake.py │ │ │ ├── sql_client.py │ │ │ └── utils.py │ │ ├── sqlalchemy │ │ │ ├── __init__.py │ │ │ ├── alter_table.py │ │ │ ├── configuration.py │ │ 
│ ├── db_api_client.py │ │ │ ├── factory.py │ │ │ ├── load_jobs.py │ │ │ ├── merge_job.py │ │ │ ├── sqlalchemy_job_client.py │ │ │ └── type_mapper.py │ │ ├── synapse │ │ │ ├── __init__.py │ │ │ ├── configuration.py │ │ │ ├── factory.py │ │ │ ├── sql_client.py │ │ │ ├── synapse.py │ │ │ └── synapse_adapter.py │ │ └── weaviate │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── ci_naming.py │ │ │ ├── configuration.py │ │ │ ├── exceptions.py │ │ │ ├── factory.py │ │ │ ├── naming.py │ │ │ ├── weaviate_adapter.py │ │ │ └── weaviate_client.py │ ├── insert_job_client.py │ ├── job_client_impl.py │ ├── job_impl.py │ ├── path_utils.py │ ├── queries.py │ ├── sql_client.py │ ├── sql_jobs.py │ ├── type_mapping.py │ ├── typing.py │ └── utils.py ├── extract │ ├── __init__.py │ ├── concurrency.py │ ├── decorators.py │ ├── exceptions.py │ ├── extract.py │ ├── extractors.py │ ├── hints.py │ ├── incremental │ │ ├── __init__.py │ │ ├── exceptions.py │ │ ├── lag.py │ │ └── transform.py │ ├── items.py │ ├── items_transform.py │ ├── pipe.py │ ├── pipe_iterator.py │ ├── reference.py │ ├── resource.py │ ├── source.py │ ├── state.py │ ├── storage.py │ ├── utils.py │ ├── validation.py │ └── wrappers.py ├── helpers │ ├── __init__.py │ ├── airflow_helper.py │ ├── dbt │ │ ├── __init__.py │ │ ├── configuration.py │ │ ├── dbt_utils.py │ │ ├── exceptions.py │ │ ├── profiles.yml │ │ └── runner.py │ ├── dbt_cloud │ │ ├── __init__.py │ │ ├── client.py │ │ └── configuration.py │ ├── ibis.py │ ├── streamlit_app │ │ ├── __init__.py │ │ ├── blocks │ │ │ ├── __init__.py │ │ │ ├── load_info.py │ │ │ ├── menu.py │ │ │ ├── query.py │ │ │ ├── resource_state.py │ │ │ ├── show_data.py │ │ │ └── table_hints.py │ │ ├── index.py │ │ ├── pages │ │ │ ├── __init__.py │ │ │ ├── dashboard.py │ │ │ └── load_info.py │ │ ├── theme.py │ │ ├── utils.py │ │ └── widgets │ │ │ ├── __init__.py │ │ │ ├── color_mode_selector.py │ │ │ ├── schema.py │ │ │ ├── stats.py │ │ │ ├── summary.py │ │ │ └── tags.py │ └── studio │ │ ├── .dlt 
│ │ └── config.toml │ │ ├── config.py │ │ ├── dlt_app.py │ │ ├── dlt_app_styles.css │ │ ├── runner.py │ │ ├── strings.py │ │ ├── ui_elements.py │ │ └── utils.py ├── load │ ├── __init__.py │ ├── configuration.py │ ├── exceptions.py │ ├── load.py │ └── utils.py ├── normalize │ ├── __init__.py │ ├── configuration.py │ ├── exceptions.py │ ├── items_normalizers.py │ ├── normalize.py │ ├── validate.py │ └── worker.py ├── pipeline │ ├── __init__.py │ ├── configuration.py │ ├── current.py │ ├── dbt.py │ ├── drop.py │ ├── exceptions.py │ ├── helpers.py │ ├── mark.py │ ├── pipeline.py │ ├── platform.py │ ├── progress.py │ ├── state_sync.py │ ├── trace.py │ ├── track.py │ ├── typing.py │ └── warnings.py ├── py.typed ├── reflection │ ├── __init__.py │ ├── names.py │ ├── script_inspector.py │ └── script_visitor.py ├── sources │ ├── .gitignore │ ├── __init__.py │ ├── _core_source_templates │ │ ├── __init__.py │ │ ├── filesystem_pipeline.py │ │ ├── rest_api_pipeline.py │ │ └── sql_database_pipeline.py │ ├── _single_file_templates │ │ ├── .dlt │ │ │ └── config.toml │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── arrow_pipeline.py │ │ ├── dataframe_pipeline.py │ │ ├── debug_pipeline.py │ │ ├── default_pipeline.py │ │ ├── fruitshop_pipeline.py │ │ ├── github_api_pipeline.py │ │ ├── requests_pipeline.py │ │ └── vibe_rest_api_pipeline.py │ ├── config.py │ ├── credentials.py │ ├── filesystem │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── readers.py │ │ └── settings.py │ ├── helpers │ │ ├── __init__.py │ │ ├── requests │ │ │ ├── __init__.py │ │ │ ├── retry.py │ │ │ ├── session.py │ │ │ └── typing.py │ │ ├── rest_client │ │ │ ├── __init__.py │ │ │ ├── auth.py │ │ │ ├── client.py │ │ │ ├── detector.py │ │ │ ├── exceptions.py │ │ │ ├── paginators.py │ │ │ ├── typing.py │ │ │ └── utils.py │ │ └── transform.py │ ├── rest_api │ │ ├── __init__.py │ │ ├── config_setup.py │ │ ├── exceptions.py │ │ ├── typing.py │ │ └── utils.py │ └── sql_database │ │ ├── __init__.py │ │ ├── arrow_helpers.py │ │ 
├── helpers.py │ │ └── schema_types.py ├── transformations │ ├── __init__.py │ ├── configuration.py │ ├── decorators.py │ ├── exceptions.py │ ├── lineage.py │ ├── transformation.py │ └── typing.py └── version.py ├── docs ├── __init__.py ├── examples │ ├── .dlt │ │ └── config.toml │ ├── CONTRIBUTING.md │ ├── __init__.py │ ├── _template │ │ ├── .dlt │ │ │ ├── config.toml │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ └── _template.py │ ├── archive │ │ ├── .dlt │ │ │ └── example.secrets.toml │ │ ├── README.md │ │ ├── __init__.py │ │ ├── _helpers.py │ │ ├── credentials │ │ │ ├── .dlt │ │ │ │ └── config.toml │ │ │ ├── __init__.py │ │ │ └── explicit.py │ │ ├── data │ │ │ ├── channels.json │ │ │ ├── demo_example.json │ │ │ ├── messages.json │ │ │ ├── rasa_trackers │ │ │ │ ├── 2888158124550630_tracker.jsonl │ │ │ │ └── 8629c904-0c26-4f0b-927b-14d48db43c28_tracker.jsonl │ │ │ └── singer_taps │ │ │ │ ├── csv_catalog.json │ │ │ │ ├── model_annotations.csv │ │ │ │ ├── tap_google_sheet.jsonl │ │ │ │ └── tap_hubspot.jsonl │ │ ├── dbt_run_jaffle.py │ │ ├── discord_iterator.py │ │ ├── examples │ │ │ └── schemas │ │ │ │ └── dlt_quickstart.schema.yaml │ │ ├── google_drive_csv.py │ │ ├── google_sheets.py │ │ ├── quickstart.py │ │ ├── rasa_example.py │ │ ├── read_table.py │ │ ├── restore_pipeline.py │ │ ├── schemas │ │ │ ├── __init__.py │ │ │ ├── discord.schema.yml │ │ │ ├── dlt_quickstart.schema.yaml │ │ │ ├── hubspot.schema.yaml │ │ │ └── inferred_demo.schema.yml │ │ ├── singer_tap_example.py │ │ ├── singer_tap_jsonl_example.py │ │ ├── sources │ │ │ ├── __init__.py │ │ │ ├── google_sheets.py │ │ │ ├── jsonl.py │ │ │ ├── rasa │ │ │ │ ├── __init__.py │ │ │ │ ├── rasa.py │ │ │ │ └── rasa.schema.yaml │ │ │ ├── singer_tap.py │ │ │ ├── sql_query.py │ │ │ └── stdout.py │ │ └── sync_schema_example.py │ ├── backfill_in_chunks │ │ ├── __init__.py │ │ └── backfill_in_chunks.py │ ├── chess │ │ ├── .dlt │ │ │ └── config.toml │ │ ├── __init__.py │ │ ├── chess.py │ │ ├── chess_dbt.py │ │ 
└── dbt_transform │ │ │ ├── .gitignore │ │ │ ├── analyses │ │ │ └── .gitkeep │ │ │ ├── dbt_project.yml │ │ │ ├── macros │ │ │ └── .gitkeep │ │ │ ├── models │ │ │ ├── _dlt_loads.sql │ │ │ ├── load_ids.sql │ │ │ ├── schema.yml │ │ │ ├── sources.yml │ │ │ └── view_player_games.sql │ │ │ ├── package-lock.yml │ │ │ ├── packages.yml │ │ │ ├── seeds │ │ │ └── .gitkeep │ │ │ ├── snapshots │ │ │ └── .gitkeep │ │ │ └── tests │ │ │ └── .gitkeep │ ├── chess_production │ │ ├── .dlt │ │ │ └── config.toml │ │ ├── __init__.py │ │ └── chess_production.py │ ├── conftest.py │ ├── connector_x_arrow │ │ ├── __init__.py │ │ └── connector_x_arrow.py │ ├── custom_config_provider │ │ ├── .dlt │ │ │ └── config.toml │ │ ├── __init__.py │ │ ├── custom_config_provider.py │ │ └── profiles.yaml │ ├── custom_destination_bigquery │ │ ├── .dlt │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ └── custom_destination_bigquery.py │ ├── custom_destination_lancedb │ │ ├── .dlt │ │ │ ├── config.toml │ │ │ └── example.secrets.toml │ │ ├── .gitignore │ │ ├── __init__.py │ │ └── custom_destination_lancedb.py │ ├── custom_naming │ │ ├── .dlt │ │ │ └── config.toml │ │ ├── __init__.py │ │ ├── custom_naming.py │ │ ├── sql_ci_no_collision.py │ │ └── sql_cs_latin2.py │ ├── google_sheets │ │ ├── .dlt │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ └── google_sheets.py │ ├── incremental_loading │ │ ├── .dlt │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ └── incremental_loading.py │ ├── nested_data │ │ ├── .dlt │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ └── nested_data.py │ ├── partial_loading │ │ ├── .dlt │ │ │ ├── config.toml │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ ├── partial_loading.py │ │ └── requirements.txt │ ├── pdf_to_weaviate │ │ ├── __init__.py │ │ ├── assets │ │ │ └── invoices │ │ │ │ ├── invoice_2.txt │ │ │ │ └── invoice_20230831-p1.pdf │ │ └── pdf_to_weaviate.py │ ├── postgres_to_postgres │ │ ├── .dlt │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ 
└── postgres_to_postgres.py │ ├── propagate_hints │ │ ├── __init__.py │ │ └── propagate_hints.py │ ├── qdrant_zendesk │ │ ├── .dlt │ │ │ └── example.secrets.toml │ │ ├── __init__.py │ │ └── qdrant_zendesk.py │ └── transformers │ │ ├── .dlt │ │ └── config.toml │ │ ├── __init__.py │ │ └── transformers.py ├── notebooks │ ├── .gitignore │ ├── CONTRIBUTING.md │ ├── Makefile │ └── playground │ │ └── playground.py ├── technical │ ├── general_usage.md │ └── working_with_schemas.md ├── tools │ ├── .env.example │ ├── README.md │ ├── __init__.py │ ├── check_embedded_snippets.py │ ├── fix_grammar_gpt.py │ ├── lint_setup │ │ ├── .gitignore │ │ ├── __init__.py │ │ └── template.py │ ├── mypy.ini │ ├── package-lock.json │ ├── prepare_examples_tests.py │ ├── requirements.txt │ ├── ruff.toml │ └── utils.py └── website │ ├── .gitignore │ ├── .npmrc │ ├── README.md │ ├── __init__.py │ ├── babel.config.js │ ├── clean_pydoc_sidebar.py │ ├── docs │ ├── .dlt │ │ ├── .gitignore │ │ └── config.toml │ ├── __init__.py │ ├── _book-onboarding-call.md │ ├── assets │ │ └── json_file.json │ ├── build-a-pipeline-tutorial.md │ ├── conftest.py │ ├── dlt-ecosystem │ │ ├── destinations │ │ │ ├── athena.md │ │ │ ├── bigquery.md │ │ │ ├── clickhouse.md │ │ │ ├── databricks.md │ │ │ ├── delta-iceberg.md │ │ │ ├── destination.md │ │ │ ├── dremio.md │ │ │ ├── duckdb.md │ │ │ ├── filesystem.md │ │ │ ├── iceberg.md │ │ │ ├── index.md │ │ │ ├── lancedb.md │ │ │ ├── motherduck.md │ │ │ ├── mssql.md │ │ │ ├── postgres.md │ │ │ ├── qdrant.md │ │ │ ├── redshift.md │ │ │ ├── snowflake.md │ │ │ ├── sqlalchemy.md │ │ │ ├── synapse.md │ │ │ └── weaviate.md │ │ ├── file-formats │ │ │ ├── _set_the_format.mdx │ │ │ ├── csv.md │ │ │ ├── insert-format.md │ │ │ ├── jsonl.md │ │ │ └── parquet.md │ │ ├── llm-tooling │ │ │ ├── cursor-restapi.md │ │ │ └── mcp-server.md │ │ ├── staging.md │ │ ├── table-formats │ │ │ ├── delta.md │ │ │ └── iceberg.md │ │ ├── transformations │ │ │ ├── add-map.md │ │ │ ├── dbt │ │ │ │ ├── 
__init__.py │ │ │ │ ├── dbt-snippets.py │ │ │ │ ├── dbt.md │ │ │ │ ├── dbt_cloud.md │ │ │ │ └── profiles.yml │ │ │ ├── encryption.md │ │ │ ├── index.md │ │ │ ├── python.md │ │ │ └── sql.md │ │ └── verified-sources │ │ │ ├── _source-info-header.md │ │ │ ├── airtable.md │ │ │ ├── amazon_kinesis.md │ │ │ ├── arrow-pandas.md │ │ │ ├── asana.md │ │ │ ├── chess.md │ │ │ ├── docs_images │ │ │ ├── Add_people.png │ │ │ ├── Airtable_ids.jpeg │ │ │ ├── GA4_Property_ID_size.png │ │ │ ├── Matomo_name_and_id.png │ │ │ ├── Notion_Database_2.jpeg │ │ │ ├── Share_button.png │ │ │ ├── Zendesk_Admin.jpeg │ │ │ └── Zendesk_chat_access_token.jpg │ │ │ ├── facebook_ads.md │ │ │ ├── filesystem │ │ │ ├── advanced.md │ │ │ ├── basic.md │ │ │ └── index.md │ │ │ ├── freshdesk.md │ │ │ ├── github.md │ │ │ ├── google_ads.md │ │ │ ├── google_analytics.md │ │ │ ├── google_sheets.md │ │ │ ├── hubspot.md │ │ │ ├── inbox.md │ │ │ ├── index.md │ │ │ ├── jira.md │ │ │ ├── kafka.md │ │ │ ├── matomo.md │ │ │ ├── mongodb.md │ │ │ ├── mux.md │ │ │ ├── notion.md │ │ │ ├── openapi-generator.md │ │ │ ├── personio.md │ │ │ ├── pg_replication.md │ │ │ ├── pipedrive.md │ │ │ ├── rest_api │ │ │ ├── advanced.md │ │ │ ├── basic.md │ │ │ └── index.md │ │ │ ├── salesforce.md │ │ │ ├── scrapy.md │ │ │ ├── shopify.md │ │ │ ├── slack.md │ │ │ ├── sql_database │ │ │ ├── advanced.md │ │ │ ├── configuration.md │ │ │ ├── index.md │ │ │ ├── setup.md │ │ │ ├── troubleshooting.md │ │ │ └── usage.md │ │ │ ├── strapi.md │ │ │ ├── stripe.md │ │ │ ├── workable.md │ │ │ └── zendesk.md │ ├── examples │ │ └── index.md │ ├── general-usage │ │ ├── credentials │ │ │ ├── advanced.md │ │ │ ├── complex_types.md │ │ │ ├── index.md │ │ │ └── setup.md │ │ ├── customising-pipelines │ │ │ ├── pseudonymizing_columns.md │ │ │ ├── removing_columns.md │ │ │ └── renaming_columns.md │ │ ├── data-enrichments │ │ │ ├── currency_conversion_data_enrichment.md │ │ │ ├── url-parser-data-enrichment.md │ │ │ └── user_agent_device_data_enrichment.md │ │ ├── 
dataset-access │ │ │ ├── data-quality-dashboard.md │ │ │ ├── dataset.md │ │ │ ├── dataset_snippets │ │ │ │ ├── __init__.py │ │ │ │ └── dataset_snippets.py │ │ │ ├── ibis-backend.md │ │ │ ├── index.md │ │ │ ├── marimo.md │ │ │ ├── sql-client.md │ │ │ ├── static │ │ │ │ ├── marimo_dataset.png │ │ │ │ ├── marimo_notebook.png │ │ │ │ ├── marimo_python.png │ │ │ │ └── marimo_sql.png │ │ │ └── streamlit.md │ │ ├── destination-tables.md │ │ ├── destination.md │ │ ├── full-loading.md │ │ ├── glossary.md │ │ ├── http │ │ │ ├── overview.md │ │ │ ├── requests.md │ │ │ └── rest-client.md │ │ ├── incremental-loading.md │ │ ├── incremental │ │ │ ├── advanced-state.md │ │ │ ├── cursor.md │ │ │ ├── lag.md │ │ │ └── troubleshooting.md │ │ ├── merge-loading.md │ │ ├── naming-convention.md │ │ ├── pipeline.md │ │ ├── resource.md │ │ ├── schema-contracts.md │ │ ├── schema-evolution.md │ │ ├── schema.md │ │ ├── snippets │ │ │ ├── __init__.py │ │ │ ├── destination-snippets.py │ │ │ ├── destination-toml.toml │ │ │ └── schema-snippets.py │ │ ├── source.md │ │ ├── state.md │ │ └── transformations │ │ │ ├── index.md │ │ │ └── transformation-snippets.py │ ├── getting-started-snippets.py │ ├── intro-snippets.py │ ├── intro.md │ ├── plus │ │ ├── core-concepts │ │ │ ├── cache.md │ │ │ ├── datasets.md │ │ │ ├── profiles.md │ │ │ └── project.md │ │ ├── ecosystem │ │ │ ├── delta.md │ │ │ ├── iceberg.md │ │ │ ├── ms-sql.md │ │ │ └── snowflake_plus.md │ │ ├── features │ │ │ ├── ai.md │ │ │ ├── data-access.md │ │ │ ├── projects.md │ │ │ ├── quality │ │ │ │ ├── data-quality.md │ │ │ │ └── tests.md │ │ │ └── transformations │ │ │ │ ├── dbt-transformations.md │ │ │ │ ├── index.md │ │ │ │ ├── python-transformations.md │ │ │ │ └── setup.md │ │ ├── getting-started │ │ │ ├── advanced_tutorial.md │ │ │ ├── installation.md │ │ │ └── tutorial.md │ │ ├── intro.md │ │ ├── production │ │ │ ├── observability.md │ │ │ └── runners.md │ │ └── reference.md │ ├── reference │ │ ├── __init__.py │ │ ├── 
command-line-interface.md │ │ ├── explainers │ │ │ ├── airflow-gcp-cloud-composer.md │ │ │ └── how-dlt-works.md │ │ ├── frequently-asked-questions.md │ │ ├── installation.md │ │ ├── performance.md │ │ ├── performance_snippets │ │ │ ├── .dlt │ │ │ │ └── config.toml │ │ │ ├── __init__.py │ │ │ ├── performance-snippets.py │ │ │ └── toml-snippets.toml │ │ ├── telemetry.md │ │ └── tracing.md │ ├── release-notes │ │ └── 1.12.1.md │ ├── running-in-production │ │ ├── alerting.md │ │ ├── images │ │ │ ├── airflow_dag_tasks.png │ │ │ ├── airflow_dags.png │ │ │ ├── airflow_slack_notification.png │ │ │ ├── github_actions_workflow_dag.png │ │ │ └── github_actions_workflows.png │ │ ├── monitoring.md │ │ ├── running.md │ │ └── tracing.md │ ├── tutorial │ │ ├── __init__.py │ │ ├── filesystem.md │ │ ├── load-data-from-an-api-snippets.py │ │ ├── load-data-from-an-api.md │ │ ├── playground.mdx │ │ ├── rest-api.md │ │ └── sql-database.md │ ├── utils.py │ └── walkthroughs │ │ ├── add-a-verified-source.md │ │ ├── add-incremental-configuration.md │ │ ├── add_credentials.md │ │ ├── adjust-a-schema.md │ │ ├── create-a-pipeline.md │ │ ├── create-new-destination.md │ │ ├── deploy-a-pipeline │ │ ├── deploy-gcp-cloud-function-as-webhook.md │ │ ├── deploy-with-airflow-composer.md │ │ ├── deploy-with-dagster.md │ │ ├── deploy-with-github-actions.md │ │ ├── deploy-with-google-cloud-functions.md │ │ ├── deploy-with-google-cloud-run.md │ │ ├── deploy-with-kestra.md │ │ ├── deploy-with-modal.md │ │ ├── deploy-with-orchestra.md │ │ ├── deploy-with-prefect.md │ │ ├── deploy_snippets │ │ │ ├── __init__.py │ │ │ └── deploy-with-modal-snippets.py │ │ └── images │ │ │ ├── add-credential.png │ │ │ ├── dag-folder.png │ │ │ ├── dlt_orchestra_node.png │ │ │ ├── orchestra_add_dlt_step.png │ │ │ ├── orchestra_dlt_credential.png │ │ │ ├── prefect-dashboard.png │ │ │ ├── prefect-flow-run.png │ │ │ └── run-trigger.png │ │ ├── dispatch-to-multiple-tables.md │ │ ├── run-a-pipeline.md │ │ ├── share-a-dataset.md │ │ 
└── zendesk-weaviate.md │ ├── docusaurus.config.js │ ├── netlify.toml │ ├── package-lock.json │ ├── package.json │ ├── pydoc-markdown.yml │ ├── pydoc_markdown_dlt.py │ ├── requirements.txt │ ├── sidebars.js │ ├── src │ ├── css │ │ └── custom.css │ └── theme │ │ ├── DocBreadcrumbs │ │ ├── Items │ │ │ └── Home │ │ │ │ ├── index.tsx │ │ │ │ └── styles.module.css │ │ ├── index.tsx │ │ └── styles.module.css │ │ ├── Heading │ │ └── index.js │ │ ├── MDXComponents.js │ │ ├── PlusAdmonition │ │ └── index.js │ │ └── Root.js │ ├── static │ ├── .nojekyll │ └── img │ │ ├── CO-Blue.svg │ │ ├── CO-White.svg │ │ ├── Concepts - Active.svg │ │ ├── Concepts - Inactive.svg │ │ ├── Customization-Active-1.svg │ │ ├── Customization-Active.svg │ │ ├── Customization-Inactive-1.svg │ │ ├── Customization-Inactive.svg │ │ ├── Destinations-Active-1.svg │ │ ├── Destinations-Active.svg │ │ ├── Destinations-Inactive-1.svg │ │ ├── Destinations-Inactive.svg │ │ ├── GeneralUsage-Active-1.svg │ │ ├── GeneralUsage-Active.svg │ │ ├── GeneralUsage-Inactive-1.svg │ │ ├── GeneralUsage-Inactive.svg │ │ ├── GettingStarted-Active-1.svg │ │ ├── GettingStarted-Active.svg │ │ ├── GettingStarted-Inactive-1.svg │ │ ├── GettingStarted-Inactive.svg │ │ ├── GithubDark.svg │ │ ├── GithubLight.svg │ │ ├── Howdltworks-Active-1.svg │ │ ├── Howdltworks-Active.svg │ │ ├── Howdltworks-Inactive-1.svg │ │ ├── Howdltworks-Inactive.svg │ │ ├── Installation-Active-1.svg │ │ ├── Installation-Active.svg │ │ ├── Installation-Inactive-1.svg │ │ ├── Installation-Inactive.svg │ │ ├── Introduction-Active-1.svg │ │ ├── Introduction-Active.svg │ │ ├── Introduction-Inactive-1.svg │ │ ├── Introduction-Inactive.svg │ │ ├── Pipelines-Active-1.svg │ │ ├── Pipelines-Active.svg │ │ ├── Pipelines-Inactive-1.svg │ │ ├── Pipelines-Inactive.svg │ │ ├── Plus-Active-1.svg │ │ ├── Plus-Active.svg │ │ ├── Plus-Inactive-1.svg │ │ ├── Plus-Inactive.svg │ │ ├── Reference-Active-1.svg │ │ ├── Reference-Active.svg │ │ ├── Reference-Inactive-1.svg │ │ ├── 
Reference-Inactive.svg │ │ ├── ReleaseNotes-Active-1.svg │ │ ├── ReleaseNotes-Active.svg │ │ ├── ReleaseNotes-Inactive-1.svg │ │ ├── ReleaseNotes-Inactive.svg │ │ ├── RunningInProduction-Active-1.svg │ │ ├── RunningInProduction-Active.svg │ │ ├── RunningInProduction-Inactive-1.svg │ │ ├── RunningInProduction-Inactive.svg │ │ ├── SlackDark.svg │ │ ├── SlackLight.svg │ │ ├── Sources-Active-1.svg │ │ ├── Sources-Active.svg │ │ ├── Sources-Inactive-1.svg │ │ ├── Sources-Inactive.svg │ │ ├── UserGuide-Active-1.svg │ │ ├── UserGuide-Active.svg │ │ ├── UserGuide-Inactive-1.svg │ │ ├── UserGuide-Inactive.svg │ │ ├── UsingLoadedData-Active-1.svg │ │ ├── UsingLoadedData-Active.svg │ │ ├── UsingLoadedData-Inactive-1.svg │ │ ├── UsingLoadedData-Inactive.svg │ │ ├── Walkthrough-Active-1.svg │ │ ├── Walkthrough-Active.svg │ │ ├── Walkthrough-Inactive-1.svg │ │ ├── Walkthrough-Inactive.svg │ │ ├── add-credential.png │ │ ├── add-package.png │ │ ├── architecture-diagram.png │ │ ├── bucket-details.png │ │ ├── dlt+_logo.png │ │ ├── dlt-high-level.png │ │ ├── dlt-logo-old.svg │ │ ├── dlt-logo.svg │ │ ├── dlt-onepager.png │ │ ├── dlt-pacman.gif │ │ ├── dlt.png │ │ ├── dlthub-logo.png │ │ ├── docs_where_does_dlt_fit_in_ETL_pipeline.png │ │ ├── docusaurus.png │ │ ├── favicon.ico │ │ ├── filesystem-tutorial │ │ ├── streamlit-data.png │ │ ├── streamlit-incremental-state.png │ │ └── streamlit-new-col.png │ │ ├── loading.svg │ │ ├── logo.svg │ │ ├── open-core-data-stack.png │ │ ├── open-source.svg │ │ ├── pipeline.png │ │ ├── python.svg │ │ ├── schema_evolution_colab_demo.png │ │ ├── slot-machine-gif.gif │ │ ├── streamlit-new.png │ │ ├── structured_lake.png │ │ ├── test-composer.png │ │ ├── trigger-config.png │ │ ├── undraw_docusaurus_mountain.svg │ │ ├── undraw_docusaurus_react.svg │ │ ├── undraw_docusaurus_tree.svg │ │ └── write-dispo-choice.png │ └── tools │ ├── clear_versions.js │ ├── preprocess_docs.js │ ├── update_snippets.js │ └── update_versions.js ├── mypy.ini ├── pyproject.toml ├── 
pytest.ini ├── tests ├── .dlt │ ├── config.toml │ └── dev.secrets.toml ├── .example.env ├── __init__.py ├── cases.py ├── cli │ ├── __init__.py │ ├── cases │ │ └── deploy_pipeline │ │ │ ├── .dlt │ │ │ ├── config.toml │ │ │ ├── secrets.toml │ │ │ └── secrets.toml.ci │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── debug_pipeline.py │ │ │ ├── dummy_pipeline.py │ │ │ └── requirements.txt │ ├── common │ │ ├── __init__.py │ │ ├── test_cli_invoke.py │ │ └── test_telemetry_command.py │ ├── conftest.py │ ├── test_config_toml_writer.py │ ├── test_deploy_command.py │ ├── test_init_command.py │ ├── test_pipeline_command.py │ └── utils.py ├── common │ ├── __init__.py │ ├── cases │ │ ├── configuration │ │ │ ├── .dlt │ │ │ │ ├── config.toml │ │ │ │ └── secrets.toml │ │ │ ├── .wrong.dlt │ │ │ │ └── config.toml │ │ │ ├── config.yml │ │ │ ├── dlt_home │ │ │ │ └── config.toml │ │ │ └── runtime │ │ │ │ └── .dlt │ │ │ │ └── config.toml │ │ ├── destinations │ │ │ ├── __init__.py │ │ │ └── null.py │ │ ├── level-dragon-333019-707809ee408a.json.b64 │ │ ├── mod_bot_case.json │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── google │ │ │ │ └── colab │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── userdata.py │ │ │ └── uniq_mod_121.py │ │ ├── normalizers │ │ │ ├── __init__.py │ │ │ ├── snake_no_x.py │ │ │ ├── sql_upper.py │ │ │ └── title_case.py │ │ ├── oauth_client_secret_929384042504.json │ │ ├── pua_encoded_row.json │ │ ├── rasa_event_bot_metadata.json │ │ ├── schemas │ │ │ ├── eth │ │ │ │ ├── ethereum_schema_v10.yml │ │ │ │ ├── ethereum_schema_v11.yml │ │ │ │ ├── ethereum_schema_v3.yml │ │ │ │ ├── ethereum_schema_v4.yml │ │ │ │ ├── ethereum_schema_v5.yml │ │ │ │ ├── ethereum_schema_v6.yml │ │ │ │ ├── ethereum_schema_v7.yml │ │ │ │ ├── ethereum_schema_v8.yml │ │ │ │ └── ethereum_schema_v9.yml │ │ │ ├── ev1 │ │ │ │ ├── event.schema.7z │ │ │ │ ├── event.schema.bak.json.gz │ │ │ │ ├── event.schema.json │ │ │ │ └── model.schema.json │ │ │ ├── ev2 │ │ │ │ └── event.schema.json │ │ 
│ ├── github │ │ │ │ └── issues.schema.json │ │ │ ├── local │ │ │ │ ├── event.schema.json │ │ │ │ ├── event.schema.yaml │ │ │ │ └── name_mismatch.schema.yaml │ │ │ ├── rasa │ │ │ │ ├── event.schema.json │ │ │ │ └── model.schema.json │ │ │ └── sheets │ │ │ │ └── google_spreadsheet_v4.schema.json │ │ ├── secret-kube │ │ │ └── secret-kube │ │ ├── secret-value │ │ ├── secrets │ │ │ ├── deploy-key │ │ │ ├── deploy_key.pub │ │ │ ├── encrypted-private-key │ │ │ └── encrypted-private-key-base64 │ │ ├── simple_row.json │ │ └── weird_rows.json │ ├── configuration │ │ ├── __init__.py │ │ ├── test_accessors.py │ │ ├── test_annotation_future.py │ │ ├── test_configuration.py │ │ ├── test_container.py │ │ ├── test_credentials.py │ │ ├── test_environ_provider.py │ │ ├── test_inject.py │ │ ├── test_providers.py │ │ ├── test_sections.py │ │ ├── test_spec_union.py │ │ ├── test_toml_provider.py │ │ └── utils.py │ ├── data_writers │ │ ├── __init__.py │ │ ├── test_data_writers.py │ │ └── utils.py │ ├── destination │ │ ├── __init__.py │ │ ├── test_destination_capabilities.py │ │ └── test_reference.py │ ├── normalizers │ │ ├── __init__.py │ │ ├── custom_normalizers.py │ │ ├── test_json_relational.py │ │ ├── test_naming.py │ │ ├── test_naming_duck_case.py │ │ ├── test_naming_snake_case.py │ │ └── test_naming_sql.py │ ├── reflection │ │ ├── __init__.py │ │ ├── cases │ │ │ └── modules │ │ │ │ ├── broken_mod.py │ │ │ │ ├── missing_dep.py │ │ │ │ ├── pkg_1 │ │ │ │ └── mod_2 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── mod_bkn │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── mod_4.py │ │ │ │ │ └── pkg_3 │ │ │ │ │ └── mod_4.py │ │ │ │ ├── pkg_missing_dep │ │ │ │ ├── __init__.py │ │ │ │ └── mod_in_pkg_missing_dep.py │ │ │ │ └── regular_mod.py │ │ ├── test_inspect.py │ │ ├── test_ref_ops.py │ │ └── test_reflect_spec.py │ ├── runners │ │ ├── __init__.py │ │ ├── test_runnable.py │ │ ├── test_runners.py │ │ ├── test_std_pipes.py │ │ ├── test_venv.py │ │ └── utils.py │ ├── runtime │ │ ├── __init__.py │ │ ├── 
conftest.py │ │ ├── dlt_plus │ │ │ ├── __init__.py │ │ │ └── version.py │ │ ├── test_collector.py │ │ ├── test_logging.py │ │ ├── test_run_context.py │ │ ├── test_run_context_data_dir.py │ │ ├── test_run_context_random_data_dir.py │ │ ├── test_signals.py │ │ ├── test_telemetry.py │ │ └── utils.py │ ├── schema │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_coercion.py │ │ ├── test_detections.py │ │ ├── test_filtering.py │ │ ├── test_import_normalizers.py │ │ ├── test_inference.py │ │ ├── test_merges.py │ │ ├── test_normalize_identifiers.py │ │ ├── test_schema.py │ │ ├── test_schema_contract.py │ │ ├── test_schema_migrations.py │ │ └── test_versioning.py │ ├── scripts │ │ ├── __init__.py │ │ ├── args.py │ │ ├── counter.py │ │ ├── cwd.py │ │ ├── empty.py │ │ ├── environ.py │ │ ├── long_lines.py │ │ ├── long_lines_fails.py │ │ ├── no_stdout_exception.py │ │ ├── no_stdout_no_stderr_with_fail.py │ │ ├── raises.py │ │ ├── raising_counter.py │ │ ├── stderr_counter.py │ │ ├── stdout_encode_exception.py │ │ ├── stdout_encode_result.py │ │ └── stdout_encode_unpicklable.py │ ├── storages │ │ ├── __init__.py │ │ ├── custom │ │ │ └── freshman_kgs.xlsx │ │ ├── samples │ │ │ ├── csv │ │ │ │ ├── freshman_kgs.csv │ │ │ │ ├── freshman_lbs.csv │ │ │ │ ├── mlb_players.csv │ │ │ │ └── mlb_teams_2012.csv │ │ │ ├── gzip │ │ │ │ └── taxi.csv.gz │ │ │ ├── jsonl │ │ │ │ └── mlb_players.jsonl │ │ │ ├── met_csv │ │ │ │ ├── A801 │ │ │ │ │ └── A881_20230920.csv │ │ │ │ └── A803 │ │ │ │ │ ├── A803_20230919.csv │ │ │ │ │ └── A803_20230920.csv │ │ │ ├── parquet │ │ │ │ └── mlb_players.parquet │ │ │ └── sample.txt │ │ ├── test_file_storage.py │ │ ├── test_load_package.py │ │ ├── test_load_storage.py │ │ ├── test_local_filesystem.py │ │ ├── test_normalize_storage.py │ │ ├── test_schema_storage.py │ │ ├── test_transactional_file.py │ │ ├── test_versioned_storage.py │ │ └── utils.py │ ├── test_arithmetics.py │ ├── test_git.py │ ├── test_json.py │ ├── test_jsonpath.py │ ├── test_time.py │ ├── 
test_typing.py │ ├── test_utils.py │ ├── test_validation.py │ ├── test_version.py │ ├── test_versioned_state.py │ ├── test_wei.py │ └── utils.py ├── conftest.py ├── destinations │ ├── __init__.py │ ├── conftest.py │ ├── test_custom_destination.py │ ├── test_destination_name_and_config.py │ ├── test_file_format_resolver.py │ ├── test_path_utils.py │ ├── test_queries.py │ ├── test_readable_dbapi_dataset.py │ └── test_utils.py ├── e2e │ ├── __init__.py │ └── helpers │ │ ├── __init__.py │ │ └── studio │ │ ├── __init__.py │ │ └── test_e2e.py ├── extract │ ├── __init__.py │ ├── cases │ │ ├── __init__.py │ │ ├── eth_source │ │ │ ├── __init__.py │ │ │ ├── ethereum.schema.yaml │ │ │ └── source.py │ │ ├── imported.any │ │ ├── section_source │ │ │ ├── __init__.py │ │ │ ├── external_resources.py │ │ │ └── named_module.py │ │ └── sources │ │ │ ├── __init__.py │ │ │ └── shorthand.py │ ├── conftest.py │ ├── data_writers │ │ ├── __init__.py │ │ ├── test_buffered_writer.py │ │ └── test_data_item_storage.py │ ├── test_decorators.py │ ├── test_extract.py │ ├── test_extract_pipe.py │ ├── test_incremental.py │ ├── test_sources.py │ ├── test_state.py │ ├── test_transform.py │ ├── test_utils.py │ ├── test_validation.py │ └── utils.py ├── helpers │ ├── airflow_tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_airflow_provider.py │ │ ├── test_airflow_wrapper.py │ │ ├── test_join_airflow_scheduler.py │ │ └── utils.py │ ├── dbt_cloud_tests │ │ ├── __init__.py │ │ └── test_dbt_cloud.py │ ├── dbt_tests │ │ ├── __init__.py │ │ ├── cases │ │ │ ├── jaffle_customers_incremental.sql │ │ │ ├── jaffle_customers_incremental_new_column.sql │ │ │ ├── jaffle_customers_with_error.sql │ │ │ ├── profiles.yml │ │ │ ├── profiles_invalid_credentials.yml │ │ │ ├── run_execution_incremental_fail.pickle.hex │ │ │ └── run_result_incremental_fail.pickle.hex │ │ ├── local │ │ │ ├── __init__.py │ │ │ ├── test_dbt_utils.py │ │ │ ├── test_runner_destinations.py │ │ │ └── utils.py │ │ ├── 
test_runner_dbt_versions.py │ │ └── utils.py │ ├── providers │ │ ├── __init__.py │ │ └── test_google_secrets_provider.py │ ├── streamlit_tests │ │ ├── __init__.py │ │ └── test_streamlit_show_resources.py │ └── studio │ │ ├── __init__.py │ │ ├── test_all_cells.py │ │ ├── test_utils.py │ │ └── test_welcome_page.py ├── libs │ ├── __init__.py │ ├── conftest.py │ ├── pyarrow │ │ ├── __init__.py │ │ ├── test_pyarrow.py │ │ └── test_pyarrow_normalizer.py │ ├── test_buffered_writers.py │ ├── test_csv_writer.py │ ├── test_deltalake.py │ ├── test_parquet_writer.py │ ├── test_pydantic.py │ ├── test_sqglot_query_complexity_analyzer.py │ └── test_sqlglot.py ├── load │ ├── __init__.py │ ├── athena_iceberg │ │ ├── __init__.py │ │ ├── test_athena_adapter.py │ │ ├── test_athena_iceberg.py │ │ └── test_lakeformation.py │ ├── bigquery │ │ ├── __init__.py │ │ ├── test_bigquery_client.py │ │ ├── test_bigquery_streaming_insert.py │ │ └── test_bigquery_table_builder.py │ ├── cases │ │ ├── __init__.py │ │ ├── event.schema.json │ │ ├── event_user.json │ │ ├── fake_destination.py │ │ ├── loading │ │ │ ├── csv_header.csv │ │ │ ├── csv_no_header.csv │ │ │ ├── csv_no_header.csv.gz │ │ │ ├── cve.json │ │ │ ├── event_bot.181291798a78198.0.unsupported_format │ │ │ ├── event_bot.b1d32c6660b242aaabbf3fc27245b7e6.0.insert_values │ │ │ ├── event_loop_interrupted.1234.0.jsonl │ │ │ ├── event_loop_interrupted.839c6e6b514e427687586ccc65bf133f.0.jsonl │ │ │ ├── event_user.1234.0.jsonl │ │ │ ├── event_user.839c6e6b514e427687586ccc65bf133f.0.jsonl │ │ │ ├── event_user.b1d32c6660b242aaabbf3fc27245b7e6.0.insert_values │ │ │ ├── header.jsonl │ │ │ ├── schema.json │ │ │ └── schema_updates.json │ │ ├── long_column_name.json │ │ └── long_table_name.json │ ├── clickhouse │ │ ├── __init__.py │ │ ├── docker-compose.yml │ │ ├── test_clickhouse_adapter.py │ │ ├── test_clickhouse_configuration.py │ │ ├── test_clickhouse_gcs_s3_compatibility.py │ │ ├── test_clickhouse_table_builder.py │ │ ├── test_type_mapper.py │ │ 
├── test_utils.py │ │ └── utils.py │ ├── conftest.py │ ├── databricks │ │ ├── __init__.py │ │ └── test_databricks_configuration.py │ ├── dremio │ │ ├── __init__.py │ │ ├── bootstrap │ │ │ ├── Dockerfile │ │ │ ├── bootstrap_dremio.sh │ │ │ ├── minio.json │ │ │ └── nas.json │ │ ├── docker-compose.yml │ │ └── test_dremio_client.py │ ├── duckdb │ │ ├── __init__.py │ │ ├── test_duckdb_client.py │ │ └── test_duckdb_table_builder.py │ ├── filesystem │ │ ├── __init__.py │ │ ├── test_aws_credentials.py │ │ ├── test_azure_credentials.py │ │ ├── test_credentials_mixins.py │ │ ├── test_filesystem_client.py │ │ ├── test_filesystem_common.py │ │ ├── test_filesystem_sql_secrets.py │ │ ├── test_gcs_credentials.py │ │ ├── test_sql_client.py │ │ └── utils.py │ ├── filesystem_sftp │ │ ├── __init__.py │ │ ├── bootstrap │ │ │ ├── Dockerfile │ │ │ ├── SETUP.md │ │ │ ├── billy_rsa │ │ │ ├── billy_rsa-cert.pub │ │ │ ├── billy_rsa.pub │ │ │ ├── bobby_rsa │ │ │ ├── bobby_rsa.pub │ │ │ ├── ca_rsa │ │ │ ├── ca_rsa.pub │ │ │ ├── foo_rsa │ │ │ └── foo_rsa.pub │ │ ├── docker-compose.yml │ │ └── test_filesystem_sftp.py │ ├── lancedb │ │ ├── __init__.py │ │ ├── test_config.py │ │ ├── test_merge.py │ │ ├── test_model_providers.py │ │ ├── test_pipeline.py │ │ ├── test_schema_alignment.py │ │ ├── test_unit_test_add_vector_column.py │ │ ├── test_utils.py │ │ └── utils.py │ ├── motherduck │ │ ├── __init__.py │ │ └── test_motherduck_client.py │ ├── mssql │ │ ├── __init__.py │ │ ├── test_mssql_configuration.py │ │ ├── test_mssql_extras.py │ │ └── test_mssql_table_builder.py │ ├── pipeline │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_arrow_loading.py │ │ ├── test_athena.py │ │ ├── test_bigquery.py │ │ ├── test_clickhouse.py │ │ ├── test_csv_loading.py │ │ ├── test_databricks_pipeline.py │ │ ├── test_dbt_helper.py │ │ ├── test_dremio.py │ │ ├── test_drop.py │ │ ├── test_duckdb.py │ │ ├── test_filesystem_pipeline.py │ │ ├── test_merge_disposition.py │ │ ├── test_open_table_pipeline.py │ │ ├── 
test_open_tables.py │ │ ├── test_pipelines.py │ │ ├── test_postgres.py │ │ ├── test_redshift.py │ │ ├── test_refresh_modes.py │ │ ├── test_replace_disposition.py │ │ ├── test_restore_state.py │ │ ├── test_scd2.py │ │ ├── test_snowflake_pipeline.py │ │ ├── test_sqlalchemy_pipeline.py │ │ ├── test_stage_loading.py │ │ ├── test_synapse.py │ │ ├── test_write_disposition_changes.py │ │ └── utils.py │ ├── postgres │ │ ├── __init__.py │ │ ├── docker-compose.yml │ │ ├── postgres.env │ │ ├── postgres │ │ │ ├── 01_init.sql │ │ │ └── Dockerfile │ │ ├── test_postgres_client.py │ │ ├── test_postgres_table_builder.py │ │ └── utils.py │ ├── qdrant │ │ ├── __init__.py │ │ ├── test_pipeline.py │ │ ├── test_restore_state.py │ │ └── utils.py │ ├── redshift │ │ ├── __init__.py │ │ ├── setup_db.sql │ │ ├── test_redshift_client.py │ │ └── test_redshift_table_builder.py │ ├── snowflake │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_snowflake_client.py │ │ ├── test_snowflake_configuration.py │ │ ├── test_snowflake_table_builder.py │ │ └── test_snowflake_utils.py │ ├── sources │ │ ├── __init__.py │ │ ├── filesystem │ │ │ ├── __init__.py │ │ │ ├── cases.py │ │ │ └── test_filesystem_source.py │ │ ├── rest_api │ │ │ ├── __init__.py │ │ │ └── test_rest_api_source.py │ │ └── sql_database │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── sql_source.py │ │ │ ├── test_helpers.py │ │ │ ├── test_sql_database_source.py │ │ │ └── test_sql_database_source_all_destinations.py │ ├── sqlalchemy │ │ ├── __init__.py │ │ ├── docker-compose.yml │ │ └── test_sqlalchemy_configuration.py │ ├── synapse │ │ ├── __init__.py │ │ ├── test_synapse_configuration.py │ │ ├── test_synapse_table_builder.py │ │ ├── test_synapse_table_indexing.py │ │ └── utils.py │ ├── test_configuration.py │ ├── test_dummy_client.py │ ├── test_insert_job_client.py │ ├── test_job_client.py │ ├── test_jobs.py │ ├── test_lineage.py │ ├── test_parallelism_util.py │ ├── test_read_interfaces.py │ ├── test_sql_client.py │ ├── 
transformations │ │ ├── __init__.py │ │ ├── test_basic_transformations.py │ │ ├── test_incremental_transforms.py │ │ ├── test_model_item_format.py │ │ ├── test_multidataset_transformations.py │ │ ├── test_transformation_lineage.py │ │ └── utils.py │ ├── utils.py │ └── weaviate │ │ ├── __init__.py │ │ ├── docker-compose.yml │ │ ├── test_naming.py │ │ ├── test_pipeline.py │ │ ├── test_weaviate_client.py │ │ └── utils.py ├── normalize │ ├── __init__.py │ ├── cases │ │ ├── ethereum.blocks.9c1d9b504ea240a482b007788d5cd61c_2.json │ │ ├── event.event.bot_load_metadata_2987398237498798.json │ │ ├── event.event.many_load_2.json │ │ ├── event.event.slot_session_metadata_1.json │ │ ├── event.event.user_load_1.json │ │ ├── event.event.user_load_v228_1.json │ │ ├── github.events.load_page_1_duck.json │ │ ├── github.issues.load_page_5_duck.json │ │ ├── rasa_event_bot_metadata.json │ │ ├── rasa_event_bot_template.json │ │ ├── rasa_user_event.json │ │ └── schemas │ │ │ ├── ethereum.schema.json │ │ │ └── event.schema.json │ ├── mock_rasa_json_normalizer.py │ ├── test_model_item_normalizer.py │ ├── test_normalize.py │ └── utils.py ├── pipeline │ ├── __init__.py │ ├── cases │ │ ├── contracts │ │ │ └── trace.schema.yaml │ │ ├── github_events.json │ │ ├── github_pipeline │ │ │ ├── .dlt │ │ │ │ └── config.toml │ │ │ ├── __init__.py │ │ │ ├── github_extract.py │ │ │ ├── github_load.py │ │ │ ├── github_normalize.py │ │ │ ├── github_pipeline.py │ │ │ ├── github_rev.py │ │ │ └── github_scd2.py │ │ └── state │ │ │ └── state.v1.json │ ├── conftest.py │ ├── test_arrow_sources.py │ ├── test_dlt_versions.py │ ├── test_drop_helpers.py │ ├── test_import_export_schema.py │ ├── test_max_nesting.py │ ├── test_parallelism.py │ ├── test_pipeline.py │ ├── test_pipeline_extra.py │ ├── test_pipeline_state.py │ ├── test_pipeline_trace.py │ ├── test_platform_connection.py │ ├── test_resources_evaluation.py │ ├── test_schema_contracts.py │ ├── test_schema_updates.py │ ├── test_utils.py │ └── utils.py ├── 
plugins │ ├── __init__.py │ ├── dlt_example_plugin │ │ ├── Makefile │ │ ├── README.md │ │ ├── dlt_example_plugin │ │ │ ├── __init__.py │ │ │ ├── destinations │ │ │ │ ├── __init__.py │ │ │ │ ├── impl │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── factory.py │ │ │ │ └── pushdb.py │ │ │ ├── plugin.py │ │ │ └── sources │ │ │ │ ├── __init__.py │ │ │ │ └── github.py │ │ └── pyproject.toml │ └── test_plugin_discovery.py ├── plus │ ├── __init__.py │ ├── test_cli.py │ ├── test_destinations.py │ └── test_sources.py ├── reflection │ ├── __init__.py │ ├── module_cases │ │ ├── __init__.py │ │ ├── all_imports.py │ │ ├── dlt_import_exception.py │ │ ├── executes_resource.py │ │ ├── import_as_type.py │ │ ├── no_pkg.py │ │ ├── raises.py │ │ ├── stripe_analytics │ │ │ ├── __init__.py │ │ │ ├── helpers.py │ │ │ └── stripe_analytics.py │ │ ├── stripe_analytics_pipeline.py │ │ └── syntax_error.py │ └── test_script_inspector.py ├── sources │ ├── __init__.py │ ├── conftest.py │ ├── filesystem │ │ ├── __init__.py │ │ ├── test_config_sections.py │ │ └── test_filesystem_pipeline_template.py │ ├── helpers │ │ ├── __init__.py │ │ ├── rest_client │ │ │ ├── __init__.py │ │ │ ├── api_router.py │ │ │ ├── conftest.py │ │ │ ├── paginators.py │ │ │ ├── private_key.pem │ │ │ ├── test_client.py │ │ │ ├── test_detector.py │ │ │ ├── test_mock_api_server.py │ │ │ ├── test_paginators.py │ │ │ ├── test_requests_paginate.py │ │ │ └── test_utils.py │ │ ├── test_requests.py │ │ └── transform │ │ │ └── test_row_hash.py │ ├── rest_api │ │ ├── __init__.py │ │ ├── configurations │ │ │ ├── __init__.py │ │ │ ├── source_configs.py │ │ │ ├── test_auth_config.py │ │ │ ├── test_configuration.py │ │ │ ├── test_custom_auth_config.py │ │ │ ├── test_custom_paginator_config.py │ │ │ ├── test_incremental_config.py │ │ │ ├── test_paginator_config.py │ │ │ ├── test_resolve_config.py │ │ │ └── test_response_actions_config.py │ │ ├── conftest.py │ │ ├── integration │ │ │ ├── __init__.py │ │ │ ├── test_offline.py │ │ │ ├── 
test_processing_steps.py │ │ │ └── test_response_actions.py │ │ ├── test_config_sections.py │ │ ├── test_interpolation.py │ │ ├── test_rest_api_pipeline_template.py │ │ └── test_rest_api_source.py │ ├── sql_database │ │ ├── __init__.py │ │ ├── test_arrow_helpers.py │ │ ├── test_config_sections.py │ │ ├── test_schema_types.py │ │ └── test_sql_database_pipeline_template.py │ └── test_pipeline_templates.py ├── tests │ ├── __init__.py │ └── load │ │ ├── __init__.py │ │ └── test_utils.py ├── tools │ ├── __init__.py │ ├── clean_athena.py │ ├── clean_redshift.py │ ├── create_storages.py │ └── early_dependencies.py ├── transformations │ ├── __init__.py │ ├── conftest.py │ ├── test_lineage.py │ ├── test_transformation_decorator.py │ └── test_transformations.py └── utils.py ├── tools ├── __init__.py └── poetry-deps.sh ├── tox.ini └── uv.lock /.dockerignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .direnv 3 | .mypy_cache 4 | .pytest_cache 5 | htmlcov 6 | .coverage 7 | __pycache__ 8 | .eggs 9 | .egg-info 10 | _storage 11 | _test_storage 12 | .vscode 13 | Dockerfile 14 | .md 15 | _secrets 16 | docs 17 | tests 18 | logs 19 | experiments -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # do not look above 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | insert_final_newline = false 7 | trim_trailing_whitespace = true 8 | charset = utf-8 9 | indent_style = space 10 | indent_size = 4 11 | max_line_length = 200 12 | 13 | [{Makefile,**.mk}] 14 | # Use tabs for indentation (Makefiles require tabs) 15 | indent_style = tab 16 | 17 | [*.{yaml,yml,js,md}] 18 | indent_size = 2 19 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # introduce formatting with 
black 2 | c3ddbaa6e61c44a3809e625c802cb4c7632934a3 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | blank_issues_enabled: true 3 | contact_links: 4 | - name: Ask a question or get support on dlt Slack 5 | url: https://dlthub.com/community 6 | about: Need help or support? Join our dlt community on Slack and get assistance. 7 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | ### Description 5 | 6 | 7 | 10 | ### Related Issues 11 | 12 | - Fixes #... 13 | - Closes #... 14 | - Resolves #... 15 | 16 | 19 | ### Additional Context 20 | 21 | 26 | -------------------------------------------------------------------------------- /.github/workflows/test_tools_build_images.yml: -------------------------------------------------------------------------------- 1 | name: tools | docker images 2 | 3 | on: 4 | workflow_call: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | run_airflow: 9 | name: tools | docker images build 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Check out 14 | uses: actions/checkout@master 15 | 16 | - name: Setup Python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.10" 20 | 21 | - name: Install uv 22 | uses: astral-sh/setup-uv@v6 23 | with: 24 | python-version: "3.10" 25 | activate-environment: true 26 | enable-cache: true 27 | 28 | - name: Build images 29 | run: make test-build-images 30 | -------------------------------------------------------------------------------- /.github/workflows/tools_deploy_docs.yml: -------------------------------------------------------------------------------- 1 | name: docs | deploy docs 2 | 3 | on: 4 | schedule: 5 | - cron: '0 2 * * *' 6 | workflow_dispatch: 7 | 8 | env: 9 | 
NETLIFY_DOCS_PRODUCTION_DEPLOY_HOOK: ${{ secrets.NETLIFY_DOCS_PRODUCTION_DEPLOY_HOOK }} 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-latest 14 | if: ${{ !github.event.pull_request.head.repo.fork }} 15 | steps: 16 | - name: Trigger deploy hook 17 | run: curl ${{ env.NETLIFY_DOCS_PRODUCTION_DEPLOY_HOOK }} -X POST -------------------------------------------------------------------------------- /compiled_packages.txt: -------------------------------------------------------------------------------- 1 | pyarrow\|cffi\|idna\|simplejson\|pendulum\|grpcio\|google-crc32c -------------------------------------------------------------------------------- /deploy/dlt/README.md: -------------------------------------------------------------------------------- 1 | Example `Dockerfile` that installs the `dlt` package on an Alpine Linux image. For actual pipeline deployment, please refer to the [deploy a pipeline walkthrough](https://dlthub.com/docs/walkthroughs/deploy-a-pipeline/deploy-with-github-actions). 2 | -------------------------------------------------------------------------------- /dlt/__main__.py: -------------------------------------------------------------------------------- 1 | from dlt.cli._dlt import main 2 | 3 | if __name__ == "__main__": 4 | main() 5 | -------------------------------------------------------------------------------- /dlt/cli/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.cli.reference import SupportsCliCommand 2 | from dlt.cli.exceptions import CliCommandException 3 | 4 | DEFAULT_VERIFIED_SOURCES_REPO = "https://github.com/dlt-hub/verified-sources.git" 5 | DEFAULT_VIBE_SOURCES_REPO = "https://github.com/dlt-hub/vibe-hub.git" 6 | 7 | 8 | __all__ = [ 9 | "SupportsCliCommand", 10 | "CliCommandException", 11 | "DEFAULT_VERIFIED_SOURCES_REPO", 12 | "DEFAULT_VIBE_SOURCES_REPO", 13 | ] 14 | -------------------------------------------------------------------------------- /dlt/cli/debug.py: 
-------------------------------------------------------------------------------- 1 | """Provides a global debug setting for the CLI""" 2 | 3 | _DEBUG_FLAG = False 4 | 5 | 6 | def enable_debug() -> None: 7 | global _DEBUG_FLAG 8 | _DEBUG_FLAG = True 9 | 10 | 11 | def disable_debug() -> None: 12 | global _DEBUG_FLAG 13 | _DEBUG_FLAG = False 14 | 15 | 16 | def is_debug_enabled() -> bool: 17 | global _DEBUG_FLAG 18 | return _DEBUG_FLAG 19 | -------------------------------------------------------------------------------- /dlt/cli/reference.py: -------------------------------------------------------------------------------- 1 | from typing import Protocol, Optional 2 | 3 | import argparse 4 | 5 | 6 | class SupportsCliCommand(Protocol): 7 | """Protocol for defining one dlt cli command""" 8 | 9 | command: str 10 | """name of the command""" 11 | help_string: str 12 | """the help string for argparse""" 13 | description: Optional[str] 14 | """the more detailed description for argparse, may include markdown for the docs""" 15 | docs_url: Optional[str] 16 | """the default docs url to be printed in case of an exception""" 17 | 18 | def configure_parser(self, parser: argparse.ArgumentParser) -> None: 19 | """Configures the parser for the given argument""" 20 | ... 21 | 22 | def execute(self, args: argparse.Namespace) -> None: 23 | """Executes the command with the given arguments""" 24 | ... 
25 | -------------------------------------------------------------------------------- /dlt/common/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.common import logger 2 | from dlt.common.arithmetics import Decimal 3 | from dlt.common.wei import Wei 4 | from dlt.common.pendulum import pendulum 5 | from dlt.common.json import json 6 | from dlt.common.runtime.signals import sleep 7 | 8 | __all__ = ["Decimal", "Wei", "pendulum", "json", "sleep", "logger"] 9 | -------------------------------------------------------------------------------- /dlt/common/configuration/const.py: -------------------------------------------------------------------------------- 1 | from dlt.common.pendulum import pendulum 2 | 3 | RANDOM_DATE = pendulum.datetime(1768, 7, 21, 2, 56, 7, 3, tz="UTC") 4 | TYPE_EXAMPLES = { 5 | "text": "", 6 | "timestamp": RANDOM_DATE.to_iso8601_string(), 7 | "date": RANDOM_DATE.to_date_string(), 8 | } 9 | -------------------------------------------------------------------------------- /dlt/common/data_types/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.common.data_types.type_helpers import coerce_value, py_type_to_sc_type 2 | from dlt.common.data_types.typing import TDataType, DATA_TYPES 3 | 4 | __all__ = ["coerce_value", "py_type_to_sc_type", "TDataType", "DATA_TYPES"] 5 | -------------------------------------------------------------------------------- /dlt/common/data_types/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Set 2 | 3 | from dlt.common.typing import get_args 4 | 5 | 6 | TDataType = Literal[ 7 | "text", 8 | "double", 9 | "bool", 10 | "timestamp", 11 | "bigint", 12 | "binary", 13 | "json", 14 | "decimal", 15 | "wei", 16 | "date", 17 | "time", 18 | ] 19 | DATA_TYPES: Set[TDataType] = set(get_args(TDataType)) 20 | 
-------------------------------------------------------------------------------- /dlt/common/data_writers/configuration.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from dlt.common.warnings import Dlt100DeprecationWarning 3 | from dlt.common.destination.configuration import ( 4 | CsvQuoting, 5 | CsvFormatConfiguration, 6 | ParquetFormatConfiguration, 7 | ) 8 | 9 | warnings.warn( 10 | "Please import format configuration from dlt.common.destination.configuration", 11 | Dlt100DeprecationWarning, 12 | stacklevel=2, 13 | ) 14 | 15 | __all__ = ["CsvQuoting", "CsvFormatConfiguration", "ParquetFormatConfiguration"] 16 | -------------------------------------------------------------------------------- /dlt/common/destination/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.common.destination.capabilities import ( 2 | DestinationCapabilitiesContext, 3 | merge_caps_file_formats, 4 | TLoaderFileFormat, 5 | LOADER_FILE_FORMATS, 6 | ) 7 | from dlt.common.destination.reference import ( 8 | TDestinationReferenceArg, 9 | Destination, 10 | AnyDestination, 11 | DestinationReference, 12 | ) 13 | from dlt.common.destination.typing import PreparedTableSchema 14 | 15 | __all__ = [ 16 | "DestinationCapabilitiesContext", 17 | "merge_caps_file_formats", 18 | "TLoaderFileFormat", 19 | "LOADER_FILE_FORMATS", 20 | "PreparedTableSchema", 21 | "TDestinationReferenceArg", 22 | "Destination", 23 | "AnyDestination", 24 | "DestinationReference", 25 | ] 26 | -------------------------------------------------------------------------------- /dlt/common/destination/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional 2 | 3 | from dlt.common.schema.typing import ( 4 | _TTableSchemaBase, 5 | TWriteDisposition, 6 | TTableReferenceParam, 7 | ) 8 | 9 | 10 | class PreparedTableSchema(_TTableSchemaBase, 
total=False): 11 | """Table schema with all hints prepared to be loaded""" 12 | 13 | write_disposition: TWriteDisposition 14 | references: Optional[TTableReferenceParam] 15 | _x_prepared: bool # needed for the type checker 16 | -------------------------------------------------------------------------------- /dlt/common/incremental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/common/incremental/__init__.py -------------------------------------------------------------------------------- /dlt/common/libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/common/libs/__init__.py -------------------------------------------------------------------------------- /dlt/common/libs/numpy.py: -------------------------------------------------------------------------------- 1 | from dlt.common.exceptions import MissingDependencyException 2 | 3 | try: 4 | import numpy # noqa: I251 5 | except ModuleNotFoundError: 6 | raise MissingDependencyException("dlt numpy Helpers", ["numpy"]) 7 | -------------------------------------------------------------------------------- /dlt/common/libs/pandas.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from dlt.common.exceptions import MissingDependencyException 3 | 4 | try: 5 | import pandas 6 | from pandas import DataFrame 7 | except ModuleNotFoundError: 8 | raise MissingDependencyException("dlt Pandas Helpers", ["pandas"]) 9 | 10 | 11 | def pandas_to_arrow(df: pandas.DataFrame, preserve_index: bool = False) -> Any: 12 | """Converts pandas to arrow or raises an exception if pyarrow is not installed""" 13 | from dlt.common.libs.pyarrow import pyarrow as pa 14 | 15 | # NOTE: None preserves named indexes but 
ignores unnamed 16 | return pa.Table.from_pandas(df, preserve_index=preserve_index) 17 | -------------------------------------------------------------------------------- /dlt/common/libs/pandas_sql.py: -------------------------------------------------------------------------------- 1 | from dlt.common.exceptions import MissingDependencyException 2 | 3 | 4 | try: 5 | from pandas.io.sql import _wrap_result 6 | except ModuleNotFoundError: 7 | raise MissingDependencyException("dlt pandas helper for sql", ["pandas"]) 8 | -------------------------------------------------------------------------------- /dlt/common/libs/sql_alchemy_compat.py: -------------------------------------------------------------------------------- 1 | try: 2 | import sqlalchemy 3 | except ImportError: 4 | from dlt.common.libs.sql_alchemy_shims import URL, make_url 5 | else: 6 | from sqlalchemy.engine import URL, make_url # type: ignore[assignment] 7 | -------------------------------------------------------------------------------- /dlt/common/normalizers/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.common.normalizers.typing import TJSONNormalizer, TNormalizersConfig 2 | from dlt.common.normalizers.naming import NamingConvention 3 | 4 | 5 | __all__ = [ 6 | "NamingConvention", 7 | "TJSONNormalizer", 8 | "TNormalizersConfig", 9 | ] 10 | -------------------------------------------------------------------------------- /dlt/common/normalizers/exceptions.py: -------------------------------------------------------------------------------- 1 | from dlt.common.exceptions import DltException 2 | 3 | 4 | class NormalizerException(DltException): 5 | pass 6 | 7 | 8 | class InvalidJsonNormalizer(NormalizerException): 9 | def __init__(self, required_normalizer: str, present_normalizer: str) -> None: 10 | self.required_normalizer = required_normalizer 11 | self.present_normalizer = present_normalizer 12 | super().__init__( 13 | f"Operation requires 
`{required_normalizer}` normalizer while" 14 | f" `{present_normalizer}` normalizer is present" 15 | ) 16 | -------------------------------------------------------------------------------- /dlt/common/normalizers/json/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from dlt.common.schema.typing import TColumnName, TypedDict 4 | 5 | 6 | class RelationalNormalizerConfigPropagation(TypedDict, total=False): 7 | root: Optional[Dict[TColumnName, TColumnName]] 8 | tables: Optional[Dict[str, Dict[TColumnName, TColumnName]]] 9 | 10 | 11 | class RelationalNormalizerConfig(TypedDict, total=False): 12 | max_nesting: Optional[int] 13 | propagation: Optional[RelationalNormalizerConfigPropagation] 14 | -------------------------------------------------------------------------------- /dlt/common/normalizers/naming/__init__.py: -------------------------------------------------------------------------------- 1 | from .naming import NamingConvention 2 | 3 | __all__ = ["NamingConvention"] 4 | -------------------------------------------------------------------------------- /dlt/common/normalizers/naming/sql_ci_v1.py: -------------------------------------------------------------------------------- 1 | from dlt.common.normalizers.naming.sql_cs_v1 import NamingConvention as SqlCsNamingConvention 2 | 3 | 4 | class NamingConvention(SqlCsNamingConvention): 5 | """A variant of sql_cs which lower cases all identifiers.""" 6 | 7 | def normalize_identifier(self, identifier: str) -> str: 8 | return super().normalize_identifier(identifier).lower() 9 | 10 | @property 11 | def is_case_sensitive(self) -> bool: 12 | return False 13 | -------------------------------------------------------------------------------- /dlt/common/normalizers/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | from dlt.common import known_env 5 | 
from dlt.common.utils import uniq_id_base64, many_uniq_ids_base64 6 | 7 | 8 | DLT_ID_LENGTH_BYTES = int(os.environ.get(known_env.DLT_DLT_ID_LENGTH_BYTES, 10)) 9 | 10 | 11 | def generate_dlt_ids(n_ids: int) -> List[str]: 12 | return many_uniq_ids_base64(n_ids, DLT_ID_LENGTH_BYTES) 13 | 14 | 15 | def generate_dlt_id() -> str: 16 | return uniq_id_base64(DLT_ID_LENGTH_BYTES) 17 | -------------------------------------------------------------------------------- /dlt/common/pendulum.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, timezone # noqa: I251 2 | import pendulum # noqa: I251 3 | 4 | # force UTC as the local timezone to prevent local dates to be written to dbs 5 | pendulum.set_local_timezone(pendulum.timezone("UTC")) 6 | 7 | 8 | def __utcnow() -> pendulum.DateTime: 9 | """ 10 | Use this function instead of datetime.now 11 | Returns: 12 | pendulum.DateTime -- current time in UTC timezone 13 | """ 14 | return pendulum.now() 15 | 16 | 17 | pendulum.utcnow = __utcnow # type: ignore 18 | -------------------------------------------------------------------------------- /dlt/common/reflection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/common/reflection/__init__.py -------------------------------------------------------------------------------- /dlt/common/runners/__init__.py: -------------------------------------------------------------------------------- 1 | from .pool_runner import run_pool, NullExecutor 2 | from .runnable import Runnable, workermethod, TExecutor 3 | from .typing import TRunMetrics 4 | from .venv import Venv, VenvNotFound 5 | 6 | 7 | __all__ = [ 8 | "run_pool", 9 | "NullExecutor", 10 | "Runnable", 11 | "workermethod", 12 | "TExecutor", 13 | "TRunMetrics", 14 | "Venv", 15 | "VenvNotFound", 16 | ] 17 | 
-------------------------------------------------------------------------------- /dlt/common/runners/configuration.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional 2 | 3 | from dlt.common.configuration import configspec 4 | from dlt.common.configuration.specs import BaseConfiguration 5 | 6 | TPoolType = Literal["process", "thread", "none"] 7 | 8 | 9 | @configspec 10 | class PoolRunnerConfiguration(BaseConfiguration): 11 | pool_type: TPoolType = None 12 | """type of pool to run, must be set in derived configs""" 13 | start_method: Optional[str] = None 14 | """start method for the pool (typically process). None is system default""" 15 | workers: Optional[int] = None 16 | """# how many threads/processes in the pool""" 17 | run_sleep: float = 0.1 18 | """how long to sleep between runs with workload, seconds""" 19 | -------------------------------------------------------------------------------- /dlt/common/runners/typing.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | 3 | 4 | class TRunMetrics(NamedTuple): 5 | was_idle: bool 6 | pending_items: int 7 | -------------------------------------------------------------------------------- /dlt/common/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | from .init import apply_runtime_config, init_telemetry 2 | 3 | __all__ = ["apply_runtime_config", "init_telemetry"] 4 | -------------------------------------------------------------------------------- /dlt/common/runtime/exceptions.py: -------------------------------------------------------------------------------- 1 | from dlt.common.exceptions import DltException 2 | 3 | 4 | class RuntimeException(DltException): 5 | pass 6 | -------------------------------------------------------------------------------- /dlt/common/runtime/slack.py: 
-------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def send_slack_message(incoming_hook: str, message: str, is_markdown: bool = True) -> None: 5 | from dlt.common import logger 6 | from dlt.common.json import json 7 | 8 | """Sends a `message` to Slack `incoming_hook`, by default formatted as markdown.""" 9 | r = requests.post( 10 | incoming_hook, 11 | data=json.dumps({"text": message, "mrkdwn": is_markdown}).encode("utf-8"), 12 | headers={"Content-Type": "application/json;charset=utf-8"}, 13 | ) 14 | if r.status_code >= 400: 15 | logger.warning(f"Could not post the notification to slack: {r.status_code}") 16 | r.raise_for_status() 17 | -------------------------------------------------------------------------------- /dlt/common/schema/configuration.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Optional 2 | 3 | from dlt.common.configuration import configspec 4 | from dlt.common.configuration.specs import BaseConfiguration, known_sections 5 | from dlt.common.normalizers.typing import TNamingConventionReferenceArg 6 | from dlt.common.typing import DictStrAny 7 | 8 | 9 | @configspec 10 | class SchemaConfiguration(BaseConfiguration): 11 | # always in section 12 | __section__: ClassVar[str] = known_sections.SCHEMA 13 | 14 | naming: Optional[TNamingConventionReferenceArg] = None # Union[str, NamingConvention] 15 | json_normalizer: Optional[DictStrAny] = None 16 | allow_identifier_change_on_table_with_data: Optional[bool] = None 17 | use_break_path_on_normalize: Optional[bool] = None 18 | """Post 1.4.0 to allow table and column names that contain table separators""" 19 | -------------------------------------------------------------------------------- /dlt/common/storages/fsspecs/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/common/storages/fsspecs/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/configuration.py: -------------------------------------------------------------------------------- 1 | # TODO: kept for backward compat with dlt+ and delete after next release 2 | from dlt.common.storages.configuration import WithLocalFiles 3 | 4 | __all__ = ["WithLocalFiles"] 5 | -------------------------------------------------------------------------------- /dlt/destinations/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.destinations.dataset.factory import dataset 2 | from dlt.destinations.dataset.dataset import ( 3 | ReadableDBAPIDataset, 4 | ) 5 | 6 | from dlt.destinations.dataset.utils import ( 7 | get_destination_clients, 8 | get_destination_client_initial_config, 9 | ) 10 | 11 | 12 | __all__ = [ 13 | "dataset", 14 | "ReadableDBAPIDataset", 15 | "BaseReadableDBAPIDataset", 16 | "get_destination_client_initial_config", 17 | "get_destination_clients", 18 | ] 19 | -------------------------------------------------------------------------------- /dlt/destinations/dataset/exceptions.py: -------------------------------------------------------------------------------- 1 | from dlt.common.exceptions import DltException 2 | 3 | 4 | class DatasetException(DltException): 5 | pass 6 | 7 | 8 | class ReadableRelationHasQueryException(DatasetException): 9 | def __init__(self, attempted_change: str) -> None: 10 | msg = ( 11 | "This readable relation was created with a provided sql query. You cannot change" 12 | f" `{attempted_change}`. Please change the orignal sql query." 
13 | ) 14 | super().__init__(msg) 15 | 16 | 17 | class ReadableRelationUnknownColumnException(DatasetException): 18 | def __init__(self, column_name: str) -> None: 19 | msg = ( 20 | f"The selected column `{column_name}` is not known in the dlt schema for this relation." 21 | ) 22 | super().__init__(msg) 23 | -------------------------------------------------------------------------------- /dlt/destinations/dataset/factory.py: -------------------------------------------------------------------------------- 1 | from typing import Union, TYPE_CHECKING, Any 2 | 3 | from dlt.common.destination import TDestinationReferenceArg 4 | from dlt.common.schema import Schema 5 | from dlt.common import logger 6 | 7 | from dlt.destinations.dataset.dataset import ReadableDBAPIDataset 8 | 9 | if TYPE_CHECKING: 10 | from dlt import Dataset 11 | else: 12 | Dataset = Any 13 | 14 | 15 | def dataset( 16 | destination: TDestinationReferenceArg, 17 | dataset_name: str, 18 | schema: Union[Schema, str, None] = None, 19 | ) -> Dataset: 20 | return ReadableDBAPIDataset(destination, dataset_name, schema) 21 | -------------------------------------------------------------------------------- /dlt/destinations/impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/athena/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/athena/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/bigquery/README.md: -------------------------------------------------------------------------------- 1 | # Loader account setup 2 | 3 | 1. 
Create a new service account, add a private key to it, and download the `services.json` file. 4 | 2. Make sure the newly created account has access to the BigQuery API. 5 | 3. You must add the following roles to the account above: `BigQuery Data Editor`, `BigQuery Job User` and `BigQuery Read Session User` (storage API) 6 | 4. IAM to add roles is here https://console.cloud.google.com/iam-admin/iam?project=chat-analytics-rasa-ci -------------------------------------------------------------------------------- /dlt/destinations/impl/bigquery/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/bigquery/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/bigquery/warnings.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from dlt.common.warnings import Dlt100DeprecationWarning 3 | 4 | 5 | def per_column_cluster_hint_deprecated(clustered_columns: list[str]) -> None: 6 | columns_str = ", ".join(f"'{col}'" for col in clustered_columns) 7 | example = ( 8 | "\n\nTo migrate, use the 'cluster' parameter in the `bigquery_adapter` function, for" 9 | f" example:\n bigquery_adapter(resource, cluster=[{columns_str}])\nThis ensures the" 10 | " order of clustered columns is preserved.\n" 11 | ) 12 | warnings.warn( 13 | "Defining clustered tables in BigQuery using per-column 'cluster' hints is deprecated and" 14 | " will be removed in a future release. 
Clustered columns detected:" 15 | f" [{columns_str}].{example}", 16 | Dlt100DeprecationWarning, 17 | stacklevel=2, 18 | ) 19 | -------------------------------------------------------------------------------- /dlt/destinations/impl/clickhouse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/clickhouse/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/databricks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/databricks/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/destination/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/destination/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/dremio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/dremio/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/duckdb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/duckdb/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/dummy/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/dummy/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/filesystem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/filesystem/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/lancedb/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.destinations.impl.lancedb.lancedb_adapter import lancedb_adapter 2 | -------------------------------------------------------------------------------- /dlt/destinations/impl/lancedb/warnings.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from dlt.common.warnings import Dlt100DeprecationWarning 4 | 5 | 6 | def uri_on_credentials_deprecated() -> None: 7 | warnings.warn( 8 | "Usage of `uri` argument on lance db credentials is deprecated. Please set the `lance_uri`" 9 | " on lance db config. 
for example using toml" 10 | ' section:\n[destination.lancedb]\nuri="path/db.lancedb"\n', 11 | Dlt100DeprecationWarning, 12 | stacklevel=1, 13 | ) 14 | -------------------------------------------------------------------------------- /dlt/destinations/impl/motherduck/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/motherduck/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/mssql/README.md: -------------------------------------------------------------------------------- 1 | # loader account setup 2 | 3 | 1. Create new database `CREATE DATABASE dlt_data` 4 | 2. Create new user, set password `CREATE USER loader WITH PASSWORD = 'loader';` 5 | 3. Set as database owner (we could set lower permission) `ALTER DATABASE dlt_data OWNER TO loader` 6 | -------------------------------------------------------------------------------- /dlt/destinations/impl/mssql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/mssql/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/postgres/README.md: -------------------------------------------------------------------------------- 1 | # loader account setup 2 | 3 | 1. Create new database `CREATE DATABASE dlt_data` 4 | 2. Create new user, set password `CREATE USER loader WITH PASSWORD 'loader';` 5 | 3. 
Set as database owner (we could set lower permission) `ALTER DATABASE dlt_data OWNER TO loader` 6 | -------------------------------------------------------------------------------- /dlt/destinations/impl/postgres/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/postgres/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/qdrant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/qdrant/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/qdrant/exceptions.py: -------------------------------------------------------------------------------- 1 | from dlt.common.destination.exceptions import DestinationTerminalException 2 | 3 | 4 | class InvalidInMemoryQdrantCredentials(DestinationTerminalException): 5 | def __init__(self) -> None: 6 | super().__init__( 7 | "To use in-memory instance of qdrant, " 8 | "please instantiate it first and then pass to destination factory\n" 9 | '\nclient = QdrantClient(":memory:")\n' 10 | 'dlt.pipeline(pipeline_name="...", destination=dlt.destinations.qdrant(client)' 11 | ) 12 | -------------------------------------------------------------------------------- /dlt/destinations/impl/qdrant/warnings.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from dlt.common.warnings import Dlt100DeprecationWarning 4 | 5 | 6 | def location_on_credentials_deprecated(arg: str) -> None: 7 | warnings.warn( 8 | f"Usage of `{arg}` option on Qdrant credentials is deprecated. Please set the `{arg}` on" 9 | f' Qdrant config. 
for example using toml section:\n[destination.qdrant]\n{arg}="value"\n', 10 | Dlt100DeprecationWarning, 11 | stacklevel=1, 12 | ) 13 | -------------------------------------------------------------------------------- /dlt/destinations/impl/redshift/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/redshift/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/snowflake/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/snowflake/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/sqlalchemy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/sqlalchemy/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/synapse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/synapse/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/synapse/sql_client.py: -------------------------------------------------------------------------------- 1 | from contextlib import suppress 2 | 3 | from dlt.destinations.impl.mssql.sql_client import PyOdbcMsSqlClient 4 | from dlt.destinations.exceptions import DatabaseUndefinedRelation 5 | 6 | 7 | class SynapseSqlClient(PyOdbcMsSqlClient): 8 | def drop_tables(self, *tables: str) -> None: 9 | if not 
tables: 10 | return 11 | # Synapse does not support DROP TABLE IF EXISTS. 12 | # Workaround: use DROP TABLE and suppress non-existence errors. 13 | statements = [f"DROP TABLE {self.make_qualified_table_name(table)};" for table in tables] 14 | for statement in statements: 15 | with suppress(DatabaseUndefinedRelation): 16 | self.execute_sql(statement) 17 | -------------------------------------------------------------------------------- /dlt/destinations/impl/weaviate/README.md: -------------------------------------------------------------------------------- 1 | ## Testing locally 2 | 3 | Use the docker compose found in `.github` folder. 4 | ``` 5 | docker-compose -f .github/weaviate-compose.yml up -d 6 | ``` 7 | 8 | to stop 9 | ``` 10 | docker-compose -f .github/weaviate-compose.yml down -v --remove-orphans 11 | ``` 12 | 13 | It will start weaviate with contextionary vectorizer. It does not require any secrets. Provide the following section in `config.toml` 14 | ```toml 15 | [destination.weaviate] 16 | vectorizer="text2vec-contextionary" 17 | module_config={text2vec-contextionary = { vectorizeClassName = false, vectorizePropertyName = true}} 18 | ``` -------------------------------------------------------------------------------- /dlt/destinations/impl/weaviate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/destinations/impl/weaviate/__init__.py -------------------------------------------------------------------------------- /dlt/destinations/impl/weaviate/ci_naming.py: -------------------------------------------------------------------------------- 1 | from .naming import NamingConvention as WeaviateNamingConvention 2 | 3 | 4 | class NamingConvention(WeaviateNamingConvention): 5 | """Case insensitive naming convention for Weaviate. 
Lower cases all identifiers""" 6 | 7 | @property 8 | def is_case_sensitive(self) -> bool: 9 | return False 10 | 11 | def _lowercase_property(self, identifier: str) -> str: 12 | """Lowercase the whole property to become case insensitive""" 13 | return identifier.lower() 14 | -------------------------------------------------------------------------------- /dlt/destinations/impl/weaviate/exceptions.py: -------------------------------------------------------------------------------- 1 | from dlt.common.destination.exceptions import DestinationException, DestinationTerminalException 2 | 3 | 4 | class WeaviateGrpcError(DestinationException): 5 | pass 6 | 7 | 8 | class PropertyNameConflict(DestinationTerminalException): 9 | def __init__(self, error: str) -> None: 10 | super().__init__( 11 | "Your data contains items with identical property names when compared case insensitive." 12 | " Weaviate cannot handle such data. Please clean up your data before loading or change" 13 | " to case insensitive naming convention. See" 14 | " https://dlthub.com/docs/dlt-ecosystem/destinations/weaviate#names-normalization for" 15 | f" details. [{error}]" 16 | ) 17 | -------------------------------------------------------------------------------- /dlt/destinations/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Any, AnyStr, List, Type, Optional, Protocol, Tuple, TypeVar, Generator 2 | 3 | 4 | # native connection 5 | TNativeConn = TypeVar("TNativeConn", bound=Any) 6 | 7 | try: 8 | from pandas import DataFrame 9 | except ImportError: 10 | DataFrame: Type[Any] = None # type: ignore 11 | 12 | try: 13 | from pyarrow import Table as ArrowTable 14 | except ImportError: 15 | ArrowTable: Type[Any] = None # type: ignore 16 | 17 | 18 | class DBTransaction(Protocol): 19 | def commit_transaction(self) -> None: ... 20 | def rollback_transaction(self) -> None: ... 
21 | 22 | 23 | class DBApi(Protocol): 24 | threadsafety: int 25 | apilevel: str 26 | paramstyle: str 27 | -------------------------------------------------------------------------------- /dlt/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/helpers/__init__.py -------------------------------------------------------------------------------- /dlt/helpers/dbt_cloud/configuration.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from dlt.common.configuration import configspec 4 | from dlt.common.configuration.specs import BaseConfiguration 5 | from dlt.common.typing import TSecretStrValue 6 | 7 | 8 | @configspec 9 | class DBTCloudConfiguration(BaseConfiguration): 10 | api_token: TSecretStrValue = "" 11 | 12 | account_id: Optional[str] = None 13 | job_id: Optional[str] = None 14 | project_id: Optional[str] = None 15 | environment_id: Optional[str] = None 16 | run_id: Optional[str] = None 17 | 18 | cause: str = "Triggered via API" 19 | git_sha: Optional[str] = None 20 | git_branch: Optional[str] = None 21 | schema_override: Optional[str] = None 22 | -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.common.exceptions import MissingDependencyException 2 | 3 | # FIXME: Remove this after implementing package installer 4 | try: 5 | import streamlit 6 | except ModuleNotFoundError: 7 | raise MissingDependencyException( 8 | "dlt Streamlit Helpers", 9 | ["streamlit"], 10 | "dlt Helpers for Streamlit should be run within a streamlit app.", 11 | ) 12 | -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/blocks/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/helpers/streamlit_app/blocks/__init__.py -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/blocks/menu.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | import streamlit as st 3 | 4 | from dlt.helpers.streamlit_app.utils import HERE 5 | from dlt.helpers.streamlit_app.widgets import mode_selector 6 | from dlt.helpers.streamlit_app.widgets import pipeline_summary 7 | 8 | 9 | def menu(pipeline: dlt.Pipeline) -> None: 10 | mode_selector() 11 | st.logo( 12 | "https://cdn.sanity.io/images/nsq559ov/production/7f85e56e715b847c5519848b7198db73f793448d-82x25.svg?q=75&fit=clip&auto=format", 13 | size="large", 14 | ) 15 | st.page_link(f"{HERE}/pages/dashboard.py", label="Explore data", icon="🕹️") 16 | st.page_link(f"{HERE}/pages/load_info.py", label="Load info", icon="💾") 17 | pipeline_summary(pipeline) 18 | -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/blocks/show_data.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | import streamlit as st 3 | 4 | from dlt.helpers.streamlit_app.utils import query_data 5 | 6 | 7 | def show_data_button(pipeline: dlt.Pipeline, table_name: str) -> None: 8 | if st.button("SHOW DATA", key=table_name): 9 | df = query_data(pipeline, f"SELECT * FROM {table_name}", chunk_size=2048) 10 | if df is None: 11 | st.text("No rows returned") 12 | else: 13 | rows_count = df.shape[0] 14 | if df.shape[0] < 2048: 15 | st.text(f"All {rows_count} row(s)") 16 | else: 17 | st.text(f"Top {rows_count} row(s)") 18 | 19 | st.dataframe(df) 20 | -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/index.py: 
-------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | from dlt.helpers.streamlit_app.utils import HERE 4 | 5 | if __name__ == "__main__": 6 | st.switch_page(f"{HERE}/pages/dashboard.py") 7 | -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/pages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/helpers/streamlit_app/pages/__init__.py -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/widgets/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.helpers.streamlit_app.widgets.stats import stat 2 | from dlt.helpers.streamlit_app.widgets.summary import pipeline_summary 3 | from dlt.helpers.streamlit_app.widgets.tags import tag 4 | from dlt.helpers.streamlit_app.widgets.schema import schema_picker 5 | from dlt.helpers.streamlit_app.widgets.color_mode_selector import mode_selector 6 | -------------------------------------------------------------------------------- /dlt/helpers/streamlit_app/widgets/schema.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | import streamlit as st 3 | 4 | 5 | def schema_picker(pipeline: dlt.Pipeline) -> None: 6 | schema = None 7 | num_schemas = len(pipeline.schema_names) 8 | if num_schemas == 1: 9 | schema_name = pipeline.schema_names[0] 10 | schema = pipeline.schemas.get(schema_name) 11 | elif num_schemas > 1: 12 | text = "Select schema" 13 | selected_schema_name = st.selectbox( 14 | text, 15 | sorted(pipeline.schema_names), 16 | ) 17 | schema = pipeline.schemas.get(selected_schema_name) 18 | 19 | if schema: 20 | st.session_state["schema_name"] = schema.name 21 | st.subheader(f"Schema: {schema.name}") 22 | 
-------------------------------------------------------------------------------- /dlt/helpers/studio/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [runtime] 2 | # sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" 3 | 4 | [tests] 5 | bucket_url_gs="gs://ci-test-bucket" 6 | bucket_url_s3="s3://dlt-ci-test-bucket" 7 | bucket_url_file="_storage" 8 | bucket_url_az="az://dlt-ci-test-bucket" 9 | bucket_url_abfss="abfss://dlt-ci-test-bucket@dltdata.dfs.core.windows.net" 10 | bucket_url_r2="s3://dlt-ci-test-bucket" 11 | # use "/" as root path 12 | bucket_url_gdrive="gdrive://15eC3e5MNew2XAIefWNlG8VlEa0ISnnaG" 13 | bucket_url_sftp="sftp://localhost/data" 14 | memory="memory:///m" -------------------------------------------------------------------------------- /dlt/load/__init__.py: -------------------------------------------------------------------------------- 1 | from dlt.load.load import Load 2 | 3 | __all__ = ["Load"] 4 | -------------------------------------------------------------------------------- /dlt/normalize/__init__.py: -------------------------------------------------------------------------------- 1 | from .normalize import Normalize 2 | 3 | __all__ = ["Normalize"] 4 | -------------------------------------------------------------------------------- /dlt/normalize/exceptions.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | from dlt.common.exceptions import DltException 3 | 4 | 5 | class NormalizeException(DltException): 6 | def __init__(self, msg: str) -> None: 7 | super().__init__(msg) 8 | 9 | 10 | class NormalizeJobFailed(NormalizeException): 11 | def __init__( 12 | self, load_id: str, job_id: str, failed_message: str, writer_metrics: List[Any] 13 | ) -> None: 14 | self.load_id = load_id 15 | self.job_id = job_id 16 | self.failed_message = failed_message 17 | self.writer_metrics = 
writer_metrics 18 | super().__init__( 19 | f"Job for `{job_id=:}` failed terminally in load with `{load_id=:}` with message:" 20 | f" {failed_message}." 21 | ) 22 | -------------------------------------------------------------------------------- /dlt/pipeline/mark.py: -------------------------------------------------------------------------------- 1 | """Module with mark functions that make data to be specially processed""" 2 | from dlt.extract import ( 3 | with_table_name, 4 | with_hints, 5 | with_file_import, 6 | make_hints, 7 | make_nested_hints, 8 | materialize_schema_item as materialize_table_schema, 9 | ) 10 | -------------------------------------------------------------------------------- /dlt/pipeline/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | TPipelineStep = Literal["run", "sync", "extract", "normalize", "load"] 4 | -------------------------------------------------------------------------------- /dlt/pipeline/warnings.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | import warnings 3 | 4 | from dlt.common.warnings import Dlt04DeprecationWarning 5 | 6 | 7 | def full_refresh_argument_deprecated(caller_name: str, full_refresh: t.Optional[bool]) -> None: 8 | """full_refresh argument is replaced with dev_mode""" 9 | if full_refresh is None: 10 | return 11 | 12 | warnings.warn( 13 | f"The `full_refresh` argument to {caller_name} is deprecated and will be removed in a" 14 | f" future version. 
Use `dev_mode={full_refresh}` instead which will have the same effect.", 15 | Dlt04DeprecationWarning, 16 | stacklevel=2, 17 | ) 18 | -------------------------------------------------------------------------------- /dlt/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/py.typed -------------------------------------------------------------------------------- /dlt/reflection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/reflection/__init__.py -------------------------------------------------------------------------------- /dlt/reflection/names.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import dlt 4 | import dlt.destinations 5 | from dlt import pipeline, attach, run, source, resource, transformer 6 | 7 | DLT = dlt.__name__ 8 | DESTINATIONS = dlt.destinations.__name__ 9 | PIPELINE = pipeline.__name__ 10 | ATTACH = attach.__name__ 11 | RUN = run.__name__ 12 | SOURCE = source.__name__ 13 | RESOURCE = resource.__name__ 14 | TRANSFORMER = transformer.__name__ 15 | 16 | DETECTED_FUNCTIONS = [PIPELINE, SOURCE, RESOURCE, RUN, TRANSFORMER] 17 | SIGNATURES = { 18 | PIPELINE: inspect.signature(pipeline), 19 | ATTACH: inspect.signature(attach), 20 | RUN: inspect.signature(run), 21 | SOURCE: inspect.signature(source), 22 | RESOURCE: inspect.signature(resource), 23 | TRANSFORMER: inspect.signature(transformer), 24 | } 25 | -------------------------------------------------------------------------------- /dlt/sources/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore secrets, virtual environments and typical python compilation artifacts 2 | secrets.toml 3 | # ignore basic python artifacts 4 | 
.env 5 | **/__pycache__/ 6 | **/*.py[cod] 7 | **/*$py.class 8 | # ignore duckdb 9 | *.duckdb 10 | *.wal -------------------------------------------------------------------------------- /dlt/sources/__init__.py: -------------------------------------------------------------------------------- 1 | """Module with built in sources and source building blocks""" 2 | from dlt.common.typing import TDataItem, TDataItems 3 | from dlt.extract import DltSource, DltResource, Incremental as incremental 4 | from dlt.extract.reference import AnySourceFactory, SourceReference, UnknownSourceReference 5 | from . import credentials, config 6 | 7 | 8 | __all__ = [ 9 | "DltSource", 10 | "DltResource", 11 | "SourceReference", 12 | "UnknownSourceReference", 13 | "AnySourceFactory", 14 | "TDataItem", 15 | "TDataItems", 16 | "incremental", 17 | "credentials", 18 | "config", 19 | ] 20 | -------------------------------------------------------------------------------- /dlt/sources/_core_source_templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/sources/_core_source_templates/__init__.py -------------------------------------------------------------------------------- /dlt/sources/_single_file_templates/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | # put your configuration values here 2 | 3 | [runtime] 4 | log_level="WARNING" # the system log level of dlt 5 | # use the dlthub_telemetry setting to enable/disable anonymous usage data reporting, see https://dlthub.com/docs/reference/telemetry 6 | -------------------------------------------------------------------------------- /dlt/sources/_single_file_templates/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore secrets, virtual environments and typical python compilation artifacts 2 | secrets.toml 3 
| # ignore basic python artifacts 4 | .env 5 | **/__pycache__/ 6 | **/*.py[cod] 7 | **/*$py.class 8 | # ignore duckdb 9 | *.duckdb 10 | *.wal -------------------------------------------------------------------------------- /dlt/sources/_single_file_templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/sources/_single_file_templates/__init__.py -------------------------------------------------------------------------------- /dlt/sources/config.py: -------------------------------------------------------------------------------- 1 | from dlt.common.configuration.specs import configspec 2 | from dlt.common.configuration.inject import with_config 3 | -------------------------------------------------------------------------------- /dlt/sources/filesystem/settings.py: -------------------------------------------------------------------------------- 1 | DEFAULT_CHUNK_SIZE = 100 2 | -------------------------------------------------------------------------------- /dlt/sources/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/dlt/sources/helpers/__init__.py -------------------------------------------------------------------------------- /dlt/sources/helpers/requests/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Union, Optional 2 | 3 | from dlt.common.typing import TimedeltaSeconds 4 | 5 | # Either a single timeout or tuple (connect,read) timeout 6 | TRequestTimeout = Union[TimedeltaSeconds, Tuple[TimedeltaSeconds, TimedeltaSeconds]] 7 | -------------------------------------------------------------------------------- /dlt/sources/helpers/rest_client/exceptions.py: 
def join_url(base_url: str, path: str) -> str:
    """Join `base_url` and `path` with exactly one slash between them.

    Args:
        base_url: Base part of the URL. May be an empty string, but not None.
        path: Path to append. May be an empty string.

    Returns:
        `path` unchanged when `base_url` is empty, `base_url` unchanged when
        `path` is empty, otherwise `base_url` without trailing slashes, a single
        "/", and `path` without leading slashes.

    Raises:
        ValueError: If `base_url` is None.
    """
    if base_url is None:
        raise ValueError("Base URL must be provided or set to an empty string.")

    # An empty side means there is nothing to join: return the other side untouched.
    if base_url == "":
        return path

    if path == "":
        return base_url

    # Strip the slashes on both sides of the seam and insert exactly one.
    return base_url.rstrip("/") + "/" + path.lstrip("/")
def join_url(base_url: str, path: str) -> str:
    """Append `path` to `base_url`, making sure a slash sits between them.

    A slash is added to `base_url` only when it does not already end with one;
    all leading slashes are removed from `path` before it is appended.
    """
    separator = "" if base_url.endswith("/") else "/"
    return f"{base_url}{separator}{path.lstrip('/')}"
15 | """ 16 | always_materialize: Optional[bool] = False 17 | """ 18 | If True, the transformation will always be materialized and not executed as a query 19 | """ 20 | -------------------------------------------------------------------------------- /dlt/transformations/typing.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | from typing_extensions import ParamSpec 3 | 4 | TTransformationFunParams = ParamSpec("TTransformationFunParams") 5 | -------------------------------------------------------------------------------- /docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/__init__.py -------------------------------------------------------------------------------- /docs/examples/.dlt/config.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/.dlt/config.toml -------------------------------------------------------------------------------- /docs/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/__init__.py -------------------------------------------------------------------------------- /docs/examples/_template/.dlt/config.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/_template/.dlt/config.toml -------------------------------------------------------------------------------- /docs/examples/_template/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/_template/.dlt/example.secrets.toml -------------------------------------------------------------------------------- /docs/examples/_template/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/_template/__init__.py -------------------------------------------------------------------------------- /docs/examples/_template/_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | --- 3 | title: Example Template 4 | description: Add description here 5 | keywords: [example] 6 | --- 7 | 8 | This is a template for a new example. This text will show up in the docs. 9 | 10 | With this example you will learn to: 11 | 12 | * One 13 | * two 14 | * Three 15 | 16 | """ 17 | 18 | import dlt 19 | 20 | if __name__ == "__main__": 21 | # run a pipeline 22 | pipeline = dlt.pipeline( 23 | pipeline_name="example_pipeline", destination="duckdb", dataset_name="example_data" 24 | ) 25 | # Extract, normalize, and load the data 26 | load_info = pipeline.run([1, 2, 3], table_name="player") 27 | print(load_info) 28 | -------------------------------------------------------------------------------- /docs/examples/archive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/archive/__init__.py -------------------------------------------------------------------------------- /docs/examples/archive/credentials/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [sources] 2 | api_url="default api url" 3 | 4 | [custom] 5 | simple_data.api_url="api_url from custom location" 
-------------------------------------------------------------------------------- /docs/examples/archive/credentials/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/archive/credentials/__init__.py -------------------------------------------------------------------------------- /docs/examples/archive/data/demo_example.json: -------------------------------------------------------------------------------- 1 | [{"isbn": "123-456-222", 2 | "author": 3 | { 4 | "lastname": "Panda", 5 | "firstname": "Jane" 6 | }, 7 | "editor": 8 | { 9 | "lastname": "Smite", 10 | "firstname": "Jane" 11 | }, 12 | "title": "The Ultimate Database Study Guide", 13 | "category": ["Non-Fiction", "Technology"] 14 | }, 15 | {"isbn": "123-456-789", 16 | "author": 17 | { 18 | "lastname": "Jayson", 19 | "firstname": "Joe" 20 | }, 21 | "editor": 22 | { 23 | "lastname": "Smite", 24 | "firstname": "Jane" 25 | }, 26 | "title": "Json for big data", 27 | "category": ["SF", "Horror", "Dystopia"] 28 | } 29 | ] -------------------------------------------------------------------------------- /docs/examples/archive/data/singer_taps/model_annotations.csv: -------------------------------------------------------------------------------- 1 | sender id,message id,annotation,confidence,count,added at,reviewed 2 | A92891n389182,29123898192,frustrated,0.982182,2,6/16/2022 18:33:30,FALSE 3 | A92891n389182,12787812,converted,0.1828121,1,6/16/2022 18:33:30,TRUE -------------------------------------------------------------------------------- /docs/examples/archive/google_sheets.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | from sources.google_sheets import google_spreadsheet 4 | 5 | dlt.pipeline(destination="bigquery", dev_mode=False) 6 | # see example.secrets.toml to where to put credentials 7 | 8 | # "2022-05", 
"model_metadata" 9 | info = google_spreadsheet( 10 | "11G95oVZjieRhyGqtQMQqlqpxyvWkRXowKE8CtdLtFaU", ["named range", "Second_Copy!1:2"] 11 | ) 12 | print(list(info)) 13 | -------------------------------------------------------------------------------- /docs/examples/archive/schemas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/archive/schemas/__init__.py -------------------------------------------------------------------------------- /docs/examples/archive/singer_tap_jsonl_example.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | from dlt.common.storages.schema_storage import SchemaStorage 4 | 5 | from docs.examples.sources.singer_tap import singer_raw_stream 6 | from docs.examples.sources.jsonl import jsonl_file 7 | 8 | 9 | # load hubspot schema stub - it converts all field names with `timestamp` into timestamp type 10 | schema = SchemaStorage.load_schema_file("docs/examples/schemas/", "hubspot", ("yaml",)) 11 | 12 | p = dlt.pipeline(destination="postgres", dev_mode=True) 13 | # now load a pipeline created from jsonl resource that feeds messages into singer tap transformer 14 | pipe = jsonl_file("docs/examples/data/singer_taps/tap_hubspot.jsonl") | singer_raw_stream() 15 | # provide hubspot schema 16 | info = p.run(pipe, schema=schema, credentials="postgres://loader@localhost:5432/dlt_data") 17 | print(info) 18 | -------------------------------------------------------------------------------- /docs/examples/archive/sources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/archive/sources/__init__.py -------------------------------------------------------------------------------- 
/docs/examples/archive/sources/rasa/__init__.py: -------------------------------------------------------------------------------- 1 | from .rasa import rasa 2 | -------------------------------------------------------------------------------- /docs/examples/backfill_in_chunks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/backfill_in_chunks/__init__.py -------------------------------------------------------------------------------- /docs/examples/chess/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | chess_url="https://api.chess.com/pub/" 2 | pipeline_name="chess" 3 | 4 | [runtime] 5 | log_level="WARNING" 6 | 7 | [sources.chess.extract] 8 | max_parallel_items = 5 9 | workers = 2 10 | -------------------------------------------------------------------------------- /docs/examples/chess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/chess/__init__.py -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/analyses/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/chess/dbt_transform/analyses/.gitkeep -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/dbt_project.yml: 
-------------------------------------------------------------------------------- 1 | name: 'chess_transforms' 2 | version: '1.0.0' 3 | config-version: 2 4 | 5 | # These configurations specify where dbt should look for different types of files. 6 | # The `model-paths` config, for example, states that models in this project can be 7 | # found in the "models/" directory. You probably won't need to change these! 8 | model-paths: ["models"] 9 | analysis-paths: ["analyses"] 10 | test-paths: ["tests"] 11 | seed-paths: ["seeds"] 12 | macro-paths: ["macros"] 13 | snapshot-paths: ["snapshots"] 14 | 15 | target-path: "target" # directory which will store compiled SQL files 16 | clean-targets: # directories to be removed by `dbt clean` 17 | - "target" 18 | - "dbt_packages" 19 | 20 | -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/chess/dbt_transform/macros/.gitkeep -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/models/_dlt_loads.sql: -------------------------------------------------------------------------------- 1 | -- never do a full refresh or you drop the original loads info 2 | {{ 3 | config( 4 | materialized='incremental', 5 | full_refresh = false 6 | ) 7 | }} 8 | 9 | select load_id, schema_name, 1 as status, {{ current_timestamp() }} as inserted_at, schema_version_hash from {{ ref('load_ids') }} 10 | WHERE load_id NOT IN ( 11 | -- TODO: use configured status + 1 12 | SELECT load_id FROM {{ source('dlt', '_dlt_loads') }} WHERE status = 1) 13 | -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/models/load_ids.sql: 
-------------------------------------------------------------------------------- 1 | {{ 2 | config( 3 | materialized='table' 4 | ) 5 | }} 6 | 7 | {% if should_full_refresh() %} 8 | -- take all loads when full refresh 9 | SELECT load_id, schema_name, schema_version_hash FROM {{ source('dlt', '_dlt_loads') }} 10 | -- TODO: the status value must be configurable so we can chain packages 11 | WHERE status = 0 12 | {% else %} 13 | -- take only loads with status = 0 and no other records 14 | SELECT load_id, schema_name, schema_version_hash FROM {{ source('dlt', '_dlt_loads') }} 15 | GROUP BY load_id, schema_name, schema_version_hash 16 | -- note that it is a hack - we make sure no other statuses exist 17 | HAVING SUM(status) = 0 18 | {% endif %} -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/models/schema.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | - name: load_ids 6 | description: "A list of currently loaded load ids" 7 | columns: 8 | - name: load_id 9 | tests: 10 | - unique 11 | - not_null 12 | - name: view_player_games 13 | -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/package-lock.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: calogica/dbt_expectations 3 | version: 0.10.4 4 | - package: calogica/dbt_date 5 | version: 0.10.1 6 | sha1_hash: 69d1f263b3308f517bdce9d0883add938c1e4f61 7 | -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: calogica/dbt_expectations 3 | version: [">=0.4.0"] 4 | -------------------------------------------------------------------------------- 
/docs/examples/chess/dbt_transform/seeds/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/chess/dbt_transform/seeds/.gitkeep -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/chess/dbt_transform/snapshots/.gitkeep -------------------------------------------------------------------------------- /docs/examples/chess/dbt_transform/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/chess/dbt_transform/tests/.gitkeep -------------------------------------------------------------------------------- /docs/examples/chess_production/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | chess_url="https://api.chess.com/pub/" 2 | -------------------------------------------------------------------------------- /docs/examples/chess_production/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/chess_production/__init__.py -------------------------------------------------------------------------------- /docs/examples/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from tests.utils import ( 4 | patch_home_dir, 5 | autouse_test_storage, 6 | preserve_environ, 7 | wipe_pipeline, 8 | setup_secret_providers_to_current_module, 9 | ) 10 | 
-------------------------------------------------------------------------------- /docs/examples/connector_x_arrow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/connector_x_arrow/__init__.py -------------------------------------------------------------------------------- /docs/examples/custom_config_provider/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | dlt_config_profile_name="prod" -------------------------------------------------------------------------------- /docs/examples/custom_config_provider/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/custom_config_provider/__init__.py -------------------------------------------------------------------------------- /docs/examples/custom_config_provider/profiles.yaml: -------------------------------------------------------------------------------- 1 | prod: 2 | sources: 3 | github_api: # source level 4 | github: # resource level 5 | url: https://github.com/api 6 | api_key: "{{GITHUB_API_KEY}}" 7 | 8 | dev: 9 | sources: 10 | github_api: 11 | url: https://github.com/api 12 | api_key: "" # no keys in dev env 13 | -------------------------------------------------------------------------------- /docs/examples/custom_destination_bigquery/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 1 | [destination.bigquery.credentials] 2 | client_email = "" 3 | private_key = "" 4 | project_id = "" 5 | token_uri = "" 6 | refresh_token = "" 7 | client_id = "" 8 | client_secret = "" -------------------------------------------------------------------------------- /docs/examples/custom_destination_bigquery/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/custom_destination_bigquery/__init__.py -------------------------------------------------------------------------------- /docs/examples/custom_destination_lancedb/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [lancedb] 2 | db_path = "spotify.db" -------------------------------------------------------------------------------- /docs/examples/custom_destination_lancedb/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 1 | [spotify] 2 | client_id = "" 3 | client_secret = "" 4 | 5 | # provide the openai api key here 6 | [destination.lancedb.credentials] 7 | embedding_model_provider_api_key = "" -------------------------------------------------------------------------------- /docs/examples/custom_destination_lancedb/.gitignore: -------------------------------------------------------------------------------- 1 | spotify.db -------------------------------------------------------------------------------- /docs/examples/custom_destination_lancedb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/custom_destination_lancedb/__init__.py -------------------------------------------------------------------------------- /docs/examples/custom_naming/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [sources.sql_ci_no_collision.schema] 2 | naming="sql_ci_no_collision" -------------------------------------------------------------------------------- /docs/examples/custom_naming/__init__.py: -------------------------------------------------------------------------------- 
class NamingConvention(SqlNamingConvention):
    """Case sensitive naming convention which allows basic unicode characters, including latin 2 characters"""

    # Matches each run of characters that is neither a Latin-script letter (`\p{Latin}`),
    # a digit (`\d`) nor an underscore. `\p{...}` classes require the `regex` library,
    # which this file imports as `re`.
    # NOTE(review): presumably the parent convention uses this pattern to replace
    # disallowed characters - confirm against `sql_cs_v1`.
    RE_NON_ALPHANUMERIC: ClassVar[REPattern] = re.compile(r"[^\p{Latin}\d_]+")  # type: ignore

    def normalize_identifier(self, identifier: str) -> str:
        """Return `identifier` normalized by the parent convention, with no extra processing."""
        # typically you'd change how a single identifier is normalized here; this
        # example only delegates to the parent implementation
        return super().normalize_identifier(identifier)

    @property
    def is_case_sensitive(self) -> bool:
        """Always True: this convention reports itself as case sensitive."""
        return True
15 | } 16 | ''' 17 | -------------------------------------------------------------------------------- /docs/examples/google_sheets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/google_sheets/__init__.py -------------------------------------------------------------------------------- /docs/examples/incremental_loading/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 1 | [sources.zendesk.credentials] 2 | password = "" 3 | subdomain = "" 4 | email = "" 5 | -------------------------------------------------------------------------------- /docs/examples/incremental_loading/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/incremental_loading/__init__.py -------------------------------------------------------------------------------- /docs/examples/nested_data/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 1 | [sources.mongodb] 2 | connection_url="" 3 | -------------------------------------------------------------------------------- /docs/examples/nested_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/nested_data/__init__.py -------------------------------------------------------------------------------- /docs/examples/partial_loading/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [destination.filesystem] 2 | bucket_url="s3://dlt-ci-test-bucket" -------------------------------------------------------------------------------- 
/docs/examples/partial_loading/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 1 | [destination.filesystem.credentials] 2 | aws_access_key_id = "" # copy the access key here 3 | aws_secret_access_key = "" # copy the secret access key here -------------------------------------------------------------------------------- /docs/examples/partial_loading/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/partial_loading/__init__.py -------------------------------------------------------------------------------- /docs/examples/partial_loading/requirements.txt: -------------------------------------------------------------------------------- 1 | dlt[s3] -------------------------------------------------------------------------------- /docs/examples/pdf_to_weaviate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/pdf_to_weaviate/__init__.py -------------------------------------------------------------------------------- /docs/examples/pdf_to_weaviate/assets/invoices/invoice_20230831-p1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/pdf_to_weaviate/assets/invoices/invoice_20230831-p1.pdf -------------------------------------------------------------------------------- /docs/examples/postgres_to_postgres/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 1 | [destination.postgres.credentials] 2 | host = "" 3 | database = "" 4 | username = "" 5 | password = "" 6 | port = "" 7 | connection_timeout = 15 8 | 9 | [sources.postgres.credentials] 10 
| host = "" 11 | database = "" 12 | username = "" 13 | password = "" 14 | port = "" 15 | chunk_size = 1000000 16 | -------------------------------------------------------------------------------- /docs/examples/postgres_to_postgres/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/postgres_to_postgres/__init__.py -------------------------------------------------------------------------------- /docs/examples/propagate_hints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/propagate_hints/__init__.py -------------------------------------------------------------------------------- /docs/examples/qdrant_zendesk/.dlt/example.secrets.toml: -------------------------------------------------------------------------------- 1 | [destination.qdrant.credentials] 2 | location = "" 3 | api_key = "" 4 | 5 | [sources.zendesk.credentials] 6 | password = "" 7 | subdomain = "" 8 | email = "" 9 | -------------------------------------------------------------------------------- /docs/examples/qdrant_zendesk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/qdrant_zendesk/__init__.py -------------------------------------------------------------------------------- /docs/examples/transformers/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [runtime] 2 | log_level="WARNING" 3 | 4 | [extract] 5 | # use 2 workers to extract sources in parallel 6 | workers=2 7 | # allow 10 async items to be processed in parallel 8 | max_parallel_items=10 9 | 10 | [normalize] 11 | # use 3 worker processes to process 3 files in 
parallel 12 | workers=3 13 | 14 | [load] 15 | # have 50 concurrent load jobs 16 | workers=50 17 | -------------------------------------------------------------------------------- /docs/examples/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/examples/transformers/__init__.py -------------------------------------------------------------------------------- /docs/notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | build -------------------------------------------------------------------------------- /docs/notebooks/Makefile: -------------------------------------------------------------------------------- 1 | clear: 2 | rm -rf build 3 | 4 | build: clear 5 | uv run marimo -y export html-wasm playground/playground.py -o build/playground --mode edit 6 | 7 | serve: build 8 | python -m http.server 9000 --directory build 9 | 10 | test: 11 | uv run python playground/playground.py -------------------------------------------------------------------------------- /docs/tools/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="..." 
2 | -------------------------------------------------------------------------------- /docs/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/tools/__init__.py -------------------------------------------------------------------------------- /docs/tools/lint_setup/.gitignore: -------------------------------------------------------------------------------- 1 | lint_me.py 2 | lint_me -------------------------------------------------------------------------------- /docs/tools/lint_setup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/tools/lint_setup/__init__.py -------------------------------------------------------------------------------- /docs/tools/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | no_implicit_optional = False 3 | strict_optional = False 4 | -------------------------------------------------------------------------------- /docs/tools/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tools", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": {} 6 | } 7 | -------------------------------------------------------------------------------- /docs/tools/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv==1.0.1 2 | openai==1.14.2 3 | -------------------------------------------------------------------------------- /docs/tools/ruff.toml: -------------------------------------------------------------------------------- 1 | [lint] 2 | ignore = ["F811", "F821", "F401", "F841", "E402"] 3 | -------------------------------------------------------------------------------- 
/docs/website/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # preprocessed docs 8 | /docs_processed 9 | 10 | # Generated files 11 | .docusaurus 12 | .cache-loader 13 | docs/api_reference 14 | jaffle_shop 15 | 16 | # Misc 17 | .DS_Store 18 | .env.local 19 | .env.development.local 20 | .env.test.local 21 | .env.production.local 22 | 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # ignore all versions, they are generated dynamically 28 | versions.json 29 | versioned_docs 30 | versioned_sidebars 31 | .dlt-repo -------------------------------------------------------------------------------- /docs/website/.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true -------------------------------------------------------------------------------- /docs/website/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/__init__.py -------------------------------------------------------------------------------- /docs/website/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /docs/website/docs/.dlt/.gitignore: -------------------------------------------------------------------------------- 1 | /secrets.toml 2 | -------------------------------------------------------------------------------- /docs/website/docs/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [destination.weaviate] 2 | vectorizer="text2vec-contextionary" 3 | module_config={text2vec-contextionary = { 
vectorizeClassName = false, vectorizePropertyName = true}} -------------------------------------------------------------------------------- /docs/website/docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/__init__.py -------------------------------------------------------------------------------- /docs/website/docs/_book-onboarding-call.md: -------------------------------------------------------------------------------- 1 | Get in touch with the dltHub Customer Success team 2 | -------------------------------------------------------------------------------- /docs/website/docs/assets/json_file.json: -------------------------------------------------------------------------------- 1 | {"id": 1, "name": "Alice", "children": [{"id": 1, "name": "Eve"}]} -------------------------------------------------------------------------------- /docs/website/docs/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from tests.utils import ( 4 | patch_home_dir, 5 | autouse_test_storage, 6 | preserve_environ, 7 | wipe_pipeline, 8 | setup_secret_providers_to_current_module, 9 | ) 10 | -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/file-formats/_set_the_format.mdx: -------------------------------------------------------------------------------- 1 | There are several ways of configuring dlt to use the {props.file_type} file format for the normalization step and to store your data at the destination: 2 | 3 | 1. You can set the loader_file_format argument to {props.file_type} in the run command: 4 | 5 |
 6 | info = pipeline.run(some_source(), loader_file_format="{props.file_type}")
 7 | 
8 | 9 | 2. Alternatively, you can set the file type directly in [the resource decorator](../../general-usage/resource#pick-loader-file-format-for-a-particular-resource). 10 | 11 |
12 | @dlt.resource(file_format="{props.file_type}"){'\n'}
13 | def generate_rows(nr):{'\n'}
14 |     pass
15 | 
16 | -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/table-formats/delta.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Delta 3 | description: The Delta table format 4 | keywords: [delta, table formats] 5 | --- 6 | 7 | # Delta table format 8 | 9 | [Delta](https://delta.io/) is an open-source table format. `dlt` can store data as Delta tables. 10 | 11 | ## Supported destinations 12 | 13 | Supported by: **Databricks**, **filesystem** 14 | 15 | -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/table-formats/iceberg.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Iceberg 3 | description: The Iceberg table format 4 | keywords: [iceberg, table formats] 5 | --- 6 | 7 | # Iceberg table format 8 | 9 | [Iceberg](https://iceberg.apache.org/) is an open-source table format. `dlt` can store data as Iceberg tables. 
10 | 11 | ## Supported destinations 12 | 13 | Supported by: **Athena**, **filesystem** 14 | 15 | -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/transformations/dbt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/transformations/dbt/__init__.py -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/transformations/dbt/profiles.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # do not track usage, do not create .user.yml 3 | send_anonymous_usage_stats: False 4 | 5 | duckdb_dlt_dbt_test: 6 | target: analytics 7 | outputs: 8 | analytics: 9 | type: duckdb 10 | # schema: "{{ var('destination_dataset_name', var('source_dataset_name')) }}" 11 | path: "duckdb_dlt_dbt_test.duckdb" 12 | extensions: 13 | - httpfs 14 | - parquet -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/_source-info-header.md: -------------------------------------------------------------------------------- 1 | import Admonition from "@theme/Admonition"; 2 | import Link from '../../_book-onboarding-call.md'; 3 | 4 | 5 | Join our Slack community or . 
6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Add_people.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Add_people.png -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Airtable_ids.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Airtable_ids.jpeg -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/GA4_Property_ID_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/GA4_Property_ID_size.png -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Matomo_name_and_id.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Matomo_name_and_id.png -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Notion_Database_2.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Notion_Database_2.jpeg -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Share_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Share_button.png -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Zendesk_Admin.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Zendesk_Admin.jpeg -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Zendesk_chat_access_token.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/dlt-ecosystem/verified-sources/docs_images/Zendesk_chat_access_token.jpg -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Cloud storage and filesystem 3 | description: dlt-verified source for reading files from cloud storage and local file system 4 | keywords: [file system, files, filesystem, readers source, cloud storage, object storage, local file system] 5 | --- 6 | 7 | The filesystem source allows seamless loading of files from the 
following locations: 8 | * AWS S3 9 | * Google Cloud Storage 10 | * Google Drive 11 | * Azure Blob Storage 12 | * remote filesystem (via SFTP) 13 | * local filesystem 14 | 15 | The filesystem source natively supports [CSV](../../file-formats/csv.md), [Parquet](../../file-formats/parquet.md), and [JSONL](../../file-formats/jsonl.md) files and allows customization for loading any type of structured file. 16 | 17 | import DocCardList from '@theme/DocCardList'; 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /docs/website/docs/dlt-ecosystem/verified-sources/rest_api/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: REST APIs 3 | description: Loads data from REST APIs using a declarative configuration 4 | keywords: [rest api, restful api] 5 | --- 6 | 7 | You can use the REST API source to extract data from any REST API. Using a [declarative configuration](./basic.md#source-configuration), you can define: 8 | 9 | * the API endpoints to pull data from, 10 | * their [relationships](./basic.md#define-resource-relationships), 11 | * how to handle [pagination](./basic.md#pagination), 12 | * [authentication](./basic.md#authentication). 13 | 14 | dlt will take care of the rest: unnesting the data, inferring the schema, etc., and writing to the destination. 15 | 16 | import DocCardList from '@theme/DocCardList'; 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/website/docs/examples/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Code Examples 3 | description: A list of comprehensive code examples that teach you how to solve real world problems. 4 | keywords: ['examples'] 5 | --- 6 | import DocCardList from '@theme/DocCardList'; 7 | 8 | A list of comprehensive code examples that teach you how to solve real-world problems. 
9 | 10 | :::info 11 | If you want to share your example, follow this [contributing](https://github.com/dlt-hub/dlt/tree/devel/docs/examples/CONTRIBUTING.md) tutorial. 12 | ::: 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/website/docs/general-usage/dataset-access/dataset_snippets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/general-usage/dataset-access/dataset_snippets/__init__.py -------------------------------------------------------------------------------- /docs/website/docs/general-usage/dataset-access/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Access loaded data 3 | description: How to access your loaded data 4 | keywords: [datasets, data, access] 5 | --- 6 | import DocCardList from '@theme/DocCardList'; 7 | 8 | # Access loaded data 9 | 10 | You can access data that got loaded with `dlt.pipeline` in many different ways. When using Python, you 11 | have all popular options available: data frames, SQL queries or Ibis expressions. 
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/website/docs/general-usage/dataset-access/static/marimo_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/general-usage/dataset-access/static/marimo_dataset.png -------------------------------------------------------------------------------- /docs/website/docs/general-usage/dataset-access/static/marimo_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/general-usage/dataset-access/static/marimo_notebook.png -------------------------------------------------------------------------------- /docs/website/docs/general-usage/dataset-access/static/marimo_python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/general-usage/dataset-access/static/marimo_python.png -------------------------------------------------------------------------------- /docs/website/docs/general-usage/dataset-access/static/marimo_sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/general-usage/dataset-access/static/marimo_sql.png -------------------------------------------------------------------------------- /docs/website/docs/general-usage/snippets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/general-usage/snippets/__init__.py 
-------------------------------------------------------------------------------- /docs/website/docs/general-usage/snippets/destination-toml.toml: -------------------------------------------------------------------------------- 1 | # @@@DLT_SNIPPET_START default_layout 2 | [destination.filesystem] 3 | bucket_url="az://dlt-azure-bucket" 4 | [destination.filesystem.credentials] 5 | azure_storage_account_name="dltdata" 6 | azure_storage_account_key="storage key" 7 | # @@@DLT_SNIPPET_END default_layout 8 | 9 | # @@@DLT_SNIPPET_START name_layout 10 | [destination.production_az_bucket] 11 | bucket_url="az://dlt-azure-bucket" 12 | [destination.production_az_bucket.credentials] 13 | azure_storage_account_name="dltdata" 14 | azure_storage_account_key="storage key" 15 | # @@@DLT_SNIPPET_END name_layout -------------------------------------------------------------------------------- /docs/website/docs/plus/production/observability.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Observability 3 | description: Observability tooling 4 | keywords: [observability, monitoring, alerting] 5 | --- 6 | 7 | # Observability 8 | 9 | There are several features under development in dlt+ to enhance your observability workflows. These include: 10 | * A UI to explore and debug your pipeline runs 11 | * An AI agent to investigate your traces and logs 12 | 13 | Interested? Join our [early access program](https://info.dlthub.com/waiting-list). 
14 | 15 | -------------------------------------------------------------------------------- /docs/website/docs/plus/production/runners.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Runners 3 | description: Run pipelines in production 4 | keywords: [runners, lambda, airflow] 5 | --- 6 | 7 | # Runners 8 | 9 | With dlt+ you can now run pipelines directly from the command line, allowing you to go to production faster: 10 | 11 | ```sh 12 | dlt pipeline my_pipeline run 13 | ``` 14 | 15 | These can also be run in different environments with the use of [profiles](../core-concepts/profiles.md): 16 | 17 | ```sh 18 | dlt project --profile prod my_pipeline run 19 | ``` 20 | 21 | We are working on specialized runners for environments like Airflow, Dagster, Prefect, and more. If you're interested, feel free to [join our early access program](https://info.dlthub.com/waiting-list). 22 | 23 | -------------------------------------------------------------------------------- /docs/website/docs/reference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/reference/__init__.py -------------------------------------------------------------------------------- /docs/website/docs/reference/performance_snippets/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | # @@@DLT_SNIPPET_START parallel_config_toml 2 | # `performance-snippets` is the module name and thus config section 3 | [sources.performance-snippets.data_writer] 4 | file_max_items=100000 5 | 6 | [normalize] 7 | workers=3 8 | 9 | [normalize.data_writer] 10 | file_max_items=100000 11 | 12 | [load] 13 | workers=11 14 | 15 | # @@@DLT_SNIPPET_END parallel_config_toml -------------------------------------------------------------------------------- 
/docs/website/docs/reference/performance_snippets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/reference/performance_snippets/__init__.py -------------------------------------------------------------------------------- /docs/website/docs/reference/tracing.md: -------------------------------------------------------------------------------- 1 | 1. Identifiers 2 | 3 | 2. Data lineage 4 | 5 | 3. Schema lineage 6 | 7 | -------------------------------------------------------------------------------- /docs/website/docs/running-in-production/images/airflow_dag_tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/running-in-production/images/airflow_dag_tasks.png -------------------------------------------------------------------------------- /docs/website/docs/running-in-production/images/airflow_dags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/running-in-production/images/airflow_dags.png -------------------------------------------------------------------------------- /docs/website/docs/running-in-production/images/airflow_slack_notification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/running-in-production/images/airflow_slack_notification.png -------------------------------------------------------------------------------- /docs/website/docs/running-in-production/images/github_actions_workflow_dag.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/running-in-production/images/github_actions_workflow_dag.png -------------------------------------------------------------------------------- /docs/website/docs/running-in-production/images/github_actions_workflows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/running-in-production/images/github_actions_workflows.png -------------------------------------------------------------------------------- /docs/website/docs/tutorial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/tutorial/__init__.py -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/deploy_snippets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy_snippets/__init__.py -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/add-credential.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/add-credential.png -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/dag-folder.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/dag-folder.png -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/dlt_orchestra_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/dlt_orchestra_node.png -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/orchestra_add_dlt_step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/orchestra_add_dlt_step.png -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/orchestra_dlt_credential.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/orchestra_dlt_credential.png -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/prefect-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/prefect-dashboard.png -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/prefect-flow-run.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/prefect-flow-run.png -------------------------------------------------------------------------------- /docs/website/docs/walkthroughs/deploy-a-pipeline/images/run-trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/docs/walkthroughs/deploy-a-pipeline/images/run-trigger.png -------------------------------------------------------------------------------- /docs/website/requirements.txt: -------------------------------------------------------------------------------- 1 | pydoc-markdown==4.8.2 2 | typing-extensions==4.6.3 3 | databind.json==4.4.2 4 | databind.core==4.4.2 -------------------------------------------------------------------------------- /docs/website/src/theme/DocBreadcrumbs/Items/Home/styles.module.css: -------------------------------------------------------------------------------- 1 | .breadcrumbHomeIcon { 2 | position: relative; 3 | top: 1px; 4 | vertical-align: top; 5 | height: 1.1rem; 6 | width: 1.1rem; 7 | } 8 | -------------------------------------------------------------------------------- /docs/website/src/theme/DocBreadcrumbs/styles.module.css: -------------------------------------------------------------------------------- 1 | .breadcrumbsContainer { 2 | --ifm-breadcrumb-size-multiplier: 0.8; 3 | margin-bottom: 0.8rem; 4 | } 5 | 6 | .breadcrumbsContainerPlus { 7 | --ifm-breadcrumb-size-multiplier: 0.8; 8 | margin-bottom: 0.8rem; 9 | display: flex; 10 | justify-content: space-between; 11 | align-items: center; 12 | padding: 1rem; 13 | } 14 | 15 | .customLogo { 16 | width: 69px; 17 | min-width: 69px; 18 | max-width: 69px; 19 | height: 69px; 20 | display: flex; 21 | align-items: center; 
22 | justify-content: center; 23 | border: 1px solid #dadde1; 24 | border-radius: 50%; 25 | } 26 | -------------------------------------------------------------------------------- /docs/website/src/theme/Heading/index.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import Heading from "@theme-original/Heading"; 3 | import { useLocation } from "@docusaurus/router"; 4 | import { PlusAdmonition } from "../PlusAdmonition"; 5 | 6 | export default function HeadingWrapper(props) { 7 | const location = useLocation(); 8 | const showPlus = location.pathname.includes("/plus/"); 9 | const { as } = props; 10 | 11 | if (as === "h1" && showPlus) { 12 | return ( 13 | <> 14 | 15 | 16 | 17 | ); 18 | } 19 | 20 | return ( 21 | <> 22 | 23 | 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /docs/website/src/theme/MDXComponents.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | // Import the original mapper 3 | import MDXComponents from "@theme-original/MDXComponents"; 4 | import Tabs from "@theme/Tabs"; 5 | import TabItem from "@theme/TabItem"; 6 | 7 | export default { 8 | // Re-use the default mapping 9 | ...MDXComponents, 10 | Tabs, 11 | TabItem, 12 | // Map the "" tag to our Highlight component 13 | // `Highlight` will receive all props that were passed to `` in MDX 14 | }; 15 | -------------------------------------------------------------------------------- /docs/website/src/theme/PlusAdmonition/index.js: -------------------------------------------------------------------------------- 1 | import Admonition from "@theme/Admonition"; 2 | 3 | export function PlusAdmonition() { 4 | return ( 5 | dlt+}> 6 |

7 | This page is for dlt+, which requires a license. Join our early access program for a trial license. 8 |

9 |
10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /docs/website/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/.nojekyll -------------------------------------------------------------------------------- /docs/website/static/img/Howdltworks-Inactive-1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/website/static/img/Howdltworks-Inactive.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/website/static/img/Pipelines-Inactive-1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/website/static/img/Pipelines-Inactive.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/website/static/img/ReleaseNotes-Inactive-1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/website/static/img/ReleaseNotes-Inactive.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/website/static/img/Walkthrough-Inactive-1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 
-------------------------------------------------------------------------------- /docs/website/static/img/Walkthrough-Inactive.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/website/static/img/add-credential.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/add-credential.png -------------------------------------------------------------------------------- /docs/website/static/img/add-package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/add-package.png -------------------------------------------------------------------------------- /docs/website/static/img/architecture-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/architecture-diagram.png -------------------------------------------------------------------------------- /docs/website/static/img/bucket-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/bucket-details.png -------------------------------------------------------------------------------- /docs/website/static/img/dlt+_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/dlt+_logo.png -------------------------------------------------------------------------------- 
/docs/website/static/img/dlt-high-level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/dlt-high-level.png -------------------------------------------------------------------------------- /docs/website/static/img/dlt-onepager.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/dlt-onepager.png -------------------------------------------------------------------------------- /docs/website/static/img/dlt-pacman.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/dlt-pacman.gif -------------------------------------------------------------------------------- /docs/website/static/img/dlt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/dlt.png -------------------------------------------------------------------------------- /docs/website/static/img/dlthub-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/dlthub-logo.png -------------------------------------------------------------------------------- /docs/website/static/img/docs_where_does_dlt_fit_in_ETL_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/docs_where_does_dlt_fit_in_ETL_pipeline.png 
-------------------------------------------------------------------------------- /docs/website/static/img/docusaurus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/docusaurus.png -------------------------------------------------------------------------------- /docs/website/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/favicon.ico -------------------------------------------------------------------------------- /docs/website/static/img/filesystem-tutorial/streamlit-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/filesystem-tutorial/streamlit-data.png -------------------------------------------------------------------------------- /docs/website/static/img/filesystem-tutorial/streamlit-incremental-state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/filesystem-tutorial/streamlit-incremental-state.png -------------------------------------------------------------------------------- /docs/website/static/img/filesystem-tutorial/streamlit-new-col.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/filesystem-tutorial/streamlit-new-col.png -------------------------------------------------------------------------------- /docs/website/static/img/open-core-data-stack.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/open-core-data-stack.png -------------------------------------------------------------------------------- /docs/website/static/img/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/pipeline.png -------------------------------------------------------------------------------- /docs/website/static/img/python.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/website/static/img/schema_evolution_colab_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/schema_evolution_colab_demo.png -------------------------------------------------------------------------------- /docs/website/static/img/slot-machine-gif.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/slot-machine-gif.gif -------------------------------------------------------------------------------- /docs/website/static/img/streamlit-new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/streamlit-new.png -------------------------------------------------------------------------------- /docs/website/static/img/structured_lake.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/structured_lake.png -------------------------------------------------------------------------------- /docs/website/static/img/test-composer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/test-composer.png -------------------------------------------------------------------------------- /docs/website/static/img/trigger-config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/trigger-config.png -------------------------------------------------------------------------------- /docs/website/static/img/write-dispo-choice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/docs/website/static/img/write-dispo-choice.png -------------------------------------------------------------------------------- /docs/website/tools/clear_versions.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | 3 | version_files = [ 4 | "versions.json", 5 | "versioned_docs", 6 | "versioned_sidebars" 7 | ] 8 | 9 | for (let f of version_files) { 10 | fs.rmSync(f, { recursive: true, force: true }) 11 | } -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | pythonpath= dlt docs/website/docs 3 | norecursedirs= .direnv .eggs build dist 4 | addopts= -v --showlocals --durations 10 5 | xfail_strict= true 6 | 
log_cli= 1 7 | log_cli_level= INFO 8 | python_files = test_*.py *_test.py *snippets.py *snippet.pytest 9 | python_functions = *_test test_* *_snippet 10 | filterwarnings= ignore::DeprecationWarning 11 | markers = 12 | essential: marks all essential tests 13 | no_load: marks tests that do not load anything 14 | -------------------------------------------------------------------------------- /tests/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [runtime] 2 | # sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" 3 | 4 | [tests] 5 | bucket_url_gs="gs://ci-test-bucket" 6 | bucket_url_s3="s3://dlt-ci-test-bucket" 7 | # uses local_dir which is _storage/data 8 | bucket_url_file="data" 9 | bucket_url_az="az://dlt-ci-test-bucket" 10 | bucket_url_abfss="abfss://dlt-ci-test-bucket@dltdata.dfs.core.windows.net" 11 | bucket_url_r2="s3://dlt-ci-test-bucket" 12 | # use "/" as root path 13 | bucket_url_gdrive="gdrive://15eC3e5MNew2XAIefWNlG8VlEa0ISnnaG" 14 | bucket_url_sftp="sftp://localhost/data" 15 | memory="memory:///m" -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/__init__.py -------------------------------------------------------------------------------- /tests/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/cli/__init__.py -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | restore_from_destination=false # block restoring from destination 2 | 3 | # 
put your configuration values here 4 | [sources] 5 | api_url = "api_url" # please set me up! 6 | 7 | [runtime] 8 | log_level="WARNING" # the system log level of dlt 9 | -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/.dlt/secrets.toml: -------------------------------------------------------------------------------- 1 | api_key = "api_key_9x3ehash" 2 | 3 | [destination.postgres.credentials] 4 | database = "dlt_data" 5 | password = "wrong" # keep a wrong password here 6 | username = "loader" 7 | host = "localhost" 8 | port = 5432 9 | connect_timeout = 15 10 | -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/.dlt/secrets.toml.ci: -------------------------------------------------------------------------------- 1 | api_key = "api_key_9x3ehash" 2 | 3 | -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore secrets, virtual environments and typical python compilation artifacts 2 | secrets.toml 3 | .env 4 | **/__pycache__/ 5 | **/*.py[cod] 6 | **/*$py.class -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/cli/cases/deploy_pipeline/__init__.py -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/debug_pipeline.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.resource 5 | def example_resource(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id=0): 6 | yield [api_url, api_key, str(last_id), "param4", 
"param5"] 7 | 8 | 9 | @dlt.source 10 | def example_source(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id=0): 11 | # return all the resources to be loaded 12 | return example_resource(api_url, api_key, last_id) 13 | 14 | 15 | if __name__ == "__main__": 16 | p = dlt.pipeline( 17 | pipeline_name="debug_pipeline", 18 | destination="postgres", 19 | dataset_name="debug_pipeline_data", 20 | dev_mode=False, 21 | ) 22 | load_info = p.run(example_source(last_id=819273998)) 23 | print(load_info) 24 | -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/dummy_pipeline.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.resource 5 | def example_resource(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id=0): 6 | yield [api_url, api_key, str(last_id), "param4", "param5"] 7 | 8 | 9 | @dlt.source 10 | def example_source(api_url=dlt.config.value, api_key=dlt.secrets.value, last_id=0): 11 | # return all the resources to be loaded 12 | return example_resource(api_url, api_key, last_id) 13 | 14 | 15 | if __name__ == "__main__": 16 | p = dlt.pipeline(pipeline_name="dummy_pipeline", destination="dummy") 17 | load_info = p.run(example_source(last_id=819273998)) 18 | print(load_info) 19 | -------------------------------------------------------------------------------- /tests/cli/cases/deploy_pipeline/requirements.txt: -------------------------------------------------------------------------------- 1 | dlt[redshift] >= 0.2.5a1 2 | -------------------------------------------------------------------------------- /tests/cli/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/cli/common/__init__.py -------------------------------------------------------------------------------- /tests/cli/conftest.py: 
-------------------------------------------------------------------------------- 1 | from tests.utils import ( 2 | preserve_environ, 3 | autouse_test_storage, 4 | unload_modules, 5 | wipe_pipeline, 6 | patch_home_dir, 7 | ) 8 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/__init__.py -------------------------------------------------------------------------------- /tests/common/cases/configuration/.wrong.dlt/config.toml: -------------------------------------------------------------------------------- 1 | api_type="REST" 2 | 3 | 4 | [typecheck.] 5 | 6 | -------------------------------------------------------------------------------- /tests/common/cases/configuration/dlt_home/config.toml: -------------------------------------------------------------------------------- 1 | [runtime] 2 | # disable telemetry 3 | dlthub_telemetry=false 4 | 5 | [api.params] 6 | param1="GLOBAL" 7 | param_global="G" -------------------------------------------------------------------------------- /tests/common/cases/configuration/runtime/.dlt/config.toml: -------------------------------------------------------------------------------- 1 | [runtime] 2 | name="runtime-cfg" 3 | data_dir="_storage" 4 | -------------------------------------------------------------------------------- /tests/common/cases/destinations/__init__.py: -------------------------------------------------------------------------------- 1 | from .null import null 2 | 3 | __all__ = ["null"] 4 | -------------------------------------------------------------------------------- /tests/common/cases/destinations/null.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Type 2 | 3 | from dlt.common.destination.capabilities import 
DestinationCapabilitiesContext 4 | from dlt.common.destination import Destination 5 | from dlt.common.destination.client import DestinationClientConfiguration, JobClientBase 6 | 7 | 8 | class null(Destination[DestinationClientConfiguration, "JobClientBase"]): 9 | def __init__(self, **kwargs: Any) -> None: 10 | super().__init__(**kwargs) 11 | 12 | spec = DestinationClientConfiguration 13 | 14 | def _raw_capabilities(self) -> DestinationCapabilitiesContext: 15 | return DestinationCapabilitiesContext.generic_capabilities() 16 | 17 | @property 18 | def client_class(self) -> Type["JobClientBase"]: 19 | return JobClientBase 20 | -------------------------------------------------------------------------------- /tests/common/cases/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/cases/modules/__init__.py -------------------------------------------------------------------------------- /tests/common/cases/modules/google/colab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/cases/modules/google/colab/__init__.py -------------------------------------------------------------------------------- /tests/common/cases/modules/google/colab/userdata.py: -------------------------------------------------------------------------------- 1 | """Mocked colab userdata""" 2 | 3 | 4 | class SecretNotFoundError(Exception): 5 | pass 6 | 7 | 8 | class NotebookAccessError(Exception): 9 | pass 10 | 11 | 12 | def get(secret_name: str) -> str: 13 | if secret_name == "secrets.toml": 14 | return 'api_key="api"' 15 | 16 | raise SecretNotFoundError() 17 | -------------------------------------------------------------------------------- /tests/common/cases/modules/uniq_mod_121.py: 
-------------------------------------------------------------------------------- 1 | import inspect 2 | from dlt.common.utils import get_module_name 3 | 4 | 5 | def find_my_module(): 6 | pass 7 | 8 | 9 | if __name__ == "__main__": 10 | print(get_module_name(inspect.getmodule(find_my_module))) 11 | -------------------------------------------------------------------------------- /tests/common/cases/normalizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/cases/normalizers/__init__.py -------------------------------------------------------------------------------- /tests/common/cases/normalizers/snake_no_x.py: -------------------------------------------------------------------------------- 1 | from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeCaseNamingConvention 2 | 3 | 4 | class NamingConvention(SnakeCaseNamingConvention): 5 | def normalize_identifier(self, identifier: str) -> str: 6 | identifier = super().normalize_identifier(identifier) 7 | if identifier.endswith("x"): 8 | print(identifier[:-1] + "_") 9 | return identifier[:-1] + "_" 10 | return identifier 11 | -------------------------------------------------------------------------------- /tests/common/cases/normalizers/sql_upper.py: -------------------------------------------------------------------------------- 1 | from dlt.common.normalizers.naming.naming import NamingConvention as BaseNamingConvention 2 | 3 | 4 | class NamingConvention(BaseNamingConvention): 5 | PATH_SEPARATOR = "__" 6 | 7 | _CLEANUP_TABLE = str.maketrans(".\n\r'\"▶", "______") 8 | 9 | @property 10 | def is_case_sensitive(self) -> bool: 11 | return True 12 | 13 | def normalize_identifier(self, identifier: str) -> str: 14 | identifier = super().normalize_identifier(identifier) 15 | norm_identifier = identifier.translate(self._CLEANUP_TABLE).upper() 16 | return 
self.shorten_identifier(norm_identifier, identifier, self.max_length) 17 | -------------------------------------------------------------------------------- /tests/common/cases/normalizers/title_case.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar 2 | from dlt.common.normalizers.naming.direct import NamingConvention as DirectNamingConvention 3 | 4 | 5 | class NamingConvention(DirectNamingConvention): 6 | """Test case sensitive naming that capitalizes first and last letter and leaves the rest intact""" 7 | 8 | PATH_SEPARATOR: ClassVar[str] = "__" 9 | 10 | def normalize_identifier(self, identifier: str) -> str: 11 | # keep prefix 12 | if identifier == "_dlt": 13 | return "_dlt" 14 | identifier = super().normalize_identifier(identifier) 15 | return identifier[0].upper() + identifier[1:-1] + identifier[-1].upper() 16 | -------------------------------------------------------------------------------- /tests/common/cases/oauth_client_secret_929384042504.json: -------------------------------------------------------------------------------- 1 | { 2 | "installed": { 3 | "client_id": "921382012504-3mtjaj1s7vuvf53j88mgdq4te7akkjm3.apps.googleusercontent.com", 4 | "project_id": "level-dragon-333983", 5 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 6 | "token_uri": "https://oauth2.googleapis.com/token", 7 | "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", 8 | "client_secret": "gOCSPX-XdY5znbrvjSMEG3pkpA_GHuLPPth", 9 | "redirect_uris": [ 10 | "http://localhost" 11 | ] 12 | } 13 | } -------------------------------------------------------------------------------- /tests/common/cases/pua_encoded_row.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "guy": "0xffF9Ce5f71ca6178D3BEEcEDB61e7Eff1602950E", 3 | "wad": "115792089237316195423570985008687907853269984665640564039457584007913129639935", 4 | "_dlt_meta": { 5 | "table_name": 
"Ronin WETH_calls_approve" 6 | }, 7 | "_tx_blockNumber": 16344816, 8 | "_tx_blockTimestamp": 1660639260, 9 | "_tx_transactionHash": "0xc51cd360f6d9e256cf517b48cb724dd56a430d73ccb4aff9835bdfa271a5c62c", 10 | "_tx_transactionIndex": "0x2", 11 | "_tx_address": "0xc99a6A985eD2Cac1ef41640596C5A5f9F4E19Ef5", 12 | "_tx_status": "0x1" 13 | }] -------------------------------------------------------------------------------- /tests/common/cases/schemas/ev1/event.schema.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/cases/schemas/ev1/event.schema.7z -------------------------------------------------------------------------------- /tests/common/cases/schemas/ev1/event.schema.bak.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/cases/schemas/ev1/event.schema.bak.json.gz -------------------------------------------------------------------------------- /tests/common/cases/secret-kube/secret-kube: -------------------------------------------------------------------------------- 1 | kube 2 | -------------------------------------------------------------------------------- /tests/common/cases/secret-value: -------------------------------------------------------------------------------- 1 | BANANA -------------------------------------------------------------------------------- /tests/common/cases/secrets/deploy-key: -------------------------------------------------------------------------------- 1 | -----BEGIN OPENSSH PRIVATE KEY----- 2 | b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW 3 | QyNTUxOQAAACDYgjrGrEhB5UrVaUw5DrIu1BsX7VbDMqpOSCLJfusN3QAAAKAeMrkJHjK5 4 | CQAAAAtzc2gtZWQyNTUxOQAAACDYgjrGrEhB5UrVaUw5DrIu1BsX7VbDMqpOSCLJfusN3Q 5 | 
AAAEACDN71UiYdn/3mplShYWwaZTPpDK8vJMg/XNeJKrcgLtiCOsasSEHlStVpTDkOsi7U 6 | GxftVsMyqk5IIsl+6w3dAAAAF3J1ZG9sZml4QHNjYWxldmVjdG9yLmlvAQIDBAUG 7 | -----END OPENSSH PRIVATE KEY----- 8 | -------------------------------------------------------------------------------- /tests/common/cases/secrets/deploy_key.pub: -------------------------------------------------------------------------------- 1 | ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINiCOsasSEHlStVpTDkOsi7UGxftVsMyqk5IIsl+6w3d rudolfix@scalevector.io 2 | -------------------------------------------------------------------------------- /tests/common/cases/simple_row.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "f_int": 7817289712, 4 | "f_float": 92898e37, 5 | "f_timestamp": "2021-10-13T13:49:32.901899+00:00", 6 | "f_bool": true, 7 | "f_bool_2": false, 8 | "f_str": "some string" 9 | }, 10 | { 11 | "f_int": 7817289713, 12 | "f_float": 878172.8292, 13 | "f_timestamp": "2021-10-13T13:49:32.901899+00:00", 14 | "f_bool_2": false 15 | } 16 | ] -------------------------------------------------------------------------------- /tests/common/cases/weird_rows.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "idx": 1, 4 | "str": ", NULL'); DROP SCHEMA Public --" 5 | }, 6 | { 7 | "idx": 2, 8 | "str": "イロハニホヘト チリヌルヲ 'ワカヨタレソ ツネナラム" 9 | }, 10 | { 11 | "idx": 3, 12 | "str": "ऄअआइ'ईउऊऋऌऍऎए" 13 | }, 14 | { 15 | "idx": 4, 16 | "str": "hello\nworld\t\t\t\r\u0006" 17 | }, 18 | { 19 | "idx": 5, 20 | "str": "\"'Company name: IVCi. 
We create, build and support collaborative environments with those who what to lead, people who want to connect, and companies that want to evolve;\n'\"'" 21 | } 22 | ] -------------------------------------------------------------------------------- /tests/common/configuration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/configuration/__init__.py -------------------------------------------------------------------------------- /tests/common/data_writers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/data_writers/__init__.py -------------------------------------------------------------------------------- /tests/common/destination/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/destination/__init__.py -------------------------------------------------------------------------------- /tests/common/normalizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/normalizers/__init__.py -------------------------------------------------------------------------------- /tests/common/reflection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/reflection/__init__.py -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/broken_mod.py: 
-------------------------------------------------------------------------------- 1 | n += x 2 | -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/missing_dep.py: -------------------------------------------------------------------------------- 1 | from n import x # a regular missing import 2 | from aleph import Zero # a type 3 | 4 | 5 | class One(Zero): 6 | pass 7 | 8 | 9 | def f() -> Zero: 10 | pass 11 | -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/pkg_1/mod_2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/reflection/cases/modules/pkg_1/mod_2/__init__.py -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/pkg_1/mod_2/mod_bkn/__init__.py: -------------------------------------------------------------------------------- 1 | from n import x 2 | -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/pkg_1/mod_2/mod_bkn/mod_4.py: -------------------------------------------------------------------------------- 1 | def add_n_to_x(): 2 | pass 3 | -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/pkg_1/mod_2/pkg_3/mod_4.py: -------------------------------------------------------------------------------- 1 | def f(): 2 | pass 3 | -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/pkg_missing_dep/__init__.py: -------------------------------------------------------------------------------- 1 | # this is package init where n module is missing. 
find_spec will import this package and fail 2 | from n import x 3 | -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/pkg_missing_dep/mod_in_pkg_missing_dep.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | def f(): 5 | pass 6 | -------------------------------------------------------------------------------- /tests/common/reflection/cases/modules/regular_mod.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.source 5 | def s(): 6 | return [] 7 | 8 | 9 | def f(): 10 | pass 11 | 12 | 13 | @dlt.resource() 14 | def r(): 15 | yield [1, 2, 3] 16 | -------------------------------------------------------------------------------- /tests/common/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/runners/__init__.py -------------------------------------------------------------------------------- /tests/common/runtime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/runtime/__init__.py -------------------------------------------------------------------------------- /tests/common/runtime/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.utils import preserve_environ 2 | -------------------------------------------------------------------------------- /tests/common/runtime/dlt_plus/__init__.py: -------------------------------------------------------------------------------- 1 | # simulate plugin presence 2 | from .version import __version__ 3 | -------------------------------------------------------------------------------- 
/tests/common/runtime/dlt_plus/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.7.1" 2 | PKG_NAME = "dlt_plus" 3 | -------------------------------------------------------------------------------- /tests/common/runtime/test_run_context_data_dir.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dlt 4 | 5 | # import auto fixture that sets global and data dir to TEST_STORAGE 6 | from dlt.common.runtime.run_context import DOT_DLT 7 | from tests.utils import TEST_STORAGE_ROOT, patch_home_dir 8 | 9 | 10 | def test_data_dir_test_storage() -> None: 11 | run_context = dlt.current.run_context() 12 | assert run_context.global_dir.endswith(os.path.join(TEST_STORAGE_ROOT, DOT_DLT)) 13 | assert run_context.global_dir == run_context.data_dir 14 | assert os.path.isabs(run_context.global_dir) 15 | -------------------------------------------------------------------------------- /tests/common/runtime/test_run_context_random_data_dir.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dlt 4 | 5 | # import auto fixture that sets global and data dir to TEST_STORAGE + random folder 6 | from tests.utils import TEST_STORAGE_ROOT, patch_random_home_dir 7 | 8 | 9 | def test_data_dir_test_storage() -> None: 10 | run_context = dlt.current.run_context() 11 | assert TEST_STORAGE_ROOT in run_context.global_dir 12 | assert "global_" in run_context.global_dir 13 | assert run_context.global_dir == run_context.data_dir 14 | assert os.path.isabs(run_context.global_dir) 15 | -------------------------------------------------------------------------------- /tests/common/runtime/utils.py: -------------------------------------------------------------------------------- 1 | from typing import MutableMapping 2 | 3 | 4 | def mock_image_env(environment: MutableMapping[str, str]) -> None: 5 | environment["COMMIT_SHA"] = 
"192891" 6 | environment["IMAGE_VERSION"] = "scale/v:112" 7 | 8 | 9 | def mock_pod_env(environment: MutableMapping[str, str]) -> None: 10 | environment["KUBE_NODE_NAME"] = "node_name" 11 | environment["KUBE_POD_NAME"] = "pod_name" 12 | environment["KUBE_POD_NAMESPACE"] = "namespace" 13 | 14 | 15 | def mock_github_env(environment: MutableMapping[str, str]) -> None: 16 | environment["CODESPACES"] = "true" 17 | environment["GITHUB_USER"] = "rudolfix" 18 | environment["GITHUB_REPOSITORY"] = "dlt-hub/beginners-workshop-2022" 19 | environment["GITHUB_REPOSITORY_OWNER"] = "dlt-hub" 20 | -------------------------------------------------------------------------------- /tests/common/schema/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/schema/__init__.py -------------------------------------------------------------------------------- /tests/common/schema/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dlt.common.configuration import resolve_configuration 4 | from dlt.common.schema import Schema 5 | from dlt.common.storages import SchemaStorageConfiguration, SchemaStorage 6 | 7 | 8 | from tests.utils import autouse_test_storage, preserve_environ 9 | 10 | 11 | @pytest.fixture 12 | def schema() -> Schema: 13 | return Schema("event") 14 | 15 | 16 | @pytest.fixture 17 | def schema_storage() -> SchemaStorage: 18 | C = resolve_configuration( 19 | SchemaStorageConfiguration(), 20 | explicit_value={ 21 | "import_schema_path": "tests/common/cases/schemas/rasa", 22 | "external_schema_format": "json", 23 | }, 24 | ) 25 | return SchemaStorage(C, makedirs=True) 26 | -------------------------------------------------------------------------------- /tests/common/scripts/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/scripts/__init__.py -------------------------------------------------------------------------------- /tests/common/scripts/args.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | print(len(sys.argv)) 4 | print(sys.argv) 5 | -------------------------------------------------------------------------------- /tests/common/scripts/counter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from time import sleep 3 | 4 | 5 | for i in range(5): 6 | print(i) 7 | sys.stdout.flush() 8 | sleep(0.3) 9 | print("exit") 10 | -------------------------------------------------------------------------------- /tests/common/scripts/cwd.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | print(os.getcwd()) 4 | -------------------------------------------------------------------------------- /tests/common/scripts/empty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/scripts/empty.py -------------------------------------------------------------------------------- /tests/common/scripts/environ.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | for k in environ: 4 | print(f"{k}={environ[k]}") 5 | -------------------------------------------------------------------------------- /tests/common/scripts/long_lines.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | line_a = "a" * 1024 * 1024 4 | line_b = "b" * 1024 * 1024 5 | 6 | print(line_a) 7 | print(line_b, file=sys.stderr) 8 | print(line_a, flush=True) 9 | print(line_b, file=sys.stderr, flush=True) 10 | 11 | # without new lines 
12 | print(line_b, file=sys.stderr, end="") 13 | print(line_a, end="") 14 | -------------------------------------------------------------------------------- /tests/common/scripts/long_lines_fails.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | line_a = "a" * 1024 * 1024 4 | line_b = "b" * 1024 * 1024 5 | 6 | print(line_a) 7 | print(line_b, file=sys.stderr) 8 | print(line_a, flush=True) 9 | print(line_b, file=sys.stderr, flush=True) 10 | 11 | # without new lines 12 | print(line_b, file=sys.stderr, end="") 13 | print(line_a, end="") 14 | exit(-1) 15 | -------------------------------------------------------------------------------- /tests/common/scripts/no_stdout_exception.py: -------------------------------------------------------------------------------- 1 | raise Exception("no stdout") 2 | -------------------------------------------------------------------------------- /tests/common/scripts/no_stdout_no_stderr_with_fail.py: -------------------------------------------------------------------------------- 1 | exit(-1) 2 | -------------------------------------------------------------------------------- /tests/common/scripts/raises.py: -------------------------------------------------------------------------------- 1 | raise Exception("always raises") 2 | -------------------------------------------------------------------------------- /tests/common/scripts/raising_counter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from time import sleep 3 | 4 | 5 | for i in range(5): 6 | print(i) 7 | # sys.stdout.flush() 8 | if i == 2: 9 | raise Exception("end") 10 | sleep(0.3) 11 | print("exit") 12 | -------------------------------------------------------------------------------- /tests/common/scripts/stderr_counter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from time import sleep 3 | 4 | 5 | for i in 
range(5): 6 | print(i, file=sys.stderr if i % 2 else sys.stdout) 7 | if i == 3: 8 | exit(1) 9 | sleep(0.3) 10 | print("exit") 11 | -------------------------------------------------------------------------------- /tests/common/scripts/stdout_encode_exception.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from dlt.common.exceptions import UnsupportedProcessStartMethodException 3 | 4 | from dlt.common.runners import TRunMetrics 5 | from dlt.common.runners.stdout import exec_to_stdout 6 | 7 | 8 | def worker(data1, data2): 9 | print("in func") 10 | raise UnsupportedProcessStartMethodException("this") 11 | 12 | 13 | f = partial(worker, "this is string", TRunMetrics(True, 300)) 14 | with exec_to_stdout(f) as rv: 15 | print(rv) 16 | -------------------------------------------------------------------------------- /tests/common/scripts/stdout_encode_result.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from dlt.common.runners import TRunMetrics 4 | from dlt.common.runners.stdout import exec_to_stdout 5 | 6 | 7 | def worker(data1, data2): 8 | print("in func") 9 | return data1, data2 10 | 11 | 12 | f = partial(worker, "this is string", TRunMetrics(True, 300)) 13 | with exec_to_stdout(f) as rv: 14 | print(rv) 15 | -------------------------------------------------------------------------------- /tests/common/scripts/stdout_encode_unpicklable.py: -------------------------------------------------------------------------------- 1 | from dlt.common.runners.stdout import exec_to_stdout 2 | 3 | 4 | def worker(): 5 | return [open("tests/common/scripts/counter.py", "r", encoding="utf-8")] 6 | 7 | 8 | with exec_to_stdout(worker) as rv: 9 | print(rv) 10 | -------------------------------------------------------------------------------- /tests/common/storages/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/storages/__init__.py -------------------------------------------------------------------------------- /tests/common/storages/custom/freshman_kgs.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/storages/custom/freshman_kgs.xlsx -------------------------------------------------------------------------------- /tests/common/storages/samples/csv/mlb_teams_2012.csv: -------------------------------------------------------------------------------- 1 | Team,Payroll(millions),Wins 2 | Nationals,81.34,98 3 | Reds,82.20,97 4 | Yankees,197.96,95 5 | Giants,117.62,94 6 | Braves,83.31,94 7 | Athletics,55.37,94 8 | Rangers,120.51,93 9 | Orioles,81.43,93 10 | Rays,64.17,90 11 | Angels,154.49,89 12 | Tigers,132.30,88 13 | Cardinals,110.30,88 14 | Dodgers,95.14,86 15 | WhiteSox,96.92,85 16 | Brewers,97.65,83 17 | Phillies,174.54,81 18 | Diamondbacks,74.28,81 19 | Pirates,63.43,79 20 | Padres,55.24,76 21 | Mariners,81.97,75 22 | Mets,93.35,74 23 | BlueJays,75.48,73 24 | Royals,60.91,72 25 | Marlins,118.07,69 26 | RedSox,173.18,69 27 | Indians,78.43,68 28 | Twins,94.08,66 29 | Rockies,78.06,64 30 | Cubs,88.19,61 31 | Astros,60.65,55 32 | 33 | -------------------------------------------------------------------------------- /tests/common/storages/samples/gzip/taxi.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/storages/samples/gzip/taxi.csv.gz -------------------------------------------------------------------------------- /tests/common/storages/samples/parquet/mlb_players.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/common/storages/samples/parquet/mlb_players.parquet -------------------------------------------------------------------------------- /tests/common/storages/samples/sample.txt: -------------------------------------------------------------------------------- 1 | dlthub content -------------------------------------------------------------------------------- /tests/common/test_version.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from importlib.metadata import PackageNotFoundError 4 | 5 | from dlt.version import get_installed_requirement_string 6 | 7 | 8 | def test_installed_requirement_string() -> None: 9 | # we are running tests in editable mode so we should get path to here 10 | path = get_installed_requirement_string() 11 | assert os.path.commonpath((__file__, path)) == path 12 | # requests should be properly installed 13 | requirement = get_installed_requirement_string("requests") 14 | assert requirement.startswith("requests==") 15 | # this is not installed 16 | with pytest.raises(PackageNotFoundError): 17 | get_installed_requirement_string("requests-X") 18 | -------------------------------------------------------------------------------- /tests/destinations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/destinations/__init__.py -------------------------------------------------------------------------------- /tests/destinations/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.utils import ( 2 | preserve_environ, 3 | autouse_test_storage, 4 | patch_home_dir, 5 | wipe_pipeline, 6 | ) 7 | from tests.common.configuration.utils 
import environment 8 | -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/e2e/__init__.py -------------------------------------------------------------------------------- /tests/e2e/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/e2e/helpers/__init__.py -------------------------------------------------------------------------------- /tests/e2e/helpers/studio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/e2e/helpers/studio/__init__.py -------------------------------------------------------------------------------- /tests/extract/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/extract/__init__.py -------------------------------------------------------------------------------- /tests/extract/cases/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/extract/cases/__init__.py -------------------------------------------------------------------------------- /tests/extract/cases/eth_source/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/extract/cases/eth_source/__init__.py -------------------------------------------------------------------------------- 
/tests/extract/cases/eth_source/source.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import dlt 3 | 4 | 5 | @dlt.source 6 | def ethereum() -> Any: 7 | # this just tests if the schema "ethereum" was loaded 8 | return dlt.resource([1, 2, 3], name="data") 9 | -------------------------------------------------------------------------------- /tests/extract/cases/imported.any: -------------------------------------------------------------------------------- 1 | Any files may be imported into buffered writer -------------------------------------------------------------------------------- /tests/extract/cases/section_source/__init__.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.source 5 | def init_source_f_1(val: str = dlt.config.value): 6 | return dlt.resource([val], name="f_1") 7 | 8 | 9 | @dlt.resource 10 | def init_resource_f_2(val: str = dlt.config.value): 11 | yield [val] 12 | -------------------------------------------------------------------------------- /tests/extract/cases/section_source/named_module.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | __source_name__ = "name_overridden" 4 | 5 | 6 | @dlt.source(section="name_overridden") 7 | def source_f_1(val: str = dlt.config.value): 8 | return dlt.resource([val], name="f_1") 9 | 10 | 11 | @dlt.resource 12 | def resource_f_2(val: str = dlt.config.value): 13 | yield [val] 14 | -------------------------------------------------------------------------------- /tests/extract/cases/sources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/extract/cases/sources/__init__.py -------------------------------------------------------------------------------- 
/tests/extract/cases/sources/shorthand.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.source 5 | def shorthand(data): 6 | return dlt.resource(data, name="alpha") 7 | 8 | 9 | @dlt.source(name="shorthand_registry", section="shorthand") 10 | def with_shorthand_registry(data): 11 | return dlt.resource(data, name="alpha") 12 | -------------------------------------------------------------------------------- /tests/extract/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.utils import ( 2 | autouse_test_storage, 3 | preserve_environ, 4 | patch_home_dir, 5 | wipe_pipeline, 6 | ) 7 | -------------------------------------------------------------------------------- /tests/extract/data_writers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/extract/data_writers/__init__.py -------------------------------------------------------------------------------- /tests/helpers/airflow_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/helpers/airflow_tests/__init__.py -------------------------------------------------------------------------------- /tests/helpers/airflow_tests/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.helpers.airflow_tests.utils import initialize_airflow_db 2 | from tests.utils import preserve_environ, autouse_test_storage, TEST_STORAGE_ROOT, patch_home_dir 3 | -------------------------------------------------------------------------------- /tests/helpers/dbt_cloud_tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/helpers/dbt_cloud_tests/__init__.py -------------------------------------------------------------------------------- /tests/helpers/dbt_tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/helpers/dbt_tests/cases/profiles.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # do not track usage, do not create .user.yml 3 | send_anonymous_usage_stats: False 4 | 5 | jaffle_shop: 6 | target: analytics 7 | outputs: 8 | analytics: 9 | type: postgres 10 | host: "{{ env_var('DLT__CREDENTIALS__HOST') }}" 11 | user: "{{ env_var('DLT__CREDENTIALS__USERNAME') }}" 12 | password: "{{ env_var('DLT__CREDENTIALS__PASSWORD') }}" 13 | port: 5432 14 | dbname: "{{ env_var('DLT__CREDENTIALS__DATABASE') }}" 15 | schema: "{{ var('dbt_schema') }}" -------------------------------------------------------------------------------- /tests/helpers/dbt_tests/cases/profiles_invalid_credentials.yml: -------------------------------------------------------------------------------- 1 | jaffle_shop: 2 | target: analytics 3 | outputs: 4 | analytics: 5 | type: postgres 6 | host: hostname 7 | user: username 8 | password: password 9 | port: 5041 10 | dbname: database_name 11 | schema: "{{ var('dbt_schema') }}" -------------------------------------------------------------------------------- /tests/helpers/dbt_tests/local/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/helpers/dbt_tests/local/__init__.py -------------------------------------------------------------------------------- /tests/helpers/providers/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/helpers/providers/__init__.py -------------------------------------------------------------------------------- /tests/helpers/streamlit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | pytest.importorskip("streamlit") 4 | -------------------------------------------------------------------------------- /tests/helpers/studio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/helpers/studio/__init__.py -------------------------------------------------------------------------------- /tests/helpers/studio/test_welcome_page.py: -------------------------------------------------------------------------------- 1 | import marimo as mo 2 | 3 | from dlt.helpers.studio.dlt_app import home 4 | 5 | 6 | def test_welcome_cell(): 7 | output, defs = home.run( # type: ignore[unused-ignore,misc] 8 | dlt_pipeline_select=mo.ui.multiselect([1, 2, 3]), 9 | dlt_all_pipelines=[ 10 | {"name": "pipeline1", "link": "link1", "timestamp": 0}, 11 | {"name": "pipeline2", "link": "link2", "timestamp": 1}, 12 | {"name": "pipeline3", "link": "link3", "timestamp": 2}, 13 | ], 14 | dlt_pipelines_dir="some_dir", 15 | ) 16 | 17 | assert "We have found 3 pipelines in local directory" in output.text 18 | -------------------------------------------------------------------------------- /tests/libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/libs/__init__.py -------------------------------------------------------------------------------- /tests/libs/conftest.py: 
-------------------------------------------------------------------------------- 1 | from tests.utils import write_version, autouse_test_storage, preserve_environ 2 | -------------------------------------------------------------------------------- /tests/libs/pyarrow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/libs/pyarrow/__init__.py -------------------------------------------------------------------------------- /tests/load/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/__init__.py -------------------------------------------------------------------------------- /tests/load/athena_iceberg/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | 4 | skip_if_not_active("athena") 5 | -------------------------------------------------------------------------------- /tests/load/bigquery/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("bigquery") 4 | -------------------------------------------------------------------------------- /tests/load/cases/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/cases/__init__.py -------------------------------------------------------------------------------- /tests/load/cases/fake_destination.py: -------------------------------------------------------------------------------- 1 | # module that is used to test wrong destination references 2 | 3 | 4 | class not_a_destination: 5 | def __init__(self, **kwargs) 
-> None: 6 | pass 7 | -------------------------------------------------------------------------------- /tests/load/cases/loading/csv_header.csv: -------------------------------------------------------------------------------- 1 | id|name|description|ordered_at|price 2 | 1|item|value|2024-04-12|128.4 3 | 1|"item"|value with space|2024-04-12|128.4 -------------------------------------------------------------------------------- /tests/load/cases/loading/csv_no_header.csv: -------------------------------------------------------------------------------- 1 | 1|item|value|2024-04-12|128.4 2 | 1|"item"|value with space|2024-04-12|128.4 -------------------------------------------------------------------------------- /tests/load/cases/loading/csv_no_header.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/cases/loading/csv_no_header.csv.gz -------------------------------------------------------------------------------- /tests/load/cases/loading/event_bot.181291798a78198.0.unsupported_format: -------------------------------------------------------------------------------- 1 | # unsupported format -------------------------------------------------------------------------------- /tests/load/cases/loading/event_loop_interrupted.1234.0.jsonl: -------------------------------------------------------------------------------- 1 | small file that is never read -------------------------------------------------------------------------------- /tests/load/cases/loading/event_user.1234.0.jsonl: -------------------------------------------------------------------------------- 1 | small file that is never read -------------------------------------------------------------------------------- /tests/load/cases/loading/header.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 1, "name": "item", "description": 
"value", "ordered_at": "2024-04-12", "price": 128.4} 2 | {"id": 1, "name": "item", "description": "value with space", "ordered_at": "2024-04-12", "price": 128.4} -------------------------------------------------------------------------------- /tests/load/cases/loading/schema_updates.json: -------------------------------------------------------------------------------- 1 | [{}, {}, {}, {}, {}] -------------------------------------------------------------------------------- /tests/load/clickhouse/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("clickhouse") 4 | -------------------------------------------------------------------------------- /tests/load/clickhouse/docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | services: 3 | clickhouse: 4 | image: clickhouse/clickhouse-server 5 | ports: 6 | - "9000:9000" 7 | - "8123:8123" 8 | environment: 9 | - CLICKHOUSE_DB=dlt_data 10 | - CLICKHOUSE_USER=loader 11 | - CLICKHOUSE_PASSWORD=loader 12 | - CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 13 | volumes: 14 | - clickhouse_data:/var/lib/clickhouse/ 15 | - clickhouse_logs:/var/log/clickhouse-server/ 16 | restart: unless-stopped 17 | healthcheck: 18 | test: [ "CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8123/ping" ] 19 | interval: 3s 20 | timeout: 5s 21 | retries: 5 22 | 23 | 24 | volumes: 25 | clickhouse_data: 26 | clickhouse_logs: 27 | -------------------------------------------------------------------------------- /tests/load/clickhouse/utils.py: -------------------------------------------------------------------------------- 1 | from dlt.destinations.impl.clickhouse.sql_client import ClickHouseSqlClient 2 | from dlt.destinations.impl.clickhouse.typing import TDeployment 3 | 4 | 5 | def get_deployment_type(client: ClickHouseSqlClient) -> TDeployment: 6 | cloud_mode = 
int(client.execute_sql(""" 7 | SELECT value FROM system.settings WHERE name = 'cloud_mode' 8 | """)[0][0]) 9 | return "ClickHouseCloud" if cloud_mode else "ClickHouseOSS" 10 | -------------------------------------------------------------------------------- /tests/load/databricks/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("databricks") 4 | -------------------------------------------------------------------------------- /tests/load/dremio/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("dremio") 4 | -------------------------------------------------------------------------------- /tests/load/dremio/bootstrap/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | RUN apt-get update -y && apt-get upgrade -y && apt-get install -y curl 3 | COPY . . 
-------------------------------------------------------------------------------- /tests/load/dremio/bootstrap/nas.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nas", 3 | "config": { 4 | "path": "/tmp", 5 | "defaultCtasFormat": "ICEBERG", 6 | "propertyList": [] 7 | }, 8 | "accelerationRefreshPeriod": 3600000, 9 | "accelerationGracePeriod": 10800000, 10 | "metadataPolicy": { 11 | "deleteUnavailableDatasets": true, 12 | "autoPromoteDatasets": false, 13 | "namesRefreshMillis": 3600000, 14 | "datasetDefinitionRefreshAfterMillis": 3600000, 15 | "datasetDefinitionExpireAfterMillis": 10800000, 16 | "authTTLMillis": 86400000, 17 | "updateMode": "PREFETCH_QUERIED" 18 | }, 19 | "type": "NAS", 20 | "accessControlList": { 21 | "userControls": [], 22 | "roleControls": [] 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tests/load/duckdb/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("duckdb") 4 | -------------------------------------------------------------------------------- /tests/load/filesystem/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("filesystem") 4 | -------------------------------------------------------------------------------- /tests/load/filesystem_sftp/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("filesystem") 4 | -------------------------------------------------------------------------------- /tests/load/filesystem_sftp/bootstrap/billy_rsa.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQC3q9LEVSygyZ+1JzayfvJ38ao3VC4PYOULN2n4w0XZrAp7glcL7284oPBhaTUNTVnavjTSFQWmwKiVes7uQ0kp0s1nDu1RwjC6xdRahuHfNPRUKmianJwkjXKVkdNftxLC5JfjdVE2ayPTj2xrBO3Bv7nJRGxm5zx8tFfjHQRiNuqs/hPES+6a9d/a+M9CD6V3hAIBJNY2KM3RoFFX5+d8JeWSS370+HKpRgWYt93/8lyUSzkkTI6axfry1RPH3Rc0oLh5/S9gOdCwxMO6Mc7COVr1lNUy+FcGDXRlR36uZtaXAP2AKIiovlhmD+Gwn1ARViv7TeESr6qcECpFPC8VdokcZRU2Dt4KQTfDhEVKpTsUjBqD2docFC9tbQsswAikmpE9DzUnQsm3Fc+yJf7G3g5qH6OBYwBe0qR/PNl+74FENngX9OpGPvGA+vCwKAgUTqfc+mCcFFlvUhzNDRUW4Zc7aS8Kkf1snlFvwwQnJnXP8aqJqAF8OFQhdLMYtLGMZ81HpuausEvYnc8++ICB3V6mMWEyOeR5bKxP1TXs+jKIlCVWfAQBCjTikRZAuGtUHPQNKMf2egSWFVRG+8cKV+SBgDUetILP6ocu77W+Cj1L/7XiLarQX8gs8Ymhvt2Vk0jFlwtWc3ZTvb/VZLfPQNnbGvuQxKoxDdCIzOt7NQ== billy@example.com 2 | -------------------------------------------------------------------------------- /tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQD2sf6SyXup5tWG/cqSTtORJJGM1teEnDpyupHbUP1LHXvq5nHJ0g82YX9v3wUJ3Nkd6ZMbh+bs37BCaQzR2oDWs3kIhsx/xesg03N6tdVMpXFw3tgSceSsXe31YS5rUVNGYIUVtcP3xFRQvLG41X0/5GEGS+yiicq+LfqyMnE6Np+/FGaCFzuW1nE8hrR4B/YoLKJs8e4kKOs8Jsj8mCCAXvAk+SNDZLbVhx5pQcybUSNYqC4NExn5EEj4xK71vHa4oA3gtgeubKXVr04fk5C8maku/tr1P63MUKtuvTtZBmNH+RVs9i9IwsF8VSMBSMyTMuOaa1Tsa3FIwnGZpZ1PcfTBG7CJPMu4drMBSw5y0DiAY8mdGvdklz1esG0DldAZx5Blfvw6XU16588e3zQKNiApNwQHlvJJEb0Gb7rFw/TtHhCT8MtpBqIf+gRMaFhajbgkQI6CDABp6RgmpqcI6xDFwvkQ+JFveFNFy6zXVrd+oYgz16owDpEzfxTYp0mrhivpa0dkoKqgjIU8VMuQfegoxQiJUi+mW9IqrmzcGwAlRTzBcT+IJ9JzmfZxuZQshWIQe05nPMqCJqpSukg6tN+Fl0kVF8swz6pNjEIcAryIuHlqOxSyQz8pqxUPBSPqAEcY0i+0VeMfweQ2TNHdtT3IYzOZrT79r/Ba6Jhy6w== bobby@example.com 2 | -------------------------------------------------------------------------------- /tests/load/filesystem_sftp/bootstrap/ca_rsa.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDntpKth5weS9cQ9BhndlfKV3ZsncJ64P6uYS1sYVi4ng2OhsFOV7V8ENQtx6Z4izY/uuzJuLEgxWACFtVHD0OSb7zJcldpSJvribkCQ+0OvA9mHqKnECfoS65FzRtR3DwkasuTqPg9vY4KUcAj1IS++FiB0T7Zt/bEy+kjreDh6UFvtnRLMpZ0+cFvIIOZ1BNwLkXP/yxjOvH8wCGrBnWQUuvY5qGxqOmO4kmcvsrusv4k3Y6IGP16Hg5/YhfXkUwVXwsd0uw7Gf0Teg5BDE1+RfJP+pzJ3ZLCqYkcQzoDJC5aRYqjC2OstfjXRUkxQXDzD2ruhJb5avpSRNleDvhb6N9GM4G3oI7cj6CUR2l4kBX6h1pjxJze+/6xJE2j47RjOySrW+jGnDnFAh0VkjNCY3lPkzxs0VWgzR1nrUyILyBhYCPfD+RQXuGNvg9bYxNlLE/XUnuxH291Xzpuiz2vWRp3pASKjCQkFFhMY85lkSPRf/UmOsRNqEVeNXzr3pF65vm8/NVyLMwpD3eIk0w1/9BfhetvcVXGYDQfIzqJcz+gtYJDgDiF2fM41Lh7qFRHD0FQXvT3j915rsCBrD1X3tgNfIEFTHzSR/AxZ0Yxgi9GVYmIa3cquA8REmzHsIIl89bPcD8ZuCuT1xldcHkraAmlABa/VSlkJbBzaBCq7Q== julian@Julians-Laptop.local 2 | -------------------------------------------------------------------------------- /tests/load/filesystem_sftp/bootstrap/foo_rsa.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHP+bPKH03wq+pblRUy4qjut5hjJTFTaAZi/9K0Oz4fePlRBuuGsBvBX5HSOB8rikQBDDYQMNbPmvylzYs4Cr0L6amFr5Hqhdm4vBkUKck6uj6l4UMSQZXI1F6pLDYm8oV6dGI/YhapBG2//N9twLrlOtfgL8M0W4YX+4+Nkkfy/isY34y0BytBkOLtXim8B4V6Mh5W6aVTmtnpc3WE4gPcGubJuIvxypzIZNWiVhucp3Jo9GaOZeAYErS8x5wmG9tbNWVbuIudedpQtcWJorFeW+LPHAxqakI0xsPqPT5voOiVgOMSPQFG4RYavY7HeimoqFbDDV4C2Oyljs+kYCh4KrUx++fXvwG/SxFnKQlPiAzgQs4SGYi2XpBIWfmQuDy8NGDSPwId7mxxVJUbd6CNhVw3Ev/TmiRKJYjA0t3sxrlDKLToFLolSqoUQOEA0hGAxm7O0yzV7ogq2KJER46F/hRAtwywfyvQBiMk9cVOle9YtqayICGwvW0ZrxfLLGp0muqwiS2upLhCcLHu4MA+ifTAehEpjMLCo2afUjB/e56lntnR6+HchFF9IjctpHF+EzQjrfPy4/LPHnkLc8CQmmZGI8bMVZ3s2TU/o6BGyOGVJ8Qrw7A4Vd9ktihEERiPzJRdGEm7cJ1wlWIIgli6G1jfx9JPfzvlpPHUok6qw== foo@example.com 2 | -------------------------------------------------------------------------------- /tests/load/filesystem_sftp/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | 5 | sftpserver: 6 | build: 7 | 
context: bootstrap 8 | dockerfile: Dockerfile 9 | image: sftpserver:latest 10 | networks: 11 | - sftpserver 12 | ports: 13 | - "2222:22" 14 | volumes: 15 | - ../../common/storages/samples:/home/foo/sftp/data/standard_source/samples 16 | - ../../common/storages/samples:/home/bobby/sftp/data/standard_source/samples 17 | - ../../common/storages/samples:/home/billy/sftp/data/standard_source/samples 18 | 19 | networks: 20 | sftpserver: 21 | name: sftpserver 22 | -------------------------------------------------------------------------------- /tests/load/lancedb/__init__.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from tests.utils import skip_if_not_active 3 | 4 | skip_if_not_active("lancedb") 5 | pytest.importorskip("lancedb") 6 | -------------------------------------------------------------------------------- /tests/load/motherduck/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("motherduck") 4 | -------------------------------------------------------------------------------- /tests/load/mssql/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("mssql") 4 | -------------------------------------------------------------------------------- /tests/load/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/pipeline/__init__.py -------------------------------------------------------------------------------- /tests/load/pipeline/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.pipeline.utils import drop_dataset_from_env 2 | 
-------------------------------------------------------------------------------- /tests/load/postgres/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("postgres") 4 | -------------------------------------------------------------------------------- /tests/load/postgres/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | db: 4 | env_file: postgres.env 5 | build: 6 | context: postgres 7 | dockerfile: Dockerfile 8 | container_name: dlt_postgres_db 9 | restart: unless-stopped 10 | volumes: 11 | - db_home:/var/lib/postgresql/data 12 | ports: 13 | - 5432:5432 14 | 15 | volumes: 16 | db_home: 17 | external: false -------------------------------------------------------------------------------- /tests/load/postgres/postgres.env: -------------------------------------------------------------------------------- 1 | POSTGRES_DB=dlt_data 2 | POSTGRES_USER=loader 3 | POSTGRES_PASSWORD=loader 4 | -------------------------------------------------------------------------------- /tests/load/postgres/postgres/01_init.sql: -------------------------------------------------------------------------------- 1 | -- create metabase database 2 | CREATE DATABASE metabase; -------------------------------------------------------------------------------- /tests/load/postgres/postgres/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:15 2 | COPY 01_init.sql /docker-entrypoint-initdb.d/ -------------------------------------------------------------------------------- /tests/load/qdrant/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("qdrant") 4 | 
-------------------------------------------------------------------------------- /tests/load/redshift/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("redshift") 4 | -------------------------------------------------------------------------------- /tests/load/redshift/setup_db.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE $database; 2 | REVOKE ALL PRIVILEGES ON DATABASE $database FROM PUBLIC; 3 | 4 | -- \connect $database 5 | DROP SCHEMA public; 6 | -- SET search_path = data; don't use search paths 7 | CREATE USER $user WITH PASSWORD '$password'; 8 | GRANT CONNECT ON DATABASE $database TO $user; 9 | ALTER DATABASE $database OWNER TO $user 10 | 11 | 12 | -- minimum permissions to all schemas created by the user: so we can have db owner vs. schema owner 13 | ALTER SCHEMA data OWNER TO $user 14 | 15 | GRANT CREATE ON SCHEMA data TO $user 16 | 17 | ALTER DEFAULT PRIVILEGES FOR ROLE ${database}_owner GRANT SELECT, INSERT, UPDATE, DELETE, TRUNCATE ON TABLES TO $user 18 | ALTER DEFAULT PRIVILEGES FOR ROLE ${database}_owner GRANT SELECT, UPDATE ON SEQUENCES TO $user 19 | ALTER DEFAULT PRIVILEGES FOR ROLE ${database}_owner GRANT EXECUTE ON FUNCTIONS TO $user -------------------------------------------------------------------------------- /tests/load/snowflake/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/snowflake/__init__.py -------------------------------------------------------------------------------- /tests/load/sources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/sources/__init__.py 
-------------------------------------------------------------------------------- /tests/load/sources/filesystem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/sources/filesystem/__init__.py -------------------------------------------------------------------------------- /tests/load/sources/rest_api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/sources/rest_api/__init__.py -------------------------------------------------------------------------------- /tests/load/sources/sql_database/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/load/sources/sql_database/__init__.py -------------------------------------------------------------------------------- /tests/load/sqlalchemy/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("sqlalchemy") 4 | -------------------------------------------------------------------------------- /tests/load/sqlalchemy/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # Use root/example as user/password credentials 2 | version: '3.1' 3 | 4 | services: 5 | 6 | db: 7 | image: mysql:8 8 | restart: always 9 | command: --sql-mode="STRICT_ALL_TABLES,STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION" --innodb-strict-mode 10 | environment: 11 | MYSQL_ROOT_PASSWORD: root 12 | MYSQL_DATABASE: dlt_data 13 | MYSQL_USER: loader 14 | MYSQL_PASSWORD: loader 15 | ports: 16 | - 3306:3306 17 | # (this is just an example, not intended to be a production 
configuration) 18 | -------------------------------------------------------------------------------- /tests/load/sqlalchemy/test_sqlalchemy_configuration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import sqlalchemy as sa 4 | 5 | from dlt.common.configuration import resolve_configuration 6 | from dlt.destinations.impl.sqlalchemy.configuration import ( 7 | SqlalchemyClientConfiguration, 8 | SqlalchemyCredentials, 9 | ) 10 | 11 | 12 | def test_sqlalchemy_credentials_from_engine() -> None: 13 | engine = sa.create_engine("sqlite:///:memory:") 14 | 15 | creds = resolve_configuration(SqlalchemyCredentials(engine)) 16 | 17 | # Url is taken from engine 18 | assert creds.to_url() == sa.engine.make_url("sqlite:///:memory:") 19 | # Engine is stored on the instance 20 | assert creds.engine is engine 21 | 22 | assert creds.drivername == "sqlite" 23 | assert creds.database == ":memory:" 24 | -------------------------------------------------------------------------------- /tests/load/synapse/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("synapse") 4 | -------------------------------------------------------------------------------- /tests/load/test_configuration.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import dlt 4 | 5 | from dlt.common.destination.dataset import Dataset 6 | 7 | 8 | def test_transformation_defaults() -> None: 9 | @dlt.transformation() 10 | def my_tf(dataset: Dataset) -> Any: 11 | yield dataset["example_table"].limit(5) 12 | 13 | assert my_tf.write_disposition == "append" 14 | # assert my_tf(dataset).materialization == "table" 15 | assert my_tf.table_name == "my_tf" 16 | assert my_tf.name == "my_tf" 17 | -------------------------------------------------------------------------------- 
/tests/load/transformations/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | We test some transformations on all destinations here and a larger set only on duckdb but all OSes 3 | See tests/transformations/ for the tests that run on all OSes 4 | """ 5 | -------------------------------------------------------------------------------- /tests/load/weaviate/__init__.py: -------------------------------------------------------------------------------- 1 | from tests.utils import skip_if_not_active 2 | 3 | skip_if_not_active("weaviate") 4 | -------------------------------------------------------------------------------- /tests/normalize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/normalize/__init__.py -------------------------------------------------------------------------------- /tests/normalize/cases/event.event.slot_session_metadata_1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "event": "slot", 4 | "timestamp": 1640951453.1129234, 5 | "metadata": { 6 | "rasa_x_flagged": false, 7 | "rasa_x_id": 57378 8 | }, 9 | "name": "session_started_metadata", 10 | "value": { 11 | "user_id": "world", 12 | "mitter_id": "hello" 13 | }, 14 | "is_flagged": false, 15 | "sender_id": "1234", 16 | "model_id": "20211224-134834_2.8.7", 17 | "environment": "production" 18 | } 19 | ] -------------------------------------------------------------------------------- /tests/normalize/utils.py: -------------------------------------------------------------------------------- 1 | from dlt.destinations import duckdb, redshift, postgres, bigquery, filesystem 2 | 3 | 4 | # callables to capabilities 5 | DEFAULT_CAPS = postgres().capabilities 6 | INSERT_CAPS = [duckdb().capabilities, redshift().capabilities, DEFAULT_CAPS] 7 | JSONL_CAPS = 
[bigquery().capabilities, filesystem().capabilities] 8 | ALL_CAPABILITIES = INSERT_CAPS + JSONL_CAPS 9 | 10 | 11 | def json_case_path(name: str) -> str: 12 | return f"./tests/normalize/cases/{name}.json" 13 | -------------------------------------------------------------------------------- /tests/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/pipeline/__init__.py -------------------------------------------------------------------------------- /tests/pipeline/cases/github_pipeline/.dlt/config.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/pipeline/cases/github_pipeline/.dlt/config.toml -------------------------------------------------------------------------------- /tests/pipeline/cases/github_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/pipeline/cases/github_pipeline/__init__.py -------------------------------------------------------------------------------- /tests/pipeline/cases/github_pipeline/github_extract.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import dlt 4 | 5 | from github_pipeline import github # type: ignore[import-not-found] 6 | 7 | if __name__ == "__main__": 8 | p = dlt.pipeline("dlt_github_pipeline", destination="duckdb", dataset_name="github_3") 9 | github_source = github() 10 | if len(sys.argv) > 1: 11 | # load only N issues 12 | limit = int(sys.argv[1]) 13 | github_source.add_limit(limit) 14 | info = p.extract(github_source) 15 | print(info) 16 | -------------------------------------------------------------------------------- 
/tests/pipeline/cases/github_pipeline/github_load.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import dlt 4 | 5 | if __name__ == "__main__": 6 | p = dlt.attach("dlt_github_pipeline") 7 | info = p.load() 8 | print(info) 9 | -------------------------------------------------------------------------------- /tests/pipeline/cases/github_pipeline/github_normalize.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | if __name__ == "__main__": 4 | p = dlt.attach("dlt_github_pipeline") 5 | info = p.normalize() 6 | print(info) 7 | -------------------------------------------------------------------------------- /tests/pipeline/cases/github_pipeline/github_rev.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.source 5 | def github(): 6 | @dlt.resource( 7 | table_name="issues__2", 8 | primary_key="id", 9 | ) 10 | def load_issues(): 11 | # return data with path separators 12 | yield [ 13 | { 14 | "id": 100, 15 | "issue__id": 10, 16 | } 17 | ] 18 | 19 | return load_issues 20 | 21 | 22 | if __name__ == "__main__": 23 | p = dlt.pipeline("dlt_github_pipeline", destination="duckdb", dataset_name="github_3") 24 | github_source = github() 25 | info = p.run(github_source) 26 | print(info) 27 | -------------------------------------------------------------------------------- /tests/pipeline/cases/state/state.v1.json: -------------------------------------------------------------------------------- 1 | {"_state_version":2,"_state_engine_version":1,"pipeline_name":"debug_pipeline","dataset_name":"debug_pipeline_data","default_schema_name":"example_source","schema_names":["example_source"],"destination":"dlt.destinations.postgres"} -------------------------------------------------------------------------------- /tests/pipeline/conftest.py: 
-------------------------------------------------------------------------------- 1 | from tests.utils import ( 2 | preserve_environ, 3 | autouse_test_storage, 4 | patch_home_dir, 5 | wipe_pipeline, 6 | test_storage, 7 | ) 8 | from tests.common.configuration.utils import environment, toml_providers 9 | from tests.pipeline.utils import drop_dataset_from_env 10 | -------------------------------------------------------------------------------- /tests/pipeline/test_utils.py: -------------------------------------------------------------------------------- 1 | # test utils :) 2 | import pytest 3 | 4 | from tests.pipeline.utils import assert_records_as_set 5 | 6 | 7 | def test_assert_records_as_set(): 8 | assert_records_as_set([{"a": 1}, {"a": 2}], [{"a": 2}, {"a": 1}]) 9 | assert_records_as_set([{"a": 1}, {"a": 1}], [{"a": 1}, {"a": 1}]) 10 | 11 | # test that a different number of the same records actually fails 12 | with pytest.raises(AssertionError): 13 | assert_records_as_set([{"a": 1}, {"a": 1}], [{"a": 1}]) 14 | -------------------------------------------------------------------------------- /tests/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/plugins/__init__.py -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/Makefile: -------------------------------------------------------------------------------- 1 | 2 | uninstall-example-plugin: 3 | pip uninstall example_plugin -y 4 | 5 | install-example-plugin: uninstall-example-plugin 6 | # this builds and installs the example plugin 7 | poetry build 8 | pip install dist/example_plugin-0.1.0-py3-none-any.whl -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/README.md: -------------------------------------------------------------------------------- 1 | 
# Example DLT Plugin 2 | 1. Plugin name must start with dlt- to be recognized at run time 3 | 2. Export the module that registers plugin in an entry point 4 | 3. Use pluggy hookspecs that you can find here and there in the dlt -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/dlt_example_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/dlt_example_plugin/destinations/__init__.py: -------------------------------------------------------------------------------- 1 | from .impl.factory import hive 2 | from .pushdb import push_destination 3 | 4 | 5 | __all__ = ["hive", "push_destination"] 6 | -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/dlt_example_plugin/destinations/impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/plugins/dlt_example_plugin/dlt_example_plugin/destinations/impl/__init__.py -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/dlt_example_plugin/destinations/impl/factory.py: -------------------------------------------------------------------------------- 1 | from dlt.common.destination import DestinationCapabilitiesContext 2 | from dlt.destinations.impl.filesystem.factory import filesystem as _filesystem 3 | 4 | 5 | class hive(_filesystem): 6 | def _raw_capabilities(self) -> DestinationCapabilitiesContext: 7 | caps = super()._raw_capabilities() 8 | caps.preferred_loader_file_format = "parquet" 9 | caps.supported_loader_file_formats = ["parquet"] 10 | caps.preferred_table_format = "hive" 11 | caps.supported_table_formats = 
["hive"] 12 | caps.loader_file_format_selector = None 13 | caps.merge_strategies_selector = None 14 | return caps 15 | -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/dlt_example_plugin/destinations/pushdb.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | from dlt.common.schema import TTableSchema 3 | from dlt.common.typing import TDataItems 4 | 5 | 6 | @dlt.destination(batch_size=250, name="pushdb") 7 | def push_destination(items: TDataItems, table: TTableSchema) -> None: 8 | pass 9 | -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/dlt_example_plugin/sources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/plugins/dlt_example_plugin/dlt_example_plugin/sources/__init__.py -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/dlt_example_plugin/sources/github.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.source 5 | def github(): 6 | @dlt.resource( 7 | table_name="issues__2", 8 | primary_key="id", 9 | ) 10 | def load_issues(): 11 | # return data with path separators 12 | yield [ 13 | { 14 | "id": 100, 15 | "issue__id": 10, 16 | } 17 | ] 18 | 19 | return load_issues 20 | -------------------------------------------------------------------------------- /tests/plugins/dlt_example_plugin/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "dlt-example-plugin" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["dave "] 6 | readme = "README.md" 7 | packages = [ 8 | { include = "dlt_example_plugin" }, 9 | ] 10 | 11 | [tool.poetry.plugins.dlt] 12 
| dlt-example-plugin = "dlt_example_plugin.plugin" 13 | 14 | [tool.poetry.dependencies] 15 | python = ">=3.9.1,<3.14" 16 | dlt={"path"="../../../"} 17 | 18 | [build-system] 19 | requires = ["poetry-core"] 20 | build-backend = "poetry.core.masonry.api" 21 | -------------------------------------------------------------------------------- /tests/plus/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A few basic tests that guard against the worst regressions between dlt and dlt+ 3 | dlt-plus needs to be installed to run these tests, a license is not required at this point. 4 | """ 5 | -------------------------------------------------------------------------------- /tests/plus/test_cli.py: -------------------------------------------------------------------------------- 1 | from pytest_console_scripts import ScriptRunner 2 | 3 | 4 | def test_project_command(script_runner: ScriptRunner) -> None: 5 | result = script_runner.run(["dlt", "project", "-h"]) 6 | assert result.returncode == 0 7 | 8 | assert "Usage: dlt project" in result.stdout 9 | -------------------------------------------------------------------------------- /tests/plus/test_destinations.py: -------------------------------------------------------------------------------- 1 | from dlt.common.destination.reference import DestinationReference 2 | 3 | 4 | def test_iceberg_destination() -> None: 5 | # check that iceberg destination is available 6 | assert DestinationReference.find("iceberg") is not None 7 | 8 | 9 | def test_delta_destination() -> None: 10 | # check that delta destination is available 11 | assert DestinationReference.find("delta") is not None 12 | -------------------------------------------------------------------------------- /tests/plus/test_sources.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.mssql 5 | def test_mssql_source() -> None: 6 | # we just test 
whether the mssql source may be imported 7 | from dlt_plus.sources import mssql 8 | -------------------------------------------------------------------------------- /tests/reflection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/reflection/__init__.py -------------------------------------------------------------------------------- /tests/reflection/module_cases/__init__.py: -------------------------------------------------------------------------------- 1 | import xxx.absolutely 2 | 3 | from xxx.absolutely import a1, a3 4 | from dlt.common.utils import uniq_id 5 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/all_imports.py: -------------------------------------------------------------------------------- 1 | from dlt.common.utils import uniq_id 2 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/dlt_import_exception.py: -------------------------------------------------------------------------------- 1 | from dlt.common.exceptions import MissingDependencyException 2 | 3 | 4 | try: 5 | from xxx.no import e 6 | except ImportError: 7 | raise MissingDependencyException("DLT E", ["xxx"]) 8 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/executes_resource.py: -------------------------------------------------------------------------------- 1 | import dlt 2 | 3 | 4 | @dlt.resource 5 | def aleph(n: int): 6 | for i in range(0, n): 7 | yield i 8 | 9 | 10 | print(list(aleph(10))) 11 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/import_as_type.py: -------------------------------------------------------------------------------- 1 | from xxx.aa import Tx 2 | 3 | 4 | def create_tx() ->
Tx: 5 | return Tx() 6 | 7 | 8 | tx = Tx() 9 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/no_pkg.py: -------------------------------------------------------------------------------- 1 | from . import uniq_id 2 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/raises.py: -------------------------------------------------------------------------------- 1 | from xxx.absolutely import a1, a3 2 | from dlt.common.utils import uniq_id 3 | 4 | raise NotImplementedError("empty module") 5 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/stripe_analytics/__init__.py: -------------------------------------------------------------------------------- 1 | from .stripe_analytics import VALUE 2 | from .helpers import HELPERS_VALUE 3 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/stripe_analytics/helpers.py: -------------------------------------------------------------------------------- 1 | import paandas 2 | 3 | HELPERS_VALUE = 3 4 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/stripe_analytics/stripe_analytics.py: -------------------------------------------------------------------------------- 1 | import stripe 2 | 3 | VALUE = 1 4 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/stripe_analytics_pipeline.py: -------------------------------------------------------------------------------- 1 | from stripe_analytics import VALUE, HELPERS_VALUE 2 | 3 | print(VALUE) 4 | print(HELPERS_VALUE) 5 | -------------------------------------------------------------------------------- /tests/reflection/module_cases/syntax_error.py: 
-------------------------------------------------------------------------------- 1 | - 2 | h 3 | 4 | ddde -------------------------------------------------------------------------------- /tests/sources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/sources/__init__.py -------------------------------------------------------------------------------- /tests/sources/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.utils import ( 2 | preserve_environ, 3 | autouse_test_storage, 4 | patch_home_dir, 5 | wipe_pipeline, 6 | ) 7 | -------------------------------------------------------------------------------- /tests/sources/filesystem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/sources/filesystem/__init__.py -------------------------------------------------------------------------------- /tests/sources/filesystem/test_config_sections.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dlt.sources.filesystem import filesystem, read_parquet 4 | from dlt.common.configuration.exceptions import ConfigFieldMissingException 5 | 6 | 7 | def test_config_sections_resolution(): 8 | filesystem_resource = filesystem(file_glob="**/*.parquet") 9 | filesystem_pipe = filesystem_resource | read_parquet() 10 | 11 | with pytest.raises(ConfigFieldMissingException) as exc_info: 12 | list(filesystem_pipe) 13 | 14 | # NOTE: we check that the first trace related to filesystem has the correct 15 | # sections set 16 | assert list(exc_info.value.traces.values())[1][0].key.startswith( 17 | "SOURCES__FILESYSTEM__FILESYSTEM__" 18 | ) 19 | 
-------------------------------------------------------------------------------- /tests/sources/filesystem/test_filesystem_pipeline_template.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from tests.common.storages.utils import TEST_SAMPLE_FILES 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "example_name", 8 | ( 9 | "read_custom_file_type_excel", 10 | "stream_and_merge_csv", 11 | "read_csv_with_duckdb", 12 | "read_csv_duckdb_compressed", 13 | "read_parquet_and_jsonl_chunked", 14 | "read_files_incrementally_mtime", 15 | ), 16 | ) 17 | def test_all_examples(example_name: str) -> None: 18 | from dlt.sources._core_source_templates import filesystem_pipeline 19 | 20 | filesystem_pipeline.TESTS_BUCKET_URL = TEST_SAMPLE_FILES 21 | 22 | getattr(filesystem_pipeline, example_name)() 23 | -------------------------------------------------------------------------------- /tests/sources/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/sources/helpers/__init__.py -------------------------------------------------------------------------------- /tests/sources/helpers/rest_client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/sources/helpers/rest_client/__init__.py -------------------------------------------------------------------------------- /tests/sources/helpers/rest_client/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.sources.rest_api.conftest import * # noqa: F403 2 | -------------------------------------------------------------------------------- /tests/sources/helpers/rest_client/test_requests_paginate.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dlt.sources.helpers.rest_client import paginate 4 | from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator 5 | from .conftest import assert_pagination 6 | 7 | 8 | @pytest.mark.usefixtures("mock_api_server") 9 | def test_requests_paginate(): 10 | pages_iter = paginate( 11 | "https://api.example.com/posts", 12 | paginator=JSONLinkPaginator(next_url_path="next_page"), 13 | ) 14 | 15 | pages = list(pages_iter) 16 | 17 | assert_pagination(pages) 18 | -------------------------------------------------------------------------------- /tests/sources/helpers/transform/test_row_hash.py: -------------------------------------------------------------------------------- 1 | import pyarrow as pa 2 | from dlt.sources.helpers.transform import add_row_hash_to_table 3 | 4 | 5 | def test_add_row_hash_to_table(): 6 | names = ["n_legs", "animals"] 7 | n_legs = [2, 2, 2] 8 | animals = ["duck", "duck", "duck"] 9 | 10 | table = pa.Table.from_arrays([n_legs, animals], names=names) 11 | 12 | add_row_hash = add_row_hash_to_table("row_hash") 13 | table_with_rowhash = add_row_hash(table) 14 | assert len(table_with_rowhash["row_hash"].unique()) == 1, "Expected identical row hashes" 15 | -------------------------------------------------------------------------------- /tests/sources/rest_api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/sources/rest_api/__init__.py -------------------------------------------------------------------------------- /tests/sources/rest_api/configurations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/sources/rest_api/configurations/__init__.py 
-------------------------------------------------------------------------------- /tests/sources/rest_api/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/sources/rest_api/integration/__init__.py -------------------------------------------------------------------------------- /tests/sources/rest_api/test_config_sections.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dlt.sources.rest_api import rest_api 4 | from dlt.common.configuration.exceptions import ConfigFieldMissingException 5 | 6 | 7 | def test_config_sections_resolution(): 8 | with pytest.raises(ConfigFieldMissingException) as exc_info: 9 | list(rest_api()) 10 | 11 | # NOTE: we check that the first trace related to rest_api has the correct 12 | # sections set 13 | assert list(exc_info.value.traces.values())[1][0].key.startswith( 14 | "SOURCES__REST_API__REST_API__" 15 | ) 16 | -------------------------------------------------------------------------------- /tests/sources/sql_database/__init__.py: -------------------------------------------------------------------------------- 1 | # almost all tests are in tests/load since a postgres instance is required for this to work 2 | -------------------------------------------------------------------------------- /tests/sources/sql_database/test_config_sections.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from dlt.sources.sql_database import sql_database 4 | from dlt.common.configuration.exceptions import ConfigFieldMissingException 5 | 6 | 7 | def test_config_sections_resolution(): 8 | with pytest.raises(ConfigFieldMissingException) as exc_info: 9 | list(sql_database()) 10 | 11 | # NOTE: we check that the first trace related to sql_database has the correct 12 | # sections set 13 | assert
list(exc_info.value.traces.values())[0][0].key.startswith( 14 | "SOURCES__SQL_DATABASE__SQL_DATABASE__" 15 | ) 16 | -------------------------------------------------------------------------------- /tests/sources/sql_database/test_sql_database_pipeline_template.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | # TODO: not all template functions are tested here 5 | # we may be able to test more in tests/load/sources 6 | @pytest.mark.parametrize( 7 | "example_name", 8 | ( 9 | "load_select_tables_from_database", 10 | # "load_entire_database", 11 | "load_standalone_table_resource", 12 | "select_columns", 13 | "specify_columns_to_load", 14 | "test_pandas_backend_verbatim_decimals", 15 | "select_with_end_value_and_row_order", 16 | "my_sql_via_pyarrow", 17 | ), 18 | ) 19 | def test_all_examples(example_name: str) -> None: 20 | from dlt.sources._core_source_templates import sql_database_pipeline 21 | 22 | getattr(sql_database_pipeline, example_name)() 23 | -------------------------------------------------------------------------------- /tests/sources/test_pipeline_templates.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import importlib 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "template_name,examples", 7 | [ 8 | ("debug_pipeline", ("load_all_datatypes",)), 9 | ("default_pipeline", ("load_api_data", "load_sql_data", "load_pandas_data")), 10 | ("arrow_pipeline", ("load_arrow_tables",)), 11 | ("dataframe_pipeline", ("load_dataframe",)), 12 | ("requests_pipeline", ("load_chess_data",)), 13 | ("github_api_pipeline", ("run_source",)), 14 | ("fruitshop_pipeline", ("load_shop",)), 15 | ], 16 | ) 17 | def test_debug_pipeline(template_name: str, examples: str) -> None: 18 | demo_module = importlib.import_module(f"dlt.sources._single_file_templates.{template_name}") 19 | for example_name in examples: 20 | getattr(demo_module, example_name)() 21 | 
-------------------------------------------------------------------------------- /tests/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for our test helpers""" 2 | -------------------------------------------------------------------------------- /tests/tests/load/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/tests/load/__init__.py -------------------------------------------------------------------------------- /tests/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/tools/__init__.py -------------------------------------------------------------------------------- /tests/tools/clean_athena.py: -------------------------------------------------------------------------------- 1 | """WARNING: Running this script will drop all schemas in the athena destination set up in your secrets.toml""" 2 | 3 | import dlt 4 | from dlt.destinations.exceptions import DatabaseUndefinedRelation 5 | 6 | if __name__ == "__main__": 7 | pipeline = dlt.pipeline(pipeline_name="drop_athena", destination="athena") 8 | 9 | with pipeline.sql_client() as client: 10 | with client.execute_query("SHOW DATABASES") as cur: 11 | dbs = cur.fetchall() 12 | for db in dbs: 13 | db = db[0] 14 | sql = f"DROP SCHEMA `{db}` CASCADE;" 15 | try: 16 | print(sql) 17 | with client.execute_query(sql): 18 | pass # 19 | except DatabaseUndefinedRelation: 20 | print("Could not delete schema") 21 | -------------------------------------------------------------------------------- /tests/tools/create_storages.py: -------------------------------------------------------------------------------- 1 | from dlt.common.storages import ( 2 | NormalizeStorage, 3 | LoadStorage, 4 |
SchemaStorage, 5 | NormalizeStorageConfiguration, 6 | LoadStorageConfiguration, 7 | SchemaStorageConfiguration, 8 | ) 9 | 10 | 11 | # NormalizeStorage(True, NormalizeVolumeConfiguration) 12 | # LoadStorage(True, LoadVolumeConfiguration, "jsonl", LoadStorage.ALL_SUPPORTED_FILE_FORMATS) 13 | # SchemaStorage(SchemaVolumeConfiguration, makedirs=True) 14 | -------------------------------------------------------------------------------- /tests/transformations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tests/transformations/__init__.py -------------------------------------------------------------------------------- /tests/transformations/conftest.py: -------------------------------------------------------------------------------- 1 | from tests.utils import ( 2 | preserve_environ, 3 | autouse_test_storage, 4 | patch_home_dir, 5 | wipe_pipeline, 6 | test_storage, 7 | ) 8 | from tests.pipeline.utils import drop_dataset_from_env 9 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dlt-hub/dlt/befe9ced13e08811b35f6cfabaa6708ccc32afce/tools/__init__.py --------------------------------------------------------------------------------