├── .bumpversion.cfg ├── .ci ├── build_docs.sh ├── build_wheel.sh ├── down_architecture.sh ├── install.sh ├── install_prereqs.sh ├── prepare_doc_bundle.sh ├── push_engine.sh ├── rebuild_asciicasts.sh └── up_architecture.sh ├── .coveragerc ├── .github ├── dependabot.yml └── workflows │ └── build_and_test_and_release.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .pylintrc ├── CHANGELOG.md ├── Dockerfile ├── LICENSE.md ├── README.md ├── bin └── sgr ├── docs ├── Makefile ├── README.md ├── api │ ├── modules.rst │ ├── splitgraph.cloud.project.rst │ ├── splitgraph.cloud.rst │ ├── splitgraph.commandline.rst │ ├── splitgraph.config.rst │ ├── splitgraph.core.indexing.rst │ ├── splitgraph.core.rst │ ├── splitgraph.core.sql.rst │ ├── splitgraph.engine.postgres.rst │ ├── splitgraph.engine.rst │ ├── splitgraph.hooks.data_source.rst │ ├── splitgraph.hooks.rst │ ├── splitgraph.ingestion.airbyte.rst │ ├── splitgraph.ingestion.athena.rst │ ├── splitgraph.ingestion.bigquery.rst │ ├── splitgraph.ingestion.csv.rst │ ├── splitgraph.ingestion.dbt.rst │ ├── splitgraph.ingestion.rst │ ├── splitgraph.ingestion.singer.commandline.rst │ ├── splitgraph.ingestion.singer.rst │ ├── splitgraph.ingestion.snowflake.rst │ ├── splitgraph.ingestion.socrata.rst │ ├── splitgraph.resources.icons.rst │ ├── splitgraph.resources.rst │ ├── splitgraph.resources.splitgraph_meta.rst │ ├── splitgraph.resources.static.rst │ ├── splitgraph.rst │ ├── splitgraph.splitfile.generation.rst │ ├── splitgraph.splitfile.rst │ └── splitgraph.utils.rst ├── conf.py └── generate_reference.py ├── engine ├── Dockerfile ├── Dockerfile.debug ├── Dockerfile.pg_debug ├── Makefile ├── README.md ├── build_scripts │ ├── build_splitgraph.sh │ └── fdws │ │ ├── cstore_fdw │ │ └── build_cstore_fdw.sh │ │ ├── mongo_fdw │ │ ├── build-fixes.patch │ │ └── build_mongo_fdw.sh │ │ ├── multicorn │ │ └── build_multicorn.sh │ │ └── mysql_fdw │ │ └── build_mysql_fdw.sh ├── etc │ └── postgresql │ │ ├── pg_hba.conf │ │ ├── postgresql.conf │ │ └── postgresql_debug.conf └── init_scripts │ └── 000_create_extensions.sql ├── examples ├── README.md ├── asciinema-player.css ├── benchmarking │ ├── .sgconfig │ ├── README.md │ ├── benchmarking.ipynb │ ├── benchmarking_real_data.ipynb │ ├── docker-compose.yml │ ├── poetry.lock │ └── pyproject.toml ├── bloom-filter │ ├── README.md │ ├── docker-compose.yml │ └── example.yaml ├── clickhouse │ ├── .sgconfig │ ├── README.md │ ├── clickhouse │ │ ├── Dockerfile │ │ ├── odbc.ini │ │ └── odbcinst.ini │ ├── docker-compose.yml │ ├── setup_datasets.sh │ ├── splitgraph-ddn-odbc-clickhouse.png │ └── splitgraph-odbc-clickhouse.png ├── cross-db-analytics │ ├── README.md │ ├── docker-compose.yml │ └── mounting │ │ ├── elasticsearch.json │ │ ├── elasticsearch.sql │ │ ├── matomo.json │ │ ├── matomo.sql │ │ └── mount.sh ├── custom_fdw │ ├── README.md │ ├── docker-compose.yml │ └── src │ │ ├── .sgconfig │ │ ├── engine.Dockerfile │ │ ├── hn_fdw │ │ ├── __init__.py │ │ ├── fdw.py │ │ └── mount.py │ │ └── sgr.Dockerfile ├── dbt │ ├── .dbt │ │ └── profiles.yml │ ├── .gitignore │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ └── example.yaml ├── dbt_adapter │ ├── README.md │ ├── dbt │ │ ├── adapters │ │ │ └── splitgraph │ │ │ │ ├── __init__.py │ │ │ │ ├── connections.py │ │ │ │ └── impl.py │ │ └── include │ │ │ └── splitgraph │ │ │ ├── __init__.py │ │ │ ├── dbt_project.yml │ │ │ └── macros │ │ │ ├── adapters.sql │ │ │ ├── catalog.sql │ │ │ ├── materializations │ │ │ └── snapshot_merge.sql │ │ │ └── relations.sql │ ├── sample_project 
│ │ ├── .dbt │ │ │ └── profiles.yml │ │ ├── .gitignore │ │ ├── analysis │ │ │ └── .gitkeep │ │ ├── data │ │ │ └── .gitkeep │ │ ├── dbt_project.yml │ │ ├── macros │ │ │ └── .gitkeep │ │ ├── models │ │ │ └── splitgraph │ │ │ │ └── use_splitgraph_data.sql │ │ ├── snapshots │ │ │ └── .gitkeep │ │ └── tests │ │ │ └── .gitkeep │ └── setup.py ├── dbt_two_databases │ ├── .dbt │ │ └── profiles.yml │ ├── .gitignore │ ├── .sgconfig │ ├── README.md │ ├── analysis │ │ └── .gitkeep │ ├── data │ │ └── .gitkeep │ ├── dbt_project.yml │ ├── docker-compose.yml │ ├── macros │ │ └── .gitkeep │ ├── models │ │ └── splitgraph │ │ │ └── join_two_dbs.sql │ ├── snapshots │ │ └── .gitkeep │ ├── splitgraph │ │ ├── mongodb │ │ │ ├── Dockerfile │ │ │ ├── setup.js │ │ │ └── start.sh │ │ └── postgresql │ │ │ └── setup.sql │ └── tests │ │ └── .gitkeep ├── example_to_md.py ├── import-from-csv │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ ├── example.yaml │ └── rdu-weather-history.csv ├── import-from-mongo │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ ├── example.yaml │ ├── mongo_import.splitfile │ └── mongodb │ │ ├── Dockerfile │ │ ├── setup.js │ │ └── start.sh ├── iris │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ ├── iris.csv │ ├── iris.ipynb │ ├── poetry.lock │ └── pyproject.toml ├── pg-replication │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ ├── engine │ │ └── schema.sql │ ├── example.yaml │ └── origin │ │ └── 000_initial_data.sql ├── pgadmin │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ ├── pgadmin-geo-lq-example.png │ ├── pgadmin │ │ ├── pgpassfile │ │ └── servers.json │ └── setup.sh ├── postgis │ ├── README.md │ ├── cleanup.sh │ ├── docker-compose.yml │ ├── poetry.lock │ ├── pyproject.toml │ ├── rerun.sh │ ├── vote_map.ipynb │ └── vote_map.splitfile ├── postgrest │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ ├── example.yaml │ └── postgrest.conf ├── push-to-object-storage │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ └── example.yaml ├── push-to-other-engine │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ └── example.yaml ├── rebuild_asciinema.py ├── rerun_notebooks.sh ├── run_example.py ├── sample_splitfiles │ ├── README.md │ ├── county_votes.splitfile │ ├── london_votes.splitfile │ ├── qoz_vote_fraction_single_command.splitfile │ └── us_flights_2009.splitfile ├── splitfiles │ ├── README.md │ ├── docker-compose.yml │ ├── example.yaml │ └── rdu-weather-summary.splitfile ├── splitgraph-cloud │ ├── README.md │ ├── dataset-metadata.yml │ ├── dataset-readme.md │ ├── docker-compose.yml │ ├── example.yaml │ ├── request_1.json │ └── request_2.json ├── template │ ├── .sgconfig │ ├── README.md │ ├── docker-compose.yml │ └── example.yaml ├── test │ ├── conftest.py │ └── test_examples.py ├── update_example_versions.sh └── us-election │ ├── README.md │ ├── analyze.py │ ├── docker-compose.yml │ ├── example.yaml │ ├── pyproject.toml │ └── qoz_vote_fraction.splitfile ├── install.sh ├── mypy.ini ├── pics ├── splitfile.png └── splitfiles.gif ├── poetry.lock ├── pyproject.toml ├── setup.cfg ├── splitgraph.spec ├── splitgraph ├── .git-blame-ignore-revs ├── BUILD ├── __init__.py ├── __version__.py ├── cloud │ ├── BUILD │ ├── __init__.py │ ├── models.py │ ├── project │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── dbt.py │ │ ├── generation.py │ │ ├── github_actions.py │ │ ├── models.py │ │ ├── templates.py │ │ └── utils.py │ ├── queries.py │ └── tunnel_client.py ├── commandline │ ├── BUILD │ ├── __init__.py │ ├── cloud.py │ ├── common.py │ 
├── engine.py │ ├── example.py │ ├── image_creation.py │ ├── image_info.py │ ├── ingestion.py │ ├── misc.py │ ├── mount.py │ ├── push_pull.py │ └── splitfile.py ├── config │ ├── BUILD │ ├── __init__.py │ ├── argument_config.py │ ├── config.py │ ├── config_file_config.py │ ├── default_config.py │ ├── environment_config.py │ ├── export.py │ ├── keys.py │ ├── management.py │ └── system_config.py ├── core │ ├── BUILD │ ├── __init__.py │ ├── _drawing.py │ ├── common.py │ ├── engine.py │ ├── fdw_checkout.py │ ├── fragment_manager.py │ ├── image.py │ ├── image_manager.py │ ├── image_mounting.py │ ├── indexing │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── bloom.py │ │ └── range.py │ ├── metadata_manager.py │ ├── migration.py │ ├── object_manager.py │ ├── output.py │ ├── overlay.py │ ├── registry.py │ ├── repository.py │ ├── server.py │ ├── sql │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── _validation.py │ │ ├── queries.py │ │ └── splitfile_validation.py │ ├── table.py │ └── types.py ├── engine │ ├── BUILD │ ├── __init__.py │ ├── base.py │ ├── config.py │ ├── postgres │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── engine.py │ │ └── psycopg.py │ └── utils.py ├── exceptions.py ├── hooks │ ├── BUILD │ ├── __init__.py │ ├── data_source │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── base.py │ │ ├── fdw.py │ │ └── utils.py │ ├── external_objects.py │ ├── mount_handlers.py │ ├── s3.py │ ├── s3_server.py │ └── splitfile_commands.py ├── ingestion │ ├── BUILD │ ├── __init__.py │ ├── airbyte │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── data_source.py │ │ ├── docker_utils.py │ │ ├── models.py │ │ └── utils.py │ ├── athena │ │ ├── BUILD │ │ └── __init__.py │ ├── bigquery │ │ ├── BUILD │ │ └── __init__.py │ ├── common.py │ ├── csv │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── common.py │ │ └── fdw.py │ ├── dbt │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── data_source.py │ │ └── utils.py │ ├── inference.py │ ├── pandas.py │ ├── singer │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── commandline │ │ │ ├── BUILD │ │ │ └── __init__.py │ │ ├── common.py │ │ ├── data_source.py │ │ └── db_sync.py │ ├── snowflake │ │ ├── BUILD │ │ └── __init__.py │ ├── socrata │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── fdw.py │ │ ├── mount.py │ │ └── querying.py │ └── sqlite │ │ ├── BUILD │ │ └── __init__.py ├── py.typed ├── resources │ ├── BUILD │ ├── __init__.py │ ├── icons │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── athena.svg │ │ ├── bigquery.svg │ │ ├── csv.svg │ │ ├── dbt.svg │ │ ├── elasticsearch.svg │ │ ├── mongodb.svg │ │ ├── mysql.svg │ │ ├── postgresql.svg │ │ ├── snowflake.svg │ │ ├── socrata.svg │ │ └── sqlite.svg │ ├── splitgraph_meta │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── splitgraph_meta--0.0.1--0.0.2.sql │ │ ├── splitgraph_meta--0.0.1.sql │ │ ├── splitgraph_meta--0.0.2--0.0.3.sql │ │ └── splitgraph_meta--0.0.3--0.0.4.sql │ └── static │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── audit_trigger.sql │ │ ├── cstore.sql │ │ └── splitgraph_api.sql ├── splitfile │ ├── BUILD │ ├── __init__.py │ ├── _parsing.py │ ├── execution.py │ └── generation │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── common.py │ │ └── replacement.py └── utils │ ├── BUILD │ ├── __init__.py │ ├── docker.py │ └── yaml.py ├── test ├── __init__.py ├── architecture │ ├── data │ │ ├── mongoorigin │ │ │ └── setup.js │ │ ├── mysqlorigin │ │ │ └── setup.sql │ │ ├── objectstorage │ │ │ └── test_csv │ │ │ │ └── some_prefix │ │ │ │ ├── encoding-win-1252.csv │ │ │ │ ├── fruits.csv │ │ │ │ ├── not_a_csv.txt │ │ │ │ ├── percentage_sign.csv │ │ │ │ └── rdu-weather-history.csv │ │ ├── pgorigin │ │ │ ├── accounts.csv 
│ │ │ ├── load_account_data.sql │ │ │ └── setup.sql │ │ └── remote_engine │ │ │ └── setup.sql │ ├── dev │ │ ├── Dockerfile.dev │ │ ├── docker-config │ │ │ └── pytest.dev.ini │ │ └── docker-entrypoint-dev.sh │ ├── docker-compose.ci.yml │ ├── docker-compose.core.yml │ ├── docker-compose.dev.yml │ ├── docker-compose.mounting.yml │ ├── scripts │ │ ├── build_and_publish_ci_images.sh │ │ ├── build_ci_images.sh │ │ └── publish_ci_images.sh │ ├── src │ │ ├── esorigin │ │ │ ├── Dockerfile │ │ │ ├── accounts.json │ │ │ └── init-data.sh │ │ ├── mongoorigin │ │ │ ├── Dockerfile │ │ │ └── start.sh │ │ ├── pgorigin │ │ │ ├── Dockerfile │ │ │ └── start.sh │ │ └── remote_engine │ │ │ └── Dockerfile │ └── wait-for-architecture.sh ├── clean_test.sh ├── resources │ ├── .sgconfig │ ├── api │ │ └── github_releases.json │ ├── create_table.splitfile │ ├── custom_command_calc_hash.splitfile │ ├── custom_command_dummy.splitfile │ ├── custom_plugin_dir │ │ └── some_plugin │ │ │ └── plugin.py │ ├── external_sql.splitfile │ ├── external_sql.sql │ ├── from_local.splitfile │ ├── from_remote.splitfile │ ├── from_remote_multistage.splitfile │ ├── import_all_from_mounted.splitfile │ ├── import_and_update.splitfile │ ├── import_from_mounted_db.splitfile │ ├── import_from_mounted_db_with_sql.splitfile │ ├── import_from_preuploaded_remote.splitfile │ ├── import_local.splitfile │ ├── import_local_multiple_with_queries.splitfile │ ├── import_remote_broken_stage_2.splitfile │ ├── import_remote_multiple.splitfile │ ├── import_with_custom_query.splitfile │ ├── import_with_custom_query_and_sql.splitfile │ ├── ingestion │ │ ├── bigquery │ │ │ └── dummy_credentials.json │ │ ├── csv │ │ │ ├── base_df.csv │ │ │ ├── base_df_kv.csv │ │ │ ├── encoding-win-1252.csv │ │ │ ├── evil_df.csv │ │ │ ├── grades.csv │ │ │ ├── mac_newlines.csv │ │ │ ├── patch_df.csv │ │ │ ├── patch_df_kv.csv │ │ │ └── separator_df.csv │ │ ├── dbt │ │ │ ├── airbyte_normalization │ │ │ │ ├── README.md │ │ │ │ ├── dbt_project.yml │ │ │ │ └── models │ │ │ │ │ ├── dim_mushrooms.sql │ │ │ │ │ └── sources.yml │ │ │ └── jaffle_csv │ │ │ │ ├── README.md │ │ │ │ ├── raw_customers.csv │ │ │ │ ├── raw_orders.csv │ │ │ │ └── raw_payments.csv │ │ ├── singer │ │ │ ├── discover.json │ │ │ ├── fake_tap.py │ │ │ ├── initial.json │ │ │ ├── schema_change.json │ │ │ ├── update.json │ │ │ └── wrong_schema.json │ │ └── socrata │ │ │ ├── dataset_metadata.json │ │ │ └── find_datasets.json │ ├── inline_sql.splitfile │ ├── multiline_sql.splitfile │ ├── schema_changes.splitfile │ ├── splitgraph_yml │ │ ├── readmes │ │ │ ├── readme_1.md │ │ │ └── readme_2.md │ │ ├── splitgraph.override.yml │ │ └── splitgraph.yml │ └── update_without_import.splitfile └── splitgraph │ ├── BUILD │ ├── __init__.py │ ├── cloud │ ├── __init__.py │ └── project │ │ ├── __init__.py │ │ ├── snapshots │ │ ├── test_dbt │ │ │ └── test_generate_dbt_project │ │ │ │ └── splitgraph_template │ │ │ │ ├── dbt_project.yml │ │ │ │ └── models │ │ │ │ └── staging │ │ │ │ ├── and_third_data │ │ │ │ └── and_third_data.sql │ │ │ │ ├── some_data_source │ │ │ │ └── some_data_source.sql │ │ │ │ ├── some_other_data_raw │ │ │ │ └── some_other_data_raw.sql │ │ │ │ └── sources.yml │ │ ├── test_generation │ │ │ ├── test_generate_project_no_dbt │ │ │ │ └── generate_project │ │ │ │ │ ├── .github │ │ │ │ │ └── workflows │ │ │ │ │ │ └── build.yml │ │ │ │ │ ├── splitgraph.credentials.yml │ │ │ │ │ └── splitgraph.yml │ │ │ └── test_generate_project_with_dbt │ │ │ │ └── generate_project_dbt │ │ │ │ ├── .github │ │ │ │ └── workflows │ │ │ │ │ └── 
build.yml │ │ │ │ ├── README.md │ │ │ │ ├── dbt_project.yml │ │ │ │ ├── models │ │ │ │ └── staging │ │ │ │ │ └── sources.yml │ │ │ │ ├── splitgraph.credentials.yml │ │ │ │ └── splitgraph.yml │ │ └── test_merging │ │ │ └── test_project_merging │ │ │ └── repositories.merged.yml │ │ ├── test_dbt.py │ │ ├── test_generation.py │ │ └── test_merging.py │ ├── commandline │ ├── BUILD │ ├── __init__.py │ ├── http_fixtures.py │ ├── snapshots │ │ ├── test_cloud │ │ │ ├── test_commandline_plugins │ │ │ │ ├── sgr_cloud_plugins.txt │ │ │ │ └── sgr_cloud_plugins_filter.txt │ │ │ └── test_commandline_stub │ │ │ │ └── sgr_cloud_stub.yml │ │ ├── test_cloud_jobs │ │ │ ├── test_csv_download │ │ │ │ ├── False │ │ │ │ │ └── sgr_cloud_download_failure.txt │ │ │ │ └── True │ │ │ │ │ └── sgr_cloud_download_success.txt │ │ │ ├── test_csv_upload │ │ │ │ ├── False │ │ │ │ │ └── sgr_cloud_upload_failure.txt │ │ │ │ └── True │ │ │ │ │ └── sgr_cloud_upload_success.txt │ │ │ ├── test_job_status_explicit_repos │ │ │ │ └── sgr_cloud_status_explicit.txt │ │ │ └── test_job_status_yaml │ │ │ │ └── sgr_cloud_status_yml.txt │ │ └── test_cloud_metadata │ │ │ └── test_commandline_dump │ │ │ ├── sgr_cloud_dump_multiple │ │ │ ├── readmes │ │ │ │ ├── otheruser-somerepo_2.fe37.md │ │ │ │ └── someuser-somerepo_1.b7f3.md │ │ │ └── splitgraph.yml │ │ │ └── sgr_cloud_dump_single │ │ │ ├── readmes │ │ │ └── someuser-somerepo_1.b7f3.md │ │ │ └── splitgraph.yml │ ├── test_cloud.py │ ├── test_cloud_jobs.py │ ├── test_cloud_metadata.py │ ├── test_commit_checkout.py │ ├── test_engine.py │ ├── test_image_object_info.py │ ├── test_init.py │ ├── test_misc.py │ ├── test_mount.py │ ├── test_push_pull.py │ └── test_splitfile.py │ ├── commands │ ├── BUILD │ ├── __init__.py │ ├── snapshots │ │ └── test_multicorn_fdws │ │ │ ├── test_aggregations_join_combinations │ │ │ ├── es │ │ │ │ └── account_join_sub_aggs.yml │ │ │ └── pg │ │ │ │ └── account_join_sub_aggs.yml │ │ │ ├── test_grouping_and_aggregations_bare │ │ │ ├── es │ │ │ │ └── account_count_by_age.yml │ │ │ └── pg │ │ │ │ └── account_count_by_age.yml │ │ │ ├── test_grouping_and_aggregations_filtering │ │ │ ├── es │ │ │ │ ├── avg_age_state_gender_filter_by_having.yml │ │ │ │ └── min_balance_state_age_filtered.yml │ │ │ └── pg │ │ │ │ ├── avg_age_state_gender_filter_by_having.yml │ │ │ │ └── min_balance_state_age_filtered.yml │ │ │ └── test_simple_grouping_clauses │ │ │ ├── es │ │ │ ├── account_genders_and_ages.yml │ │ │ └── account_states.yml │ │ │ └── pg │ │ │ ├── account_genders_and_ages.yml │ │ │ └── account_states.yml │ ├── test_bloom_indexing.py │ ├── test_commit_diff.py │ ├── test_diff_packing.py │ ├── test_external_objects.py │ ├── test_import.py │ ├── test_layered_querying.py │ ├── test_misc.py │ ├── test_mounting.py │ ├── test_multicorn_fdws.py │ ├── test_provenance.py │ ├── test_push_pull.py │ ├── test_range_indexing.py │ ├── test_schema_changes.py │ └── test_writable_lq.py │ ├── conftest.py │ ├── ingestion │ ├── BUILD │ ├── test_airbyte.py │ ├── test_athena.py │ ├── test_bigquery.py │ ├── test_commandline.py │ ├── test_common.py │ ├── test_csv.py │ ├── test_dbt_data_source.py │ ├── test_dbt_utils.py │ ├── test_inference.py │ ├── test_pandas.py │ ├── test_singer.py │ ├── test_snowflake.py │ ├── test_socrata.py │ └── test_sqlite.py │ ├── splitfile │ ├── BUILD │ ├── __init__.py │ ├── test_custom_commands.py │ ├── test_errors.py │ └── test_execution.py │ ├── test_cloud.py │ ├── test_config.py │ ├── test_drawing.py │ ├── test_engine.py │ ├── test_migrations.py │ ├── test_misc.py │ ├── 
test_object_cache.py │ ├── test_object_hashing.py │ ├── test_security.py │ ├── test_sql_validation.py │ └── utils.py └── wait-for-test-architecture.sh /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | commit = True 3 | tag = True 4 | current_version = 0.3.12 5 | parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+))? 6 | serialize = 7 | {major}.{minor}.{patch}-{release} 8 | {major}.{minor}.{patch} 9 | 10 | [bumpversion:file:pyproject.toml] 11 | 12 | [bumpversion:file:splitgraph/__version__.py] 13 | 14 | [bumpversion:file:install.sh] 15 | -------------------------------------------------------------------------------- /.ci/build_docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CI_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 4 | REPO_ROOT_DIR="${CI_DIR}/.." 5 | 6 | pushd "$REPO_ROOT_DIR" \ 7 | && poetry run "$CI_DIR"/prepare_doc_bundle.sh \ 8 | && popd \ 9 | && exit 0 10 | 11 | exit 1 12 | -------------------------------------------------------------------------------- /.ci/build_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DEFAULT_PYPI_URL="https://test.pypi.org/legacy/" 4 | 5 | CI_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 6 | REPO_ROOT_DIR="${CI_DIR}/.." 7 | 8 | # By default, will configure PyPi for publishing. 9 | # To skip publishing setup, set NO_PUBLISH=1 .ci/build_wheel.sh 10 | NO_PUBLISH_FLAG="${NO_PUBLISH}" 11 | 12 | test -n "$NO_PUBLISH_FLAG" && { echo "Skipping publish because \$NO_PUBLISH is set" ; } 13 | test -z "$PYPI_PASSWORD" && \ 14 | ! test -n "$NO_PUBLISH_FLAG" \ 15 | && { echo "Fatal Error: No PYPI_PASSWORD set. To skip, set NO_PUBLISH=1" ; exit 1 ; } 16 | test -z "$PYPI_URL" && { echo "No PYPI_URL set. Defaulting to ${DEFAULT_PYPI_URL}" ; } 17 | 18 | PYPI_URL=${PYPI_URL-"${DEFAULT_PYPI_URL}"} 19 | 20 | # Configure pypi for deployment 21 | pushd "$REPO_ROOT_DIR" 22 | 23 | set -e 24 | if ! test -n "$NO_PUBLISH_FLAG" ; then 25 | echo "Configuring poetry with password from \$PYPI_PASSWORD" 26 | echo "To skip, try: NO_PUBLISH=1 $0 $*" 27 | poetry config http-basic.testpypi splitgraph "$PYPI_PASSWORD" 28 | poetry config http-basic.pypi splitgraph "$PYPI_PASSWORD" 29 | fi 30 | 31 | # Set the PyPi URL because it can't hurt (we skipped setting the credentials) 32 | poetry config repositories.testpypi "$PYPI_URL" 33 | 34 | poetry build 35 | popd 36 | 37 | set +e 38 | exit 0 39 | -------------------------------------------------------------------------------- /.ci/down_architecture.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CI_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 4 | REPO_ROOT_DIR="${CI_DIR}/.." 5 | TEST_DIR="${REPO_ROOT_DIR}/test" 6 | ARCHITECTURE_DIR="${TEST_DIR}/architecture" 7 | 8 | CORE_ARCHITECTURE="docker-compose.core.yml" 9 | MOUNTING_ARCHITECTURE="docker-compose.mounting.yml" 10 | 11 | # Stop the PG/MySQL that ship with Travis and run our own integration test 12 | # SG engine/remote architecture instead. 13 | pushd "$REPO_ROOT_DIR" \ 14 | && pushd "${ARCHITECTURE_DIR}" \ 15 | && echo "Bringing down the test architecture..." \ 16 | && docker-compose -f $CORE_ARCHITECTURE -f $MOUNTING_ARCHITECTURE down -v \ 17 | && echo "Test architecture down." 
\ 18 | && popd \ 19 | && exit 0 20 | 21 | exit 1 22 | -------------------------------------------------------------------------------- /.ci/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | poetry export --dev -f requirements.txt --without-hashes -o /tmp/requirements.txt -E pandas 4 | sed -i "/ @ \//d" /tmp/requirements.txt 5 | python -m pip install -U pip 6 | cat /tmp/requirements.txt 7 | pip install --no-deps -r /tmp/requirements.txt 8 | poetry install -E pandas 9 | 10 | . "$(poetry env info --path)/bin/activate" 11 | export PATH=$PATH:"$PWD"/bin 12 | sgr --help 13 | which sgr || true 14 | 15 | # Needed to test the dbt example, not required by core sg 16 | python -m venv "$DBT_VENV" 17 | . "$DBT_VENV"/bin/activate 18 | pip install dbt-core==1.0.0 dbt-postgres==1.0.0 19 | pip install --force-reinstall --upgrade markupsafe==2.0.1 20 | 21 | # Singer tap integration test 22 | python -m venv "$TAP_MYSQL_VENV" 23 | . "$TAP_MYSQL_VENV"/bin/activate 24 | pip install tap-mysql 25 | -------------------------------------------------------------------------------- /.ci/install_prereqs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | CI_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 4 | REPO_ROOT_DIR="${CI_DIR}/.." 5 | 6 | test -z "$COMPOSE_VERSION" && { echo "Fatal Error: No COMPOSE_VERSION set" ; exit 1 ; } 7 | test -z "$POETRY_VERSION" && { echo "Fatal Error: No POETRY_VERSION set" ; exit 1 ; } 8 | 9 | D_COMPOSE_BASE_URL="https://github.com/docker/compose/releases/download" 10 | D_COMPOSE_ARCH="docker-compose-$(uname -s)-$(uname -m)" 11 | D_COMPOSE_URL="${D_COMPOSE_BASE_URL}/${COMPOSE_VERSION}/${D_COMPOSE_ARCH}" 12 | 13 | # Install docker compose and poetry 14 | pushd "$REPO_ROOT_DIR" 15 | curl -L "$D_COMPOSE_URL" > docker-compose 16 | chmod +x docker-compose 17 | sudo mv docker-compose /usr/local/bin 18 | curl -sSL https://install.python-poetry.org | python - 19 | 20 | popd 21 | -------------------------------------------------------------------------------- /.ci/prepare_doc_bundle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # CI script for compiling Sphinx docs and other assets into a bundle used to build 4 | # the splitgraph.com docs section (API and sgr). The bundle is available on the 5 | # releases page for the splitgraph.com CI job to pick up. 6 | 7 | # Doesn't actually build the HTML docs and isn't useful by itself (contains files 8 | # in Sphinx-internal fjson format). 9 | 10 | OUTPUT=${OUTPUT-sgr-docs-bin} 11 | CI_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 12 | REPO_ROOT_DIR="${CI_DIR}/.." 13 | TARGET_DIR="${REPO_ROOT_DIR}"/dist/"$OUTPUT" 14 | 15 | rm "$TARGET_DIR" -rf 16 | mkdir -p "$TARGET_DIR" 17 | 18 | echo "Generating Sphinx documentation in JSON format..." 19 | cd "$REPO_ROOT_DIR"/docs 20 | make json 21 | mv _build/json "$TARGET_DIR" 22 | 23 | echo "Generating Markdown sgr reference" 24 | python generate_reference.py sgr "$TARGET_DIR"/sgr 25 | 26 | echo "Generating configuration reference" 27 | python generate_reference.py config "$TARGET_DIR"/0100_config-flag-reference.mdx 28 | 29 | # Temporarily disabled: these take way too much time and aren't used by the website. 30 | # echo "Building Asciinema casts" 31 | # TARGET_DIR=$TARGET_DIR "$CI_DIR"/rebuild_asciicasts.sh 32 | 33 | echo "Archiving the bundle $OUTPUT.tar.gz" 34 | cd "$TARGET_DIR"/.. 
35 | tar -czf "$OUTPUT".tar.gz "$OUTPUT" 36 | 37 | echo "All done." 38 | -------------------------------------------------------------------------------- /.ci/up_architecture.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CI_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 4 | REPO_ROOT_DIR="${CI_DIR}/.." 5 | TEST_DIR="${REPO_ROOT_DIR}/test" 6 | ARCHITECTURE_DIR="${TEST_DIR}/architecture" 7 | 8 | CORE_ARCHITECTURE="docker-compose.core.yml" 9 | MOUNTING_ARCHITECTURE="docker-compose.mounting.yml" 10 | 11 | pushd "$ARCHITECTURE_DIR" \ 12 | && docker-compose -f $CORE_ARCHITECTURE build \ 13 | && docker-compose -f $CORE_ARCHITECTURE up -d \ 14 | && { { 15 | echo "Building the mounting test architecture in the background: " $(date) 16 | docker-compose -f $MOUNTING_ARCHITECTURE build 17 | docker-compose -f $MOUNTING_ARCHITECTURE up -d 18 | echo "Background mounting test architecture build complete: " $(date) 19 | } & } \ 20 | && popd \ 21 | && echo "Wait for core test architecture..." \ 22 | && pushd "${ARCHITECTURE_DIR}" \ 23 | && ( grep local_engine /etc/hosts >/dev/null || echo "127.0.0.1 local_engine" | sudo tee -a /etc/hosts ; ) \ 24 | && ( grep remote_engine /etc/hosts >/dev/null || echo "127.0.0.1 remote_engine" | sudo tee -a /etc/hosts ; ) \ 25 | && ( grep objectstorage /etc/hosts >/dev/null || echo "127.0.0.1 objectstorage" | sudo tee -a /etc/hosts ; ) \ 26 | && ./wait-for-architecture.sh \ 27 | && popd \ 28 | && exit 0 29 | 30 | exit 1 31 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | # Tested inside of the actual engine 3 | omit = splitgraph/core/fdw_checkout.py,splitgraph/core/server.py,splitgraph/ingestion/csv/fdw.py 4 | 5 | # Regexes for lines to exclude from consideration 6 | exclude_lines = 7 | # Have to re-enable the standard pragma 8 | pragma: no cover 9 | 10 | # Don't complain about missing debug-only code: 11 | def __repr__ 12 | if self\.debug 13 | 14 | # Don't complain if tests don't hit defensive assertion code: 15 | raise AssertionError 16 | raise NotImplementedError 17 | 18 | # Don't complain if non-runnable code isn't run: 19 | if 0: 20 | if __name__ == .__main__.: 21 | 22 | # mypy guards to avoid importing code that's only used for typechecking 23 | if TYPE_CHECKING: 24 | 25 | ignore_errors = True 26 | 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .coverage 2 | .sgconfig 3 | !examples/**/.sgconfig 4 | !test/resources/.sgconfig 5 | htmlcov 6 | **/__pycache__ 7 | **/.eggs 8 | **/*.egg-info 9 | 10 | build/ 11 | docs/_build/ 12 | dist/ 13 | .venv/ 14 | .cache/ 15 | .mypy_cache/ 16 | 17 | .idea/ 18 | pip-wheel-metadata 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "engine/src/Multicorn"] 2 | path = engine/src/Multicorn 3 | url = https://github.com/splitgraph/Multicorn.git 4 | [submodule "engine/src/cstore_fdw"] 5 | path = engine/src/cstore_fdw 6 | url = https://github.com/splitgraph/cstore_fdw.git 7 | [submodule "engine/src/postgres-elasticsearch-fdw"] 8 | path = engine/src/postgres-elasticsearch-fdw 9 | url = https://github.com/splitgraph/postgres-elasticsearch-fdw.git 10 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 22.3.0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | # Mypy config 8 | # Note that we have to specify exclusions here. This is because mypy's exclude setting 9 | # gets overridden by the actual path spec. 10 | - repo: https://github.com/pre-commit/mirrors-mypy 11 | rev: 'v1.0.1' 12 | hooks: 13 | - id: mypy 14 | additional_dependencies: 15 | # These have to be installed here as well as in the venv if you want to be able 16 | # to run `mypy splitgraph` from the commandline. 17 | - pydantic>=1.8.1 18 | - types-chardet 19 | - types-requests 20 | - types-tabulate 21 | - types-PyYAML 22 | exclude: "^(examples|test|docs)/" 23 | 24 | - repo: local 25 | hooks: 26 | - id: isort 27 | name: isort 28 | entry: isort 29 | require_serial: true 30 | language: python 31 | language_version: python3 32 | types_or: [cython, pyi, python] 33 | args: ["--profile", "black", "--filter-files"] 34 | minimum_pre_commit_version: "2.9.2" 35 | additional_dependencies: ["isort==5.10.1"] 36 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | RUN apt-get update && apt-get install -y curl 4 | 5 | RUN curl -sSL https://raw.githubusercontent.com/sdispater/poetry/master/get-poetry.py | python 6 | 7 | RUN mkdir /splitgraph 8 | COPY . /splitgraph 9 | 10 | RUN $HOME/.poetry/bin/poetry config settings.virtualenvs.create true 11 | RUN cd /splitgraph && $HOME/.poetry/bin/poetry install --no-dev 12 | 13 | # The pip-wheel-metadata is supposed to be temporary. For downstream image builds, Poetry tries to reinstall Splitgraph 14 | # from /splitgraph again and fails with 15 | # 16 | # FileExistsError: [Errno 17] File exists: '/splitgraph/pip-wheel-metadata 17 | # /splitgraph-0.0.0.dist-info' 18 | # See https://github.com/pypa/pip/issues/6213 19 | RUN rm /splitgraph/pip-wheel-metadata -rf 20 | 21 | CMD sgr 22 | -------------------------------------------------------------------------------- /bin/sgr: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from splitgraph.commandline import cli 4 | 5 | cli() 6 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Splitgraph 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | # Run this to generate stubs for API docs 23 | apidoc: 24 | rm api -rf 25 | sphinx-apidoc -o api ../splitgraph 26 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Sphinx docs: parts copied from the toplevel README, parts generated by sphinx-autodoc from docstrings. 2 | 3 | # Building 4 | 5 | ``` 6 | pip/conda install sphinx 7 | make clean html / make clean latexpdf 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/api/modules.rst: -------------------------------------------------------------------------------- 1 | splitgraph 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | splitgraph 8 | -------------------------------------------------------------------------------- /docs/api/splitgraph.cloud.project.rst: -------------------------------------------------------------------------------- 1 | splitgraph.cloud.project package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.cloud.project.dbt module 8 | ----------------------------------- 9 | 10 | .. automodule:: splitgraph.cloud.project.dbt 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.cloud.project.generation module 16 | ------------------------------------------ 17 | 18 | .. automodule:: splitgraph.cloud.project.generation 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.cloud.project.github\_actions module 24 | ----------------------------------------------- 25 | 26 | .. automodule:: splitgraph.cloud.project.github_actions 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | splitgraph.cloud.project.models module 32 | -------------------------------------- 33 | 34 | .. automodule:: splitgraph.cloud.project.models 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | splitgraph.cloud.project.templates module 40 | ----------------------------------------- 41 | 42 | .. automodule:: splitgraph.cloud.project.templates 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | splitgraph.cloud.project.utils module 48 | ------------------------------------- 49 | 50 | .. automodule:: splitgraph.cloud.project.utils 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: splitgraph.cloud.project 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /docs/api/splitgraph.cloud.rst: -------------------------------------------------------------------------------- 1 | splitgraph.cloud package 2 | ======================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.cloud.project 11 | 12 | Submodules 13 | ---------- 14 | 15 | splitgraph.cloud.models module 16 | ------------------------------ 17 | 18 | .. automodule:: splitgraph.cloud.models 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.cloud.queries module 24 | ------------------------------- 25 | 26 | .. automodule:: splitgraph.cloud.queries 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: splitgraph.cloud 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/api/splitgraph.core.indexing.rst: -------------------------------------------------------------------------------- 1 | splitgraph.core.indexing package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.core.indexing.bloom module 8 | ------------------------------------- 9 | 10 | .. automodule:: splitgraph.core.indexing.bloom 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.core.indexing.range module 16 | ------------------------------------- 17 | 18 | .. automodule:: splitgraph.core.indexing.range 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: splitgraph.core.indexing 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/splitgraph.core.sql.rst: -------------------------------------------------------------------------------- 1 | splitgraph.core.sql package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.core.sql.queries module 8 | ---------------------------------- 9 | 10 | .. automodule:: splitgraph.core.sql.queries 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.core.sql.splitfile\_validation module 16 | ------------------------------------------------ 17 | 18 | .. automodule:: splitgraph.core.sql.splitfile_validation 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: splitgraph.core.sql 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/splitgraph.engine.postgres.rst: -------------------------------------------------------------------------------- 1 | splitgraph.engine.postgres package 2 | ================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.engine.postgres.engine module 8 | ---------------------------------------- 9 | 10 | .. automodule:: splitgraph.engine.postgres.engine 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.engine.postgres.psycopg module 16 | ----------------------------------------- 17 | 18 | .. automodule:: splitgraph.engine.postgres.psycopg 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: splitgraph.engine.postgres 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/splitgraph.engine.rst: -------------------------------------------------------------------------------- 1 | splitgraph.engine package 2 | ========================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.engine.postgres 11 | 12 | Submodules 13 | ---------- 14 | 15 | splitgraph.engine.base module 16 | ----------------------------- 17 | 18 | .. automodule:: splitgraph.engine.base 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.engine.config module 24 | ------------------------------- 25 | 26 | .. 
automodule:: splitgraph.engine.config 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | splitgraph.engine.utils module 32 | ------------------------------ 33 | 34 | .. automodule:: splitgraph.engine.utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: splitgraph.engine 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/api/splitgraph.hooks.data_source.rst: -------------------------------------------------------------------------------- 1 | splitgraph.hooks.data\_source package 2 | ===================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.hooks.data\_source.base module 8 | ----------------------------------------- 9 | 10 | .. automodule:: splitgraph.hooks.data_source.base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.hooks.data\_source.fdw module 16 | ---------------------------------------- 17 | 18 | .. automodule:: splitgraph.hooks.data_source.fdw 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.hooks.data\_source.utils module 24 | ------------------------------------------ 25 | 26 | .. automodule:: splitgraph.hooks.data_source.utils 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: splitgraph.hooks.data_source 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/api/splitgraph.hooks.rst: -------------------------------------------------------------------------------- 1 | splitgraph.hooks package 2 | ======================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.hooks.data_source 11 | 12 | Submodules 13 | ---------- 14 | 15 | splitgraph.hooks.external\_objects module 16 | ----------------------------------------- 17 | 18 | .. automodule:: splitgraph.hooks.external_objects 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.hooks.mount\_handlers module 24 | --------------------------------------- 25 | 26 | .. automodule:: splitgraph.hooks.mount_handlers 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | splitgraph.hooks.s3 module 32 | -------------------------- 33 | 34 | .. automodule:: splitgraph.hooks.s3 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | splitgraph.hooks.s3\_server module 40 | ---------------------------------- 41 | 42 | .. automodule:: splitgraph.hooks.s3_server 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | splitgraph.hooks.splitfile\_commands module 48 | ------------------------------------------- 49 | 50 | .. automodule:: splitgraph.hooks.splitfile_commands 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. 
automodule:: splitgraph.hooks 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.airbyte.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.airbyte package 2 | ==================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.ingestion.airbyte.data\_source module 8 | ------------------------------------------------ 9 | 10 | .. automodule:: splitgraph.ingestion.airbyte.data_source 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.ingestion.airbyte.docker\_utils module 16 | ------------------------------------------------- 17 | 18 | .. automodule:: splitgraph.ingestion.airbyte.docker_utils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.ingestion.airbyte.models module 24 | ------------------------------------------ 25 | 26 | .. automodule:: splitgraph.ingestion.airbyte.models 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | splitgraph.ingestion.airbyte.utils module 32 | ----------------------------------------- 33 | 34 | .. automodule:: splitgraph.ingestion.airbyte.utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: splitgraph.ingestion.airbyte 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.athena.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.athena package 2 | =================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: splitgraph.ingestion.athena 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.bigquery.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.bigquery package 2 | ===================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: splitgraph.ingestion.bigquery 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.csv.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.csv package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.ingestion.csv.common module 8 | -------------------------------------- 9 | 10 | .. automodule:: splitgraph.ingestion.csv.common 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.ingestion.csv.fdw module 16 | ----------------------------------- 17 | 18 | .. automodule:: splitgraph.ingestion.csv.fdw 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. 
automodule:: splitgraph.ingestion.csv 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.dbt.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.dbt package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.ingestion.dbt.data\_source module 8 | -------------------------------------------- 9 | 10 | .. automodule:: splitgraph.ingestion.dbt.data_source 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.ingestion.dbt.utils module 16 | ------------------------------------- 17 | 18 | .. automodule:: splitgraph.ingestion.dbt.utils 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: splitgraph.ingestion.dbt 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.ingestion.airbyte 11 | splitgraph.ingestion.athena 12 | splitgraph.ingestion.bigquery 13 | splitgraph.ingestion.csv 14 | splitgraph.ingestion.dbt 15 | splitgraph.ingestion.singer 16 | splitgraph.ingestion.snowflake 17 | splitgraph.ingestion.socrata 18 | 19 | Submodules 20 | ---------- 21 | 22 | splitgraph.ingestion.common module 23 | ---------------------------------- 24 | 25 | .. automodule:: splitgraph.ingestion.common 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | 30 | splitgraph.ingestion.inference module 31 | ------------------------------------- 32 | 33 | .. automodule:: splitgraph.ingestion.inference 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | splitgraph.ingestion.pandas module 39 | ---------------------------------- 40 | 41 | .. automodule:: splitgraph.ingestion.pandas 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | Module contents 47 | --------------- 48 | 49 | .. automodule:: splitgraph.ingestion 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.singer.commandline.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.singer.commandline package 2 | =============================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: splitgraph.ingestion.singer.commandline 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.singer.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.singer package 2 | =================================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.ingestion.singer.commandline 11 | 12 | Submodules 13 | ---------- 14 | 15 | splitgraph.ingestion.singer.common module 16 | ----------------------------------------- 17 | 18 | .. 
automodule:: splitgraph.ingestion.singer.common 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.ingestion.singer.data\_source module 24 | ----------------------------------------------- 25 | 26 | .. automodule:: splitgraph.ingestion.singer.data_source 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | splitgraph.ingestion.singer.db\_sync module 32 | ------------------------------------------- 33 | 34 | .. automodule:: splitgraph.ingestion.singer.db_sync 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: splitgraph.ingestion.singer 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.snowflake.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.snowflake package 2 | ====================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: splitgraph.ingestion.snowflake 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api/splitgraph.ingestion.socrata.rst: -------------------------------------------------------------------------------- 1 | splitgraph.ingestion.socrata package 2 | ==================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.ingestion.socrata.fdw module 8 | --------------------------------------- 9 | 10 | .. automodule:: splitgraph.ingestion.socrata.fdw 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.ingestion.socrata.mount module 16 | ----------------------------------------- 17 | 18 | .. automodule:: splitgraph.ingestion.socrata.mount 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | splitgraph.ingestion.socrata.querying module 24 | -------------------------------------------- 25 | 26 | .. automodule:: splitgraph.ingestion.socrata.querying 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: splitgraph.ingestion.socrata 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/api/splitgraph.resources.icons.rst: -------------------------------------------------------------------------------- 1 | splitgraph.resources.icons package 2 | ================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: splitgraph.resources.icons 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api/splitgraph.resources.rst: -------------------------------------------------------------------------------- 1 | splitgraph.resources package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.resources.icons 11 | splitgraph.resources.splitgraph_meta 12 | splitgraph.resources.static 13 | 14 | Module contents 15 | --------------- 16 | 17 | .. 
automodule:: splitgraph.resources 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | -------------------------------------------------------------------------------- /docs/api/splitgraph.resources.splitgraph_meta.rst: -------------------------------------------------------------------------------- 1 | splitgraph.resources.splitgraph\_meta package 2 | ============================================= 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: splitgraph.resources.splitgraph_meta 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api/splitgraph.resources.static.rst: -------------------------------------------------------------------------------- 1 | splitgraph.resources.static package 2 | =================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: splitgraph.resources.static 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | -------------------------------------------------------------------------------- /docs/api/splitgraph.rst: -------------------------------------------------------------------------------- 1 | splitgraph package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.cloud 11 | splitgraph.commandline 12 | splitgraph.config 13 | splitgraph.core 14 | splitgraph.engine 15 | splitgraph.hooks 16 | splitgraph.ingestion 17 | splitgraph.resources 18 | splitgraph.splitfile 19 | splitgraph.utils 20 | 21 | Submodules 22 | ---------- 23 | 24 | splitgraph.exceptions module 25 | ---------------------------- 26 | 27 | .. automodule:: splitgraph.exceptions 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: splitgraph 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/api/splitgraph.splitfile.generation.rst: -------------------------------------------------------------------------------- 1 | splitgraph.splitfile.generation package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.splitfile.generation.common module 8 | --------------------------------------------- 9 | 10 | .. automodule:: splitgraph.splitfile.generation.common 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.splitfile.generation.replacement module 16 | -------------------------------------------------- 17 | 18 | .. automodule:: splitgraph.splitfile.generation.replacement 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: splitgraph.splitfile.generation 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/splitgraph.splitfile.rst: -------------------------------------------------------------------------------- 1 | splitgraph.splitfile package 2 | ============================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | splitgraph.splitfile.generation 11 | 12 | Submodules 13 | ---------- 14 | 15 | splitgraph.splitfile.execution module 16 | ------------------------------------- 17 | 18 | .. 
automodule:: splitgraph.splitfile.execution 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: splitgraph.splitfile 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/splitgraph.utils.rst: -------------------------------------------------------------------------------- 1 | splitgraph.utils package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | splitgraph.utils.docker module 8 | ------------------------------ 9 | 10 | .. automodule:: splitgraph.utils.docker 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | splitgraph.utils.yaml module 16 | ---------------------------- 17 | 18 | .. automodule:: splitgraph.utils.yaml 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: splitgraph.utils 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /engine/build_scripts/build_splitgraph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # Install the Splitgraph library and the layered querying foreign data wrapper. 4 | cd /splitgraph 5 | 6 | export POETRY_VERSION=1.3.2 7 | 8 | curl -sSL https://install.python-poetry.org | python3.9 - 9 | export PATH="/root/.local/bin:$PATH" 10 | 11 | # Install globally (otherwise we'll need to find a way to get Multicorn to see the venv) 12 | ln -sf /usr/bin/python3.9 /usr/bin/python 13 | poetry config virtualenvs.create false 14 | 15 | # Export the requirements into pip and install them separately (faster than Poetry) 16 | poetry export -f requirements.txt --without-hashes -o requirements.txt && sed -i "/ @ \//d" requirements.txt 17 | pip install --no-deps -r requirements.txt 18 | 19 | # We don't use pip/poetry here to install the package in "editable" mode as we 20 | # don't care about setuptools entrypoints etc. The Dockerfile just appends 21 | # /splitgraph to the PYTHONPATH. 22 | 23 | # Poetry vendors its packages which adds about 70MB to the final image size -- we 24 | # don't need it at this point, so delete it. 25 | rm "$HOME"/.poetry -rf 26 | -------------------------------------------------------------------------------- /engine/build_scripts/fdws/cstore_fdw/build_cstore_fdw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | cd /src/cstore_fdw 4 | 5 | export DESTDIR=/output/root 6 | make install 7 | 8 | # Slightly ugly hack: we want Multicorn to link against CStore 9 | # and ld requires the library to be named libLIBRARY.so, 10 | # so we copy cstore_fdw.so into /usr/local/lib as well. 11 | mkdir -p $DESTDIR/usr/local/lib 12 | cp cstore_fdw.so $DESTDIR/usr/local/lib/libcstore_fdw.so 13 | -------------------------------------------------------------------------------- /engine/build_scripts/fdws/mongo_fdw/build_mongo_fdw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | echo "Install mongo_fdw extension..." 4 | 5 | echo "Download mongo_fdw source code..." 6 | cd /build 7 | git clone https://github.com/EnterpriseDB/mongo_fdw.git 8 | 9 | cd mongo_fdw 10 | 11 | echo "Build mongo_fdw..." 
12 | 13 | git checkout 6d06a82b9071c2a8e92d80e07ef7f9d1c4e1e69d 14 | git apply ../build-fixes.patch 15 | 16 | # Build the prerequisites (libmongoc/json-c): these won't make it into 17 | # the final image though as they're difficult to put in the right place whilst also 18 | # getting mongo_fdw to still build against them. 19 | ./autogen.sh --with-master 20 | 21 | # Build the actual FDW. 22 | export DESTDIR=/output/root 23 | make clean && make && make install 24 | -------------------------------------------------------------------------------- /engine/build_scripts/fdws/multicorn/build_multicorn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | cd /src/Multicorn 4 | 5 | # Fork of official Multicorn with the ability to scan directly through cstore_fdw 6 | # fragments rather than passing data through python. 7 | 8 | export DESTDIR=/output/root 9 | export PYTHON_OVERRIDE=python3.9 10 | export PYTHON_CONFIG=x86_64-linux-gnu-python3.9-config 11 | ln -sf /usr/bin/python3.9 /usr/bin/python 12 | 13 | # Do "make CFLAGS=-DDEBUG install" instead to enable debug output for scans. 14 | # Include and dynamically link to cstore_fdw 15 | make \ 16 | CPPFLAGS="-I ../cstore_fdw" \ 17 | SHLIB_LINK="-L/output/root/usr/local/lib -lcstore_fdw -lpython3.9" \ 18 | install 19 | -------------------------------------------------------------------------------- /engine/build_scripts/fdws/mysql_fdw/build_mysql_fdw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | echo "Install mysql_fdw extension..." 4 | 5 | fatal_error() { 6 | echo "Fatal:" "$@" 1>&2 ; 7 | exit 1; 8 | } 9 | 10 | mkdir /tmp/mysql-fdw-staging || { 11 | fatal_error "Failed to mkdir /tmp/mysql-fdw-staging" ; 12 | } 13 | 14 | 15 | cd /tmp/mysql-fdw-staging || { 16 | fatal_error "Failed to cd /tmp/mysql-fdw-staging" ; 17 | } 18 | 19 | echo "Download mysql_fdw source code..." 20 | git clone https://github.com/EnterpriseDB/mysql_fdw.git 21 | cd mysql_fdw || { 22 | fatal_error "Failed to cd /tmp/mysql-fdw-staging/mysql_fdw" ; 23 | } 24 | 25 | echo "Build mysql_fdw..." 26 | 27 | # Pin mysql_fdw to the last known good build on Jan 25, 2021. 
The next commit, 5c80ff8743a02af95cd97d6ff5b925617a3e9f01, makes it segfault when querying more than one row 28 | # with a binary type (see test_mount_mysql) 29 | git checkout cf88939d1e3f54e3fa9cf03010fa48dff8035560 30 | 31 | export USE_PGXS=1 32 | export DESTDIR=/output/root 33 | make install 34 | -------------------------------------------------------------------------------- /engine/etc/postgresql/pg_hba.conf: -------------------------------------------------------------------------------- 1 | host all all 0.0.0.0/0 md5 2 | host all all ::0/0 md5 3 | local all all md5 4 | local replication all trust 5 | -------------------------------------------------------------------------------- /engine/etc/postgresql/postgresql.conf: -------------------------------------------------------------------------------- 1 | # host all all 0.0.0.0/0 md5 2 | # local replication all trust 3 | listen_addresses = '*' 4 | port = 5432 5 | 6 | hba_file = '/etc/postgresql/pg_hba.conf' 7 | shared_preload_libraries = 'cstore_fdw' 8 | -------------------------------------------------------------------------------- /engine/etc/postgresql/postgresql_debug.conf: -------------------------------------------------------------------------------- 1 | # host all all 0.0.0.0/0 md5 2 | # local replication all trust 3 | listen_addresses = '*' 4 | port = 5432 5 | 6 | hba_file = '/etc/postgresql/pg_hba.conf' 7 | shared_preload_libraries = 'cstore_fdw' 8 | # log_destination = 'syslog' 9 | # syslog_facility = 'LOCAL0' 10 | # syslog_ident = 'postgres' 11 | # syslog_sequence_numbers = on 12 | # syslog_split_messages = on 13 | 14 | debug_print_parse = on 15 | debug_print_rewritten = on 16 | debug_print_plan = on 17 | debug_pretty_print = on 18 | log_checkpoints = on 19 | log_connections = on 20 | log_disconnections = on 21 | log_duration = on 22 | log_error_verbosity = 'VERBOSE' 23 | -------------------------------------------------------------------------------- /engine/init_scripts/000_create_extensions.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION postgres_fdw; 2 | CREATE EXTENSION mongo_fdw; 3 | CREATE EXTENSION mysql_fdw; 4 | CREATE EXTENSION multicorn; 5 | CREATE EXTENSION cstore_fdw; 6 | --CREATE EXTENSION plpython3u; 7 | CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; 8 | -------------------------------------------------------------------------------- /examples/benchmarking/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | -------------------------------------------------------------------------------- /examples/benchmarking/README.md: -------------------------------------------------------------------------------- 1 | # Splitgraph benchmarking 2 | 3 | Since Splitgraph defers to Postgres for a lot of its operations (e.g. after checkout, a Splitgraph 4 | table becomes a normal PostgreSQL table with change tracking enabled), it's difficult to specify 5 | what is considered a benchmark for Splitgraph. 6 | 7 | There are two Jupyter notebooks here. The first one, [benchmarking](./benchmarking.ipynb), 8 | tests the overhead of common Splitgraph operations on a series of synthetic PostgreSQL tables 9 | and compares dataset sizes when stored in Splitgraph vs when stored as PostgreSQL tables. 
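For a rough idea of the operations whose overhead is being measured, they are the same ones exposed by the `sgr` command line (the repository name below is purely illustrative, not the one used in the notebook):

```
sgr init benchmark/synthetic                # create an empty repository on the engine
# ...populate tables in the "benchmark/synthetic" schema with synthetic data...
sgr commit benchmark/synthetic              # snapshot the schema as a new Splitgraph image
sgr checkout benchmark/synthetic:latest     # materialize the image back into a plain schema
```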
10 | 11 | The second one, [benchmarking_real_data](./benchmarking_real_data.ipynb), uses some datasets 12 | that are available on the Splitgraph registry to compare the size improvement from storing 13 | them as Splitgraph objects as well as benchmarks querying Splitgraph repositories directly 14 | (using layered querying) vs querying them as PostgreSQL tables. 15 | 16 | ## Running the example 17 | 18 | You can view the notebooks in your browser. Alternatively, you can build and start up the engine: 19 | 20 | ``` 21 | export COMPOSE_PROJECT_NAME=splitgraph_example 22 | docker-compose down -v 23 | docker-compose build 24 | docker-compose up -d 25 | sgr init 26 | ``` 27 | 28 | You need to have been logged into the registry (`sgr cloud login` or `sgr cloud login-api`). 29 | 30 | You can also use your own engine that's managed by `sgr engine`. 31 | 32 | Install this package with [Poetry](https://github.com/sdispater/poetry): `poetry install` 33 | 34 | Open the notebook in Jupyter: `jupyter notebook benchmarking.ipynb` or `jupyter notebook benchmarking_real_data.ipynb` 35 | -------------------------------------------------------------------------------- /examples/benchmarking/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | -------------------------------------------------------------------------------- /examples/benchmarking/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "splitgraph-iris" 3 | version = "0.0.0" 4 | description = "Splitgraph Iris Dataset Example" 5 | license = "Apache 2.0" 6 | authors = ["Splitgraph Limited"] 7 | readme = "README.md" 8 | homepage = "https://www.splitgraph.com" 9 | repository = "https://github.com/splitgraph/sgr" 10 | 11 | [tool.poetry.dependencies] 12 | python = ">=3.7,<4.0" 13 | splitgraph = { path = "../.." } 14 | 15 | # Requirements to get the actual demo running 16 | pandas = ">=0.24" 17 | jupyter = "^1.0" 18 | seaborn = "^0.10.0" 19 | 20 | [build-system] 21 | requires = ["poetry>=0.12"] 22 | build-backend = "poetry.masonry.api" 23 | -------------------------------------------------------------------------------- /examples/bloom-filter/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | - SG_CONFIG_FILE=/.sgconfig 13 | expose: 14 | - 5432 15 | # Need to mount this into the engine so that access credentials to 16 | # data.splitgraph.com propagate into it. 
17 | volumes: 18 | - ${SG_CONFIG_FILE-./.sgconfig}:/.sgconfig 19 | -------------------------------------------------------------------------------- /examples/clickhouse/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | -------------------------------------------------------------------------------- /examples/clickhouse/clickhouse/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM clickhouse/clickhouse-server:latest 2 | 3 | RUN apt-get update -qq && \ 4 | apt-get install -y odbc-postgresql unixodbc && \ 5 | rm -rf /var/lib/apt/lists/* 6 | 7 | COPY odbc.ini odbcinst.ini /etc/ 8 | -------------------------------------------------------------------------------- /examples/clickhouse/clickhouse/odbc.ini: -------------------------------------------------------------------------------- 1 | ; Configuration for Splitgraph running locally (connecting to the 2 | ; engine running in the Compose stack with the default password) 3 | [splitgraph] 4 | Description = PostgreSQL connection 5 | Driver = PostgreSQL Unicode 6 | Database = splitgraph 7 | Servername = splitgraph 8 | UserName = sgr 9 | Password = supersecure 10 | Port = 5432 11 | Protocol = 12.3 12 | ReadOnly = No 13 | RowVersioning = No 14 | ShowSystemTables = No 15 | 16 | ; Configuration for connecting to the Splitgraph DDN 17 | [splitgraph_ddn] 18 | Description = PostgreSQL connection 19 | Driver = PostgreSQL Unicode 20 | Database = ddn 21 | Servername = data.splitgraph.com 22 | ; Go to https://www.splitgraph.com/connect to get credentials 23 | UserName = SPLITGRAPH_API_KEY_HERE 24 | Password = SPLITGRAPH_API_SECRET_HERE 25 | ConnSettings = SET application_name = 'odbc-clickhouse'; 26 | Port = 5432 27 | Protocol = 12.3 28 | ; Force SSL on the connection 29 | SSLmode = require 30 | ReadOnly = Yes 31 | RowVersioning = No 32 | ShowSystemTables = No 33 | ; Disable the extended query protocol (not completely supported) 34 | UseServerSidePrepare=0 -------------------------------------------------------------------------------- /examples/clickhouse/clickhouse/odbcinst.ini: -------------------------------------------------------------------------------- 1 | [PostgreSQL ANSI] 2 | Description=PostgreSQL ODBC driver (ANSI version) 3 | Driver=/usr/lib/x86_64-linux-gnu/odbc/psqlodbca.so 4 | Setup=/usr/lib/x86_64-linux-gnu/odbc/libodbcpsqlS.so 5 | Debug=0 6 | CommLog=1 7 | UsageCount=1 8 | 9 | [PostgreSQL Unicode] 10 | Description=PostgreSQL ODBC driver (Unicode version) 11 | Driver=/usr/lib/x86_64-linux-gnu/odbc/psqlodbcw.so 12 | Setup=/usr/lib/x86_64-linux-gnu/odbc/libodbcpsqlS.so 13 | Debug=0 14 | CommLog=1 15 | UsageCount=1 16 | -------------------------------------------------------------------------------- /examples/clickhouse/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | splitgraph: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | environment: 6 | - POSTGRES_USER=sgr 7 | - POSTGRES_PASSWORD=supersecure 8 | - POSTGRES_DB=splitgraph 9 | - SG_LOGLEVEL=INFO 10 | - SG_CONFIG_FILE=/.sgconfig 11 | expose: 12 | - 5432 13 | volumes: 14 | - ./.sgconfig:/.sgconfig 15 | clickhouse-server: 16 | build: 17 | context: ./clickhouse 18 | ulimits: 19 | nofile: 20 | soft: 262144 21 | hard: 262144 22 | volumes: 23 | - 
clickhouse_data:/var/lib/clickhouse 24 | - ./clickhouse/odbc.ini:/etc/odbc.ini 25 | - ./clickhouse/odbcinst.ini:/etc/odbcinst.ini 26 | 27 | clickhouse-client: 28 | image: yandex/clickhouse-client:latest 29 | command: ["--host", "clickhouse-server"] 30 | 31 | 32 | volumes: 33 | clickhouse_data: 34 | -------------------------------------------------------------------------------- /examples/clickhouse/setup_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Alias sgr to the CLI installed inside of the engine container so that the user 4 | # doesn't have to install it locally. 5 | sgr() { 6 | docker-compose exec -T splitgraph sgr $@ 7 | } 8 | 9 | # Initialize the Splitgraph engine. Splitgraph's sgr mount doesn't need this, but 10 | # cloning/checking out data does. 11 | sgr init 12 | 13 | echo "Setting up Socrata dataset mounts..." 14 | 15 | # Set up the Socrata Chicago fire stations dataset by mounting it with sgr mount 16 | # Reference: https://www.splitgraph.com/docs/ingesting-data/socrata#usage 17 | sgr mount socrata chicago_data -o @- < 42 22 | ); 23 | 24 | CREATE VIEW elasticsearch.big_join AS ( 25 | SELECT 26 | t1.timestamp, 27 | t1.col_1 AS t1_col_1, 28 | t1.col_2 AS t2_col_2, 29 | t2.col_2 AS t2_col_1 30 | FROM elasticsearch.table_1_filtered t1 31 | JOIN elasticsearch_raw.table_2 t2 32 | ON t1.timestamp = t2.timestamp 33 | ORDER BY timestamp ASC 34 | ); 35 | -------------------------------------------------------------------------------- /examples/cross-db-analytics/mounting/mount.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | THIS_DIR="$(cd -P -- "$(dirname -- "$0")" && pwd -P)" 4 | 5 | echo "Initializing Splitgraph..." 6 | sgr init 7 | 8 | echo "Mounting Matomo..." 9 | sgr mount mysql_fdw matomo_raw -c "$MATOMO_USER":"$MATOMO_PASS"@"$MATOMO_HOST":"$MATOMO_PORT" -o@"$THIS_DIR"/matomo.json 10 | 11 | echo "Mounting Elasticsearch..." 12 | sgr mount elasticsearch elasticsearch_raw -c "$ELASTICSEARCH_HOST":"$ELASTICSEARCH_PORT" -o@"$THIS_DIR"/elasticsearch.json 13 | 14 | echo "Building Matomo model..." 15 | psql $(sgr config -n) -v ON_ERROR_STOP=1 -1 < "$THIS_DIR"/matomo.sql 16 | 17 | echo "Building Elasticsearch model..." 18 | psql $(sgr config -n) -v ON_ERROR_STOP=1 -1 < "$THIS_DIR"/elasticsearch.sql 19 | -------------------------------------------------------------------------------- /examples/custom_fdw/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.4" 2 | services: 3 | # Splitgraph engine: we'll be inheriting from the main engine and adding the FDW to it. 4 | engine: 5 | build: 6 | context: ./src 7 | dockerfile: engine.Dockerfile 8 | environment: 9 | - POSTGRES_USER=sgr 10 | - POSTGRES_PASSWORD=supersecure 11 | - POSTGRES_DB=splitgraph 12 | - SG_LOGLEVEL=INFO 13 | - SG_CONFIG_FILE=/.sgconfig 14 | # Bind mount the Python source code so that it can be edited without having to rebuild 15 | # the images. 16 | volumes: 17 | - ./src/hn_fdw:/hn_fdw/hn_fdw 18 | 19 | # Container with the sgr client. 
20 | sgr: 21 | build: 22 | context: ./src 23 | dockerfile: sgr.Dockerfile 24 | depends_on: 25 | - engine 26 | environment: 27 | - SG_CONFIG_FILE=/.sgconfig 28 | volumes: 29 | - ./src/hn_fdw:/hn_fdw/hn_fdw -------------------------------------------------------------------------------- /examples/custom_fdw/src/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | ; Credentials to log into the engine 3 | SG_ENGINE_HOST=engine 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | 8 | [data_sources] 9 | ; Register the "HackerNewsDataSource" function as the hackernews mount handler 10 | hackernews=hn_fdw.mount.HackerNewsDataSource 11 | -------------------------------------------------------------------------------- /examples/custom_fdw/src/engine.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM splitgraph/engine:stable 2 | 3 | RUN mkdir /hn_fdw 4 | COPY ./hn_fdw /hn_fdw/hn_fdw 5 | COPY .sgconfig /.sgconfig 6 | ENV PYTHONPATH $PYTHONPATH:/hn_fdw 7 | -------------------------------------------------------------------------------- /examples/custom_fdw/src/hn_fdw/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/custom_fdw/src/hn_fdw/__init__.py -------------------------------------------------------------------------------- /examples/custom_fdw/src/sgr.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.2-slim 2 | 3 | RUN pip3 install splitgraph 4 | 5 | # Copy the Python code into the container and add it to the PYTHONPATH manually 6 | RUN mkdir /hn_fdw 7 | COPY ./hn_fdw /hn_fdw/hn_fdw 8 | COPY .sgconfig /.sgconfig 9 | ENV PYTHONPATH $PYTHONPATH:/hn_fdw 10 | 11 | ENTRYPOINT ["/bin/bash"] 12 | -------------------------------------------------------------------------------- /examples/dbt/.dbt/profiles.yml: -------------------------------------------------------------------------------- 1 | jaffle_shop: 2 | target: splitgraph 3 | outputs: 4 | splitgraph: 5 | type: postgres 6 | host: localhost 7 | user: sgr 8 | pass: supersecure 9 | port: 5432 10 | dbname: splitgraph 11 | # The final schema that dbt writes to is a combination of this profile schema 12 | # and the schema specified in the model. 
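# (For example, with dbt's default schema-name generation, a model configured with
# schema "staging" would be built in "dbt_jaffle_staging" on the engine.)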
13 | schema: "dbt_jaffle" 14 | threads: 4 15 | -------------------------------------------------------------------------------- /examples/dbt/.gitignore: -------------------------------------------------------------------------------- 1 | jaffle_shop/ 2 | -------------------------------------------------------------------------------- /examples/dbt/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | -------------------------------------------------------------------------------- /examples/dbt/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | -------------------------------------------------------------------------------- /examples/dbt_adapter/dbt/adapters/splitgraph/__init__.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.base import AdapterPlugin 2 | from dbt.adapters.splitgraph.connections import SplitgraphCredentials 3 | from dbt.adapters.splitgraph.impl import SplitgraphAdapter 4 | from dbt.include import splitgraph 5 | 6 | Plugin = AdapterPlugin( 7 | adapter=SplitgraphAdapter, 8 | credentials=SplitgraphCredentials, 9 | include_path=splitgraph.PACKAGE_PATH, 10 | dependencies=["postgres"], 11 | ) 12 | -------------------------------------------------------------------------------- /examples/dbt_adapter/dbt/adapters/splitgraph/impl.py: -------------------------------------------------------------------------------- 1 | from dbt.adapters.postgres.impl import PostgresAdapter 2 | from dbt.adapters.splitgraph import SplitgraphConnectionManager 3 | 4 | 5 | class SplitgraphAdapter(PostgresAdapter): 6 | ConnectionManager = SplitgraphConnectionManager 7 | -------------------------------------------------------------------------------- /examples/dbt_adapter/dbt/include/splitgraph/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | PACKAGE_PATH = os.path.dirname(__file__) 4 | -------------------------------------------------------------------------------- /examples/dbt_adapter/dbt/include/splitgraph/dbt_project.yml: -------------------------------------------------------------------------------- 1 | config-version: 2 2 | name: dbt_splitgraph 3 | version: 1.0 4 | 5 | # Note that these macros are copypasted from the official dbt Redshift Jinja macros 6 | # that mostly call their Postgres counterparts and edited so that all macros just call Postgres. 
7 | macro-paths: ["macros"] 8 | -------------------------------------------------------------------------------- /examples/dbt_adapter/dbt/include/splitgraph/macros/catalog.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro splitgraph__get_catalog(information_schema, schemas) %} 3 | {{ return(postgres__get_catalog(information_schema, schemas)) }} 4 | {% endmacro %} 5 | -------------------------------------------------------------------------------- /examples/dbt_adapter/dbt/include/splitgraph/macros/materializations/snapshot_merge.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro splitgraph__snapshot_merge_sql(target, source, insert_cols) -%} 3 | {{ postgres__snapshot_merge_sql(target, source, insert_cols) }} 4 | {% endmacro %} 5 | -------------------------------------------------------------------------------- /examples/dbt_adapter/dbt/include/splitgraph/macros/relations.sql: -------------------------------------------------------------------------------- 1 | {% macro splitgraph__get_relations () -%} 2 | {{ return(dbt.postgres__get_relations()) }} 3 | {% endmacro %} 4 | -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/.dbt/profiles.yml: -------------------------------------------------------------------------------- 1 | # This is a sample dbt profile. Note that it's supposed to be in your home directory, 2 | # configured with your own credentials. It's only here as an example. 3 | # See https://docs.getdbt.com/docs/running-a-dbt-project/using-the-command-line-interface/configure-your-profile/ 4 | # for more information. 5 | default: 6 | target: splitgraph 7 | outputs: 8 | splitgraph: 9 | type: splitgraph 10 | host: localhost 11 | user: sgr 12 | pass: password 13 | port: 5432 14 | dbname: splitgraph 15 | schema: adapter_showcase 16 | threads: 4 17 | -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/analysis/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_adapter/sample_project/analysis/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_adapter/sample_project/data/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Showcase project, generated by dbt init. 2 | # See models/splitgraph/use_splitgraph_data.sql for the actual code. 3 | 4 | name: 'splitgraph_adapter_showcase' 5 | version: '1.0.0' 6 | config-version: 2 7 | 8 | # This setting configures which "profile" dbt uses for this project. 
9 | profile: 'default' 10 | 11 | source-paths: ["models"] 12 | analysis-paths: ["analysis"] 13 | test-paths: ["tests"] 14 | data-paths: ["data"] 15 | macro-paths: ["macros"] 16 | snapshot-paths: ["snapshots"] 17 | 18 | target-path: "target" # directory which will store compiled SQL files 19 | clean-targets: # directories to be removed by `dbt clean` 20 | - "target" 21 | - "dbt_modules" 22 | 23 | 24 | # Configuring models 25 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 26 | 27 | models: 28 | splitgraph_adapter_showcase: 29 | splitgraph: 30 | materialized: view 31 | -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/macros/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_adapter/sample_project/macros/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/models/splitgraph/use_splitgraph_data.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='table') }} 2 | 3 | with source_data as ( 4 | 5 | select domain, count(domain) as count 6 | from "splitgraph/socrata:latest".datasets 7 | group by domain 8 | 9 | ) 10 | 11 | select * 12 | from source_data 13 | -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_adapter/sample_project/snapshots/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_adapter/sample_project/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_adapter/sample_project/tests/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_adapter/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import find_namespace_packages, setup 3 | 4 | package_name = "dbt-splitgraph" 5 | package_version = "0.0.1" 6 | description = """The Splitgraph adapter plugin for dbt (data build tool)""" 7 | 8 | setup( 9 | name=package_name, 10 | version=package_version, 11 | description=description, 12 | long_description=description, 13 | author="Splitgraph", 14 | author_email="support@splitgraph.com", 15 | url="https://www.splitgraph.com", 16 | packages=find_namespace_packages(), 17 | package_data={ 18 | "dbt": [ 19 | "include/splitgraph/macros/*.sql", 20 | "include/splitgraph/dbt_project.yml", 21 | ] 22 | }, 23 | install_requires=["dbt-core", "splitgraph"], 24 | ) 25 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/.dbt/profiles.yml: -------------------------------------------------------------------------------- 1 | default: 2 | target: splitgraph 3 | outputs: 4 | splitgraph: 5 | type: postgres 6 | host: localhost 7 | user: sgr 8 | pass: supersecure 9 | port: 5432 10 | dbname: splitgraph 11 | schema: "dbt_two_databases" 12 | threads: 4 13 | 
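# To use this profile without copying it into ~/.dbt, point dbt at this directory
# explicitly when running the project, e.g.:
#   dbt run --profiles-dir .dbt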
-------------------------------------------------------------------------------- /examples/dbt_two_databases/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/analysis/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_two_databases/analysis/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_two_databases/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_two_databases/data/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_two_databases/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'splitgraph_two_database_showcase' 2 | version: '1.0.0' 3 | config-version: 2 4 | 5 | # This setting configures which "profile" dbt uses for this project. 6 | profile: 'default' 7 | 8 | source-paths: ["models"] 9 | analysis-paths: ["analysis"] 10 | test-paths: ["tests"] 11 | data-paths: ["data"] 12 | macro-paths: ["macros"] 13 | snapshot-paths: ["snapshots"] 14 | 15 | target-path: "target" # directory which will store compiled SQL files 16 | clean-targets: # directories to be removed by `dbt clean` 17 | - "target" 18 | - "dbt_modules" 19 | 20 | 21 | # Configuring models 22 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 23 | 24 | models: 25 | splitgraph_two_database_showcase: 26 | splitgraph: 27 | materialized: view 28 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | mongo: 15 | build: ./splitgraph/mongodb 16 | environment: 17 | - ORIGIN_USER=originro 18 | - ORIGIN_PASS=originpass 19 | - ORIGIN_MONGO_DB=origindb 20 | expose: 21 | - 27017 22 | volumes: 23 | - ./splitgraph/mongodb/setup.js:/src/setup.js 24 | postgres: 25 | image: postgres:12.3 26 | environment: 27 | - POSTGRES_USER=originuser 28 | - POSTGRES_PASSWORD=originpass 29 | - POSTGRES_DB=origindb 30 | expose: 31 | - 5432 32 | volumes: 33 | - ./splitgraph/postgresql/setup.sql:/docker-entrypoint-initdb.d/setup.sql 34 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/macros/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_two_databases/macros/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_two_databases/models/splitgraph/join_two_dbs.sql: -------------------------------------------------------------------------------- 1 | {{ config(materialized='table') }} 2 | 3 | with fruits as ( 4 | select fruit_id, name from fruits_data.fruits 5 | ), 6 | orders as ( 7 | select name, fruit_id, happy, review 8 | from order_data.orders 9 | ) 10 | 11 | select fruits.name as fruit, orders.name as customer, review 12 | from fruits join orders 13 | on fruits.fruit_id = orders.fruit_id 14 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_two_databases/snapshots/.gitkeep -------------------------------------------------------------------------------- /examples/dbt_two_databases/splitgraph/mongodb/Dockerfile: -------------------------------------------------------------------------------- 1 | ## 2 | # mongoorigin 3 | ## 4 | FROM mongo:3.6.5-jessie 5 | 6 | ENV ORIGIN_USER docker 7 | ENV ORIGIN_PASS docker 8 | ENV ORIGIN_MONGO_DB origindb 9 | 10 | ADD start.sh /start.sh 11 | RUN chmod a+x /start.sh 12 | 13 | VOLUME /src 14 | 15 | CMD ["/start.sh"] 16 | 17 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/splitgraph/mongodb/setup.js: -------------------------------------------------------------------------------- 1 | db.orders.drop() 2 | 3 | db.orders.insertMany([ 4 | {"name": "Alex", "fruit_id": 1, "happy": false, "review": "Was in transit for five days, arrived rotten."}, 5 | {"name": "James", "fruit_id": 2, "happy": true}, 6 | {"name": "Alice", "fruit_id": 3, "happy": true, "review": "Will use in salad, great fruit!"}]); 7 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/splitgraph/mongodb/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ORIGIN_USER=${ORIGIN_USER} 4 | ORIGIN_PASS=${ORIGIN_PASS} 5 | ORIGIN_MONGO_DB=${ORIGIN_MONGO_DB} 6 | 7 | mongod --fork --logpath /var/log/mongodb/mongod.log --bind_ip_all 8 | 9 | if [ ! 
-e '/done_setup' ]; then 10 | mongo ${ORIGIN_MONGO_DB} --eval "db.createUser({\"user\": \"${ORIGIN_USER}\", \"pwd\": \"${ORIGIN_PASS}\", \"roles\": [\"readWrite\"]})" 11 | 12 | if [ -e '/src/setup.js' ]; then 13 | mongo ${ORIGIN_MONGO_DB} < /src/setup.js 14 | fi 15 | 16 | echo 1 > /done_setup 17 | fi 18 | 19 | tail -F /var/log/mongodb/mongod.log 20 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/splitgraph/postgresql/setup.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS fruits; 2 | 3 | CREATE TABLE fruits ( 4 | fruit_id serial primary key, 5 | name varchar 6 | ); 7 | 8 | INSERT INTO fruits (name) VALUES ('apple'); 9 | INSERT INTO fruits (name) VALUES ('orange'); 10 | INSERT INTO fruits (name) VALUES ('tomato'); 11 | -------------------------------------------------------------------------------- /examples/dbt_two_databases/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/dbt_two_databases/tests/.gitkeep -------------------------------------------------------------------------------- /examples/import-from-csv/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | -------------------------------------------------------------------------------- /examples/import-from-csv/README.md: -------------------------------------------------------------------------------- 1 | # Importing Splitgraph images from CSV 2 | 3 | This example will: 4 | 5 | * Download a CSV file of weather history at Raleigh-Durham International Airport from [data.gov](https://catalog.data.gov/dataset/local-weather-archive) 6 | * Import the file into a new database table 7 | * Create a Splitgraph image from this database table. 8 | 9 | ## Running the example 10 | 11 | Run `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. 
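The same flow can also be reproduced by hand against a running engine. This is only a sketch — the repository and table names are illustrative and the column list is omitted, so adjust the `CREATE TABLE` statement and CSV options to the actual file:

```
# Download the CSV from the data.gov link above as weather.csv, then:
sgr init demo/weather                       # create an empty repository (and schema) on the engine
psql $(sgr config -n) -c 'CREATE TABLE "demo/weather".rdu_weather (...)'
psql $(sgr config -n) -c "\copy \"demo/weather\".rdu_weather FROM 'weather.csv' CSV HEADER"
sgr commit demo/weather                     # snapshot the loaded table as a Splitgraph image
```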
12 | -------------------------------------------------------------------------------- /examples/import-from-csv/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | -------------------------------------------------------------------------------- /examples/import-from-mongo/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | 8 | [data_sources] 9 | mongo_fdw=splitgraph.hooks.data_source.MongoDataSource 10 | -------------------------------------------------------------------------------- /examples/import-from-mongo/README.md: -------------------------------------------------------------------------------- 1 | # Creating a Splitgraph image from MongoDB 2 | 3 | This example will: 4 | 5 | * Create a MongoDB container with some sample data 6 | * Mount the Mongo collection as a schema on the Splitgraph engine 7 | * Create a new image from the staging table, "freezing" the collection. 8 | 9 | ## Running the example 10 | 11 | `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. 12 | -------------------------------------------------------------------------------- /examples/import-from-mongo/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | mongo: 15 | build: ./mongodb 16 | ports: 17 | - '0.0.0.0:27017:27017' 18 | environment: 19 | - ORIGIN_USER=originro 20 | - ORIGIN_PASS=originpass 21 | - ORIGIN_MONGO_DB=origindb 22 | expose: 23 | - 27017 24 | volumes: 25 | - ./mongodb/setup.js:/src/setup.js 26 | -------------------------------------------------------------------------------- /examples/import-from-mongo/mongo_import.splitfile: -------------------------------------------------------------------------------- 1 | FROM MOUNT mongo_fdw originro:originpass@mongo:27017 '{"tables": {"stuff": { 2 | "options": { 3 | "database": "origindb", 4 | "collection": "stuff" 5 | }, 6 | "schema": { 7 | "name": "text", 8 | "duration": "numeric", 9 | "happy": "boolean"}}}}' 10 | IMPORT {SELECT * FROM stuff} AS stuff 11 | -------------------------------------------------------------------------------- /examples/import-from-mongo/mongodb/Dockerfile: -------------------------------------------------------------------------------- 1 | ## 2 | # mongoorigin 3 | ## 4 | FROM mongo:3.6.5-jessie 5 | 6 | ENV ORIGIN_USER docker 7 | ENV ORIGIN_PASS docker 8 | ENV ORIGIN_MONGO_DB origindb 9 | 10 | ADD start.sh /start.sh 11 | RUN chmod a+x /start.sh 12 | 13 | VOLUME /src 14 | 15 | CMD ["/start.sh"] 16 | 17 | -------------------------------------------------------------------------------- /examples/import-from-mongo/mongodb/setup.js: -------------------------------------------------------------------------------- 1 | db.stuff.drop() 2 | 3 | 
db.stuff.insertMany([ 4 | {"name": "Alex", "duration": 12, "happy": false}, 5 | {"name": "James", "duration": 2, "happy": true}, 6 | {"name": "Alice", "duration": 98765, "happy": true}]); 7 | -------------------------------------------------------------------------------- /examples/import-from-mongo/mongodb/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ORIGIN_USER=${ORIGIN_USER} 4 | ORIGIN_PASS=${ORIGIN_PASS} 5 | ORIGIN_MONGO_DB=${ORIGIN_MONGO_DB} 6 | 7 | mongod --fork --logpath /var/log/mongodb/mongod.log --bind_ip_all 8 | 9 | if [ ! -e '/done_setup' ]; then 10 | mongo ${ORIGIN_MONGO_DB} --eval "db.createUser({\"user\": \"${ORIGIN_USER}\", \"pwd\": \"${ORIGIN_PASS}\", \"roles\": [\"readWrite\"]})" 11 | 12 | if [ -e '/src/setup.js' ]; then 13 | mongo ${ORIGIN_MONGO_DB} < /src/setup.js 14 | fi 15 | 16 | echo 1 > /done_setup 17 | fi 18 | 19 | tail -F /var/log/mongodb/mongod.log 20 | -------------------------------------------------------------------------------- /examples/iris/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | -------------------------------------------------------------------------------- /examples/iris/README.md: -------------------------------------------------------------------------------- 1 | # UCIML Iris train/test validation 2 | 3 | This example showcases using the Splitgraph API from within Python, as well as using Splitgraph 4 | to seamlessly switch between two copies of an SQL database. 5 | 6 | It will: 7 | 8 | * ingest the Iris dataset from a CSV file 9 | * generate a train and test dataset as distinct Splitgraph images 10 | * use scikit-learn to train a logistic regression classifier on one half of the dataset 11 | * compare the performance of the model on different datasets 12 | 13 | ## Running the example 14 | 15 | Build and start up the engine: 16 | 17 | ``` 18 | export COMPOSE_PROJECT_NAME=splitgraph_example 19 | docker-compose down -v 20 | docker-compose build 21 | docker-compose up -d 22 | sgr init 23 | ``` 24 | 25 | Install this package with [Poetry](https://github.com/sdispater/poetry): `poetry install` 26 | 27 | Open the notebook in Jupyter: `jupyter notebook iris.ipynb` 28 | -------------------------------------------------------------------------------- /examples/iris/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | -------------------------------------------------------------------------------- /examples/iris/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "splitgraph-iris" 3 | version = "0.0.0" 4 | description = "Splitgraph Iris Dataset Example" 5 | license = "Apache 2.0" 6 | authors = ["Splitgraph Limited"] 7 | readme = "README.md" 8 | homepage = "https://www.splitgraph.com" 9 | repository = "https://github.com/splitgraph/sgr" 10 | 11 | [tool.poetry.dependencies] 12 | python = ">=3.7,<4.0" 13 | splitgraph = { path = "../.." 
} 14 | 15 | # Requirements to get the actual demo running 16 | pandas = { version = ">=0.24" } 17 | sklearn = "^0.0.0" 18 | jupyter = "^1.0" 19 | 20 | [build-system] 21 | requires = ["poetry>=0.12"] 22 | build-backend = "poetry.masonry.api" 23 | -------------------------------------------------------------------------------- /examples/pg-replication/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | 8 | ; Add the origin database here so that we can 9 | ; use sgr to talk to it (can use any pg client) 10 | [remote:origin] 11 | SG_ENGINE_HOST=localhost 12 | SG_ENGINE_PORT=5431 13 | SG_ENGINE_USER=originuser 14 | SG_ENGINE_PWD=originpass 15 | SG_ENGINE_DB=originuser 16 | -------------------------------------------------------------------------------- /examples/pg-replication/README.md: -------------------------------------------------------------------------------- 1 | # Using Splitgraph as a PostgreSQL replication subscriber 2 | 3 | It is possible to add a Splitgraph engine as a replication client to a production PostgreSQL 4 | database, occasionally committing the changes as new Splitgraph images. This is done through 5 | [PostgreSQL logical replication](https://www.postgresql.org/docs/current/logical-replication.html) 6 | and has many uses: 7 | 8 | * Recording the history of the upstream database for audit purposes 9 | * Using anonymized production data snapshots for integration testing 10 | * Building derivative datasets with Splitfiles as an alternative to data warehousing. 11 | 12 | This example will: 13 | 14 | * Spin up a PostgreSQL database with some sample data 15 | * Set up a Splitgraph engine as a replication client 16 | * Create an image from the data received from the origin database 17 | * Make changes to the origin database, which will get propagated to the engine 18 | * Record the changes as a new image 19 | 20 | ## Running the example 21 | 22 | `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. 23 | -------------------------------------------------------------------------------- /examples/pg-replication/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | # Both the engine and the origin database have to be started with 13 | # wal_level=logical (logical WAL decoding enabled). 
14 | command: 15 | - -c 16 | - 'wal_level=logical' 17 | expose: 18 | - 5432 19 | origin: 20 | image: postgres:12 21 | ports: 22 | - '0.0.0.0:5431:5432' 23 | command: 24 | - -c 25 | - 'wal_level=logical' 26 | environment: 27 | - POSTGRES_USER=originuser 28 | - POSTGRES_PASSWORD=originpass 29 | volumes: 30 | - ./origin/000_initial_data.sql:/docker-entrypoint-initdb.d/000_initial_data.sql 31 | -------------------------------------------------------------------------------- /examples/pg-replication/engine/schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE production_data.customers ( 2 | id INTEGER PRIMARY KEY, 3 | name VARCHAR, 4 | registration_time TIMESTAMP 5 | ); 6 | 7 | CREATE TABLE production_data.orders ( 8 | id INTEGER PRIMARY KEY, 9 | customer_id INTEGER, 10 | quantity INTEGER, 11 | item_name VARCHAR, 12 | placed_time TIMESTAMP 13 | ); 14 | -------------------------------------------------------------------------------- /examples/pg-replication/origin/000_initial_data.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA production_data; 2 | CREATE TABLE production_data.customers ( 3 | id SERIAL PRIMARY KEY, 4 | name VARCHAR, 5 | registration_time TIMESTAMP 6 | ); 7 | 8 | CREATE TABLE production_data.orders ( 9 | id SERIAL PRIMARY KEY, 10 | customer_id INTEGER, 11 | quantity INTEGER, 12 | item_name VARCHAR, 13 | placed_time TIMESTAMP, 14 | FOREIGN KEY (customer_id) REFERENCES production_data.customers(id) 15 | ); 16 | 17 | INSERT INTO production_data.customers(name, registration_time) VALUES 18 | ('Jack Doe', '2020-01-01 12:00:00'), 19 | ('Jane Doe', '2020-01-02 03:04:00'), 20 | ('Alexandra Sample', '2020-03-01 01:05:01'); 21 | 22 | INSERT INTO production_data.orders (customer_id, quantity, item_name, placed_time) VALUES 23 | (1, 15, 'Toilet Roll', '2020-03-15 12:00:00'), 24 | (1, 20, 'Hand Sanitizer', '2020-03-16 02:00:00'), 25 | (2, 5, 'Pasta', '2020-03-21 17:32:11'), 26 | (3, 50, 'Surgical Mask', '2020-04-01 12:00:01'), 27 | (1, 50, 'Surgical Mask', '2020-04-02 11:29:42'); 28 | -------------------------------------------------------------------------------- /examples/pgadmin/.sgconfig: -------------------------------------------------------------------------------- 1 | ; Copy your .sgconfig file with data.splitgraph.com API credentials here 2 | ; and change SG_ENGINE_PWD to "supersecure" 3 | -------------------------------------------------------------------------------- /examples/pgadmin/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable-postgis} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | - SG_CONFIG_FILE=/.sgconfig 13 | expose: 14 | - 5432 15 | volumes: 16 | - ./.sgconfig:/.sgconfig 17 | pgadmin: 18 | image: dpage/pgadmin4:4.21 19 | ports: 20 | - 5050:80 21 | environment: 22 | - PGADMIN_DEFAULT_EMAIL=pgadmin4@pgadmin.org 23 | - PGADMIN_DEFAULT_PASSWORD=password 24 | volumes: 25 | - ./pgadmin/pgpassfile:/tmp/pgpassfile 26 | - ./pgadmin/servers.json:/tmp/servers.json 27 | -------------------------------------------------------------------------------- /examples/pgadmin/pgadmin-geo-lq-example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/examples/pgadmin/pgadmin-geo-lq-example.png -------------------------------------------------------------------------------- /examples/pgadmin/pgadmin/pgpassfile: -------------------------------------------------------------------------------- 1 | engine:5432:splitgraph:sgr:supersecure -------------------------------------------------------------------------------- /examples/pgadmin/pgadmin/servers.json: -------------------------------------------------------------------------------- 1 | { 2 | "Servers": { 3 | "1": { 4 | "Name": "splitgraph", 5 | "Group": "Servers", 6 | "Host": "engine", 7 | "Port": 5432, 8 | "MaintenanceDB": "splitgraph", 9 | "Username": "sgr", 10 | "SSLMode": "prefer", 11 | "PassFile": "/pgpassfile" 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /examples/pgadmin/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | DC="docker-compose --project-name splitgraph_example" 4 | 5 | # Remove old example containers 6 | $DC down -v 7 | 8 | # Start the containers 9 | $DC up -d 10 | 11 | # Initialize the Splitgraph engine 12 | sgr init 13 | 14 | # Get pgadmin to load server configuration and credentials for the engine 15 | # Inspired by https://github.com/MaartenSmeets/db_perftest 16 | 17 | echo "Initializing pgAdmin and adding the engine to it..." 18 | $DC exec pgadmin sh -c "mkdir -m 700 /var/lib/pgadmin/storage/pgadmin4_pgadmin.org && \\ 19 | cp /tmp/pgpassfile /var/lib/pgadmin/storage/pgadmin4_pgadmin.org && \\ 20 | chmod 600 /var/lib/pgadmin/storage/pgadmin4_pgadmin.org/pgpassfile && \\ 21 | python /pgadmin4/setup.py --load-servers /tmp/servers.json" 22 | 23 | echo "pgAdmin initialized." 
24 | echo "Go to http://localhost:5050 and log in with" 25 | echo " email: pgadmin4@pgadmin.org" 26 | echo " password: password" 27 | -------------------------------------------------------------------------------- /examples/postgis/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # Quick cleanup script that deletes added/built data 4 | rm nyc.png -f 5 | rm vote_map.png -f 6 | 7 | sgr rm -y vote_map 8 | sgr rm -y splitgraph/election-geodata 9 | sgr cleanup 10 | 11 | sgr status 12 | -------------------------------------------------------------------------------- /examples/postgis/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable-postgis} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | -------------------------------------------------------------------------------- /examples/postgis/pyproject.toml: -------------------------------------------------------------------------------- 1 | # These are the requirements to run the Postgis Jupyter notebook 2 | 3 | [tool.poetry] 4 | name = "splitgraph-postgis" 5 | version = "0.0.0" 6 | description = "Splitgraph PostGIS example" 7 | license = "Apache 2.0" 8 | authors = ["Splitgraph Limited"] 9 | readme = "README.md" 10 | homepage = "https://www.splitgraph.com" 11 | repository = "https://github.com/splitgraph/sgr" 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.7,<4.0" 15 | splitgraph = { path = "../.." } 16 | pandas = ">=0.24" 17 | jupyter = ">=1.0" 18 | # For plotting 19 | geopandas = "^0.7.0" 20 | descartes = "^1.1.0" 21 | shapely = "^1.7.0" 22 | 23 | 24 | [build-system] 25 | requires = ["poetry>=0.12"] 26 | build-backend = "poetry.masonry.api" 27 | -------------------------------------------------------------------------------- /examples/postgis/rerun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | jupyter nbconvert --ExecutePreprocessor.kernel_name=python3 --to notebook --execute --inplace -- vote_map.ipynb -------------------------------------------------------------------------------- /examples/postgrest/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | -------------------------------------------------------------------------------- /examples/postgrest/README.md: -------------------------------------------------------------------------------- 1 | # Running PostgREST against the Splitgraph engine 2 | 3 | Since a Splitgraph engine is also a PostgreSQL database, tools that use PostgreSQL can work 4 | with Splitgraph tables without any changes. 5 | 6 | One such tool is [PostgREST](http://postgrest.org/en/latest/) that generates a REST API for a 7 | PostgreSQL schema. Splitgraph runs PostgREST in Splitgraph Cloud, allowing any Splitgraph dataset 8 | to be accessed via HTTP. 
For example, [this](https://data.splitgraph.com/splitgraph/domestic_us_flights/latest/-/rest/flights?and=(origin_airport.eq.JFK,destination_airport.eq.LAX)) link runs the following PostgREST query against 9 | the [`splitgraph/domestic_us_flights:latest`](https://www.splitgraph.com/splitgraph/domestic_us_flights/latest/-/overview) image: 10 | 11 | ``` 12 | flights?and=(origin_airport.eq.JFK,destination_airport.eq.LAX) 13 | ``` 14 | 15 | You can reproduce a similar setup locally, getting PostgREST to work against a Splitgraph image. 16 | 17 | This example will: 18 | 19 | * Set up a Splitgraph engine with some sample data 20 | * Run a PostgREST instance against the engine 21 | * Use curl to query the PostgREST instance. 22 | * Swap the schema to be a layered checkout, which still looks like a regular schema 23 | to PostgREST but has the ability to lazily download and cache required fragments 24 | of the dataset on the fly. 25 | 26 | ## Running the example 27 | 28 | `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. 29 | -------------------------------------------------------------------------------- /examples/postgrest/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | postgrest: 15 | image: postgrest/postgrest:latest 16 | command: 17 | - postgrest 18 | - /etc/postgrest.conf 19 | ports: 20 | - '0.0.0.0:8080:8080' 21 | volumes: 22 | - ./postgrest.conf:/etc/postgrest.conf 23 | -------------------------------------------------------------------------------- /examples/postgrest/postgrest.conf: -------------------------------------------------------------------------------- 1 | db-uri = "postgres://sgr:supersecure@engine:5432/splitgraph" 2 | db-schema = "demo/weather" 3 | 4 | db-anon-role = "sgr" 5 | server-host = "0.0.0.0" 6 | server-port = "8080" 7 | 8 | openapi-server-proxy-uri = "http://localhost:8080" 9 | -------------------------------------------------------------------------------- /examples/push-to-object-storage/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | 8 | [remote: engine_2] 9 | SG_ENGINE_ADMIN_USER=sgr 10 | SG_ENGINE_ADMIN_PWD=supersecure 11 | SG_ENGINE_POSTGRES_DB_NAME=splitgraph 12 | SG_ENGINE_HOST=localhost 13 | SG_ENGINE_PORT=5431 14 | SG_ENGINE_USER=sgr 15 | SG_ENGINE_PWD=supersecure 16 | SG_ENGINE_DB_NAME=splitgraph 17 | -------------------------------------------------------------------------------- /examples/push-to-object-storage/README.md: -------------------------------------------------------------------------------- 1 | # Pushing data to object storage 2 | 3 | ## Introduction 4 | 5 | When [pushing data to a remote engine](../push-to-other-engine), you can upload Splitgraph objects 6 | to S3-compatible object storage instead of storing them directly on the remote engine. We use 7 | [MinIO](https://min.io/) for this example. 8 | 9 | The remote engine has the actual access credentials to the bucket and pre-signs upload/download 10 | URLs for the local engine. 
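In outline, the flow looks like this (the repository name is illustrative; the exact commands, including how the `engine_2` remote is selected, are driven by `example.yaml`):

```
sgr init demo/data        # create a dataset on the local engine (engine_1)
# ...create and populate a table in the "demo/data" schema...
sgr commit demo/data      # snapshot it as an image, producing Splitgraph objects
sgr push demo/data        # on push, engine_2 pre-signs MinIO URLs and engine_1
                          # uploads the objects straight to object storage
```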
11 | 12 | This example will: 13 | 14 | * Start up two Splitgraph engines and configure them to synchronize with each other. 15 | * Create a dataset on the local engine 16 | * Push it out to the remote engine, uploading objects to object storage. 17 | 18 | ## Running the example 19 | 20 | Run `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. -------------------------------------------------------------------------------- /examples/push-to-object-storage/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine_1: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | - SG_CONFIG_FILE=/.sgconfig 13 | expose: 14 | - 5432 15 | # Need to mount this into the engine so that access credentials to 16 | # the second engine are available to it 17 | volumes: 18 | - ${SG_CONFIG_FILE-./.sgconfig}:/.sgconfig 19 | 20 | engine_2: 21 | image: splitgraph/engine:${DOCKER_TAG-stable} 22 | ports: 23 | - '0.0.0.0:5431:5432' 24 | environment: 25 | - POSTGRES_USER=sgr 26 | - POSTGRES_PASSWORD=supersecure 27 | - POSTGRES_DB=splitgraph 28 | - SG_LOGLEVEL=INFO 29 | - SG_S3_HOST=objectstorage 30 | - SG_S3_PORT=9000 31 | - SG_S3_KEY=minio_access_key 32 | - SG_S3_PWD=minio_secret_key 33 | expose: 34 | - 5432 35 | 36 | objectstorage: 37 | image: minio/minio:RELEASE.2022-05-19T18-20-59Z.fips 38 | ports: 39 | - '0.0.0.0:9000:9000' 40 | environment: 41 | MINIO_ACCESS_KEY: minio_access_key 42 | MINIO_SECRET_KEY: minio_secret_key 43 | command: server /tmp 44 | -------------------------------------------------------------------------------- /examples/push-to-other-engine/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | 8 | [remote: engine_2] 9 | SG_ENGINE_ADMIN_USER=sgr 10 | SG_ENGINE_ADMIN_PWD=supersecure 11 | SG_ENGINE_POSTGRES_DB_NAME=splitgraph 12 | SG_ENGINE_HOST=localhost 13 | SG_ENGINE_PORT=5431 14 | SG_ENGINE_USER=sgr 15 | SG_ENGINE_PWD=supersecure 16 | SG_ENGINE_DB_NAME=splitgraph -------------------------------------------------------------------------------- /examples/push-to-other-engine/README.md: -------------------------------------------------------------------------------- 1 | # Pushing data between two Splitgraph engines. 2 | 3 | ## Introduction 4 | 5 | Much like Git, Splitgraph allows you to push to and pull datasets from other remote Splitgraph installations. 6 | 7 | This example will: 8 | 9 | * Start up two Splitgraph engines and configure them to synchronize with each other. 10 | * Create a dataset on the local engine 11 | * Push it out to the remote engine 12 | * Make a change to the dataset on the local engine 13 | * Push the changes out again. 14 | 15 | ## Running the example 16 | 17 | Run `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. 
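For reference, a rough Python-API equivalent of this workflow is sketched below. This is a minimal sketch rather than the canonical invocation: it assumes the two engines from this example's `docker-compose.yml` and `.sgconfig` are running, and the `get_engine` import path and the `commit()`/`push()` signatures are assumptions to be checked against the API reference.

```python
from splitgraph.core.repository import Repository

# Assumption: the module that exports get_engine may differ between versions.
from splitgraph.core.engine import get_engine

# A repository on the local (default) engine, following the Repository(namespace, name)
# pattern used elsewhere in these examples.
repo = Repository("", "example_repo")
repo.init()

# ... create or modify tables inside the repository's checkout here ...

repo.commit()  # assumed API: snapshot the current state of the checkout as a new image

# The same repository addressed on the second engine ([remote: engine_2] in .sgconfig).
remote_repo = Repository("", "example_repo", get_engine("engine_2"))
repo.push(remote_repo)  # assumed API: upload the new image to the remote engine
```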
-------------------------------------------------------------------------------- /examples/push-to-other-engine/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine_1: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | - SG_CONFIG_FILE=/.sgconfig 13 | expose: 14 | - 5432 15 | # Need to mount this into the engine so that access credentials to 16 | # the second engine are available to it 17 | volumes: 18 | - ${SG_CONFIG_FILE-./.sgconfig}:/.sgconfig 19 | 20 | engine_2: 21 | image: splitgraph/engine:${DOCKER_TAG-stable} 22 | ports: 23 | - '0.0.0.0:5431:5432' 24 | environment: 25 | - POSTGRES_USER=sgr 26 | - POSTGRES_PASSWORD=supersecure 27 | - POSTGRES_DB=splitgraph 28 | - SG_LOGLEVEL=INFO 29 | expose: 30 | - 5432 31 | -------------------------------------------------------------------------------- /examples/rerun_notebooks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Reruns all notebooks in all subdirectories. Should be run inside of an env with SG + required libraries 4 | # (e.g. sklearn/jupyter etc) installed. 5 | 6 | find . -not -path '*/\.*' -type f -name '*.ipynb' \ 7 | -exec jupyter nbconvert --ExecutePreprocessor.kernel_name=python3 --to notebook --execute --inplace -- {} \; 8 | -------------------------------------------------------------------------------- /examples/sample_splitfiles/README.md: -------------------------------------------------------------------------------- 1 | # Sample Splitfiles 2 | 3 | ## Introduction 4 | 5 | This directory contains loose sample Splitfiles that can be run against various datasets 6 | in Splitgraph Cloud. 7 | 8 | ## Running 9 | 10 | You need to be logged into Splitgraph Cloud. You can register for Splitgraph Cloud with `sgr cloud register`. 11 | 12 | To run a Splitfile, do 13 | 14 | sgr build [filename] -o [output repository name, optional] 15 | 16 | You can also run a Splitfile directly from Github: 17 | 18 | curl -SsL https://raw.githubusercontent.com/splitgraph/splitgraph/master/examples/sample_splitfiles/[SPLITFILE_NAME].splitfile | sgr build - -o [repository_name] 19 | 20 | For example: 21 | 22 | curl -SsL https://raw.githubusercontent.com/splitgraph/splitgraph/master/examples/sample_splitfiles/county_votes.splitfile | sgr build - -o county_votes 23 | 24 | Each Splitfile contains extensive comments about what it does. 25 | -------------------------------------------------------------------------------- /examples/sample_splitfiles/london_votes.splitfile: -------------------------------------------------------------------------------- 1 | # This Splitfile joins the results of the 2017 UK General Election with 2 | # the geographical data for London's wards through an ONS lookup table 3 | # mapping ward codes to constituency codes. 
4 | # 5 | # You need a PostGIS-enabled engine to run this Splitfile: 6 | # 7 | # sgr engine upgrade --image splitgraph/engine:stable-postgis 8 | # sgr sql "CREATE EXTENSION IF NOT EXISTS postgis" 9 | 10 | FROM splitgraph/uk_2017_ge:latest IMPORT { 11 | SELECT 12 | ons_code, 13 | SUM(CASE WHEN party_id = 'Conservative' 14 | THEN valid_votes ELSE 0 END) 15 | AS conservative_votes, 16 | SUM(CASE WHEN party_id = 'Labour' 17 | THEN valid_votes ELSE 0 END) 18 | AS labour_votes, 19 | SUM(valid_votes) AS total_votes 20 | FROM ward_results 21 | GROUP BY ons_code 22 | } AS votes_by_party 23 | 24 | SQL { 25 | CREATE TABLE london_votes AS 26 | SELECT 27 | lookup."PCON18NM" AS constituency, 28 | v.conservative_votes, 29 | v.labour_votes, 30 | v.total_votes, 31 | ST_Union(london.geom) AS geom 32 | FROM "splitgraph/london_wards:latest".city_merged_2018 london 33 | JOIN "splitgraph/uk_wards".lookup_table lookup 34 | ON london.gss_code = lookup."WD18CD" 35 | JOIN votes_by_party v 36 | ON v.ons_code = lookup."PCON18CD" 37 | GROUP BY constituency, 38 | conservative_votes, 39 | labour_votes, 40 | total_votes 41 | } 42 | -------------------------------------------------------------------------------- /examples/splitfiles/README.md: -------------------------------------------------------------------------------- 1 | # Using Splitfiles to build Splitgraph images 2 | 3 | This example will: 4 | 5 | * Create a source Splitgraph image in the same way as in the CSV ingestion example 6 | * Use a Splitfile to create a monthly summary of weather at RDU airport 7 | * Inspect the image's provenance 8 | * Alter the data to pretend a "revision" has been issued 9 | * Rebuild the image against the new data from its provenance 10 | 11 | ## Running the example 12 | 13 | `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. 14 | -------------------------------------------------------------------------------- /examples/splitfiles/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | -------------------------------------------------------------------------------- /examples/splitfiles/rdu-weather-summary.splitfile: -------------------------------------------------------------------------------- 1 | # First, import the original data table: this doesn't consume extra space (reuses the same 2 | # object that's used by the demo/weather repository. 
3 | 4 | FROM demo/weather IMPORT rdu AS source_data 5 | 6 | # Now summarize the data 7 | SQL CREATE TABLE monthly_summary AS ( \ 8 | SELECT to_char(date, 'YYYYMM') AS month, \ 9 | AVG(precipitation) AS average_precipitation, \ 10 | AVG(snowfall) AS average_snowfall \ 11 | FROM source_data \ 12 | GROUP BY month \ 13 | ORDER BY month ASC) 14 | -------------------------------------------------------------------------------- /examples/splitgraph-cloud/README.md: -------------------------------------------------------------------------------- 1 | # Splitgraph Cloud: publishing data and the Query API 2 | 3 | This example will: 4 | 5 | * upload the RDU weather dataset from the CSV demo to Splitgraph, making 6 | it viewable on [the Splitgraph registry](https://www.splitgraph.com/splitgraph-demo/weather) 7 | * upload its description to the registry 8 | * use the autogenerated [Postgrest](https://postgrest.org) endpoint for this dataset 9 | to query it 10 | * use the Splitgraph query API to run a couple of Splitfile commands joining this dataset with [US domestic flights from 1990 to 2009](https://www.splitgraph.com/splitgraph/domestic_us_flights) ([source](https://archive.org/details/icsdata-d35-million-us-domestic-flights-from-1990-to-2009_20100803170854-tsv)) to generate a table of average temperature at Raleigh-Durham Airport and the total passenger count in a given month 11 | * pull the created dataset and inspect it. 12 | 13 | ## Running the example 14 | 15 | Copy your .sgconfig file into this directory (it must contain API credentials to access 16 | data.splitgraph.com). If you don't have them yet, take a look at the 17 | [Splitgraph Cloud reference](https://www.splitgraph.com/docs/splitgraph_cloud/introduction) or register using `sgr cloud register`. 18 | 19 | Change the `splitgraph-demo` username in the example file to yours. 20 | 21 | Then, run `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. -------------------------------------------------------------------------------- /examples/splitgraph-cloud/dataset-metadata.yml: -------------------------------------------------------------------------------- 1 | # This is a sample dataset metadata file that's used 2 | # to update the dataset's README and description. 3 | 4 | readme: dataset-readme.md 5 | description: Weather history in Raleigh-Durham International airport joined with US domestic flights data (uploaded from Splitgraph release CI) 6 | -------------------------------------------------------------------------------- /examples/splitgraph-cloud/dataset-readme.md: -------------------------------------------------------------------------------- 1 | # Sample dataset README 2 | 3 | Dataset of weather history in Raleigh-Durham International airport, joined with 4 | [US domestic flights data](/splitgraph/domestic_us_flights). 5 | 6 | This dataset was uploaded from a CI job. 
7 | 8 | ## Source 9 | 10 | [data.gov](https://catalog.data.gov/dataset/local-weather-archive) 11 | -------------------------------------------------------------------------------- /examples/splitgraph-cloud/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | - SG_CONFIG_FILE=/.sgconfig 13 | expose: 14 | - 5432 15 | # Need to mount this into the engine so that access credentials to 16 | # data.splitgraph.com propagate into it. 17 | volumes: 18 | - ${SG_CONFIG_FILE-./.sgconfig}:/.sgconfig 19 | -------------------------------------------------------------------------------- /examples/splitgraph-cloud/request_1.json: -------------------------------------------------------------------------------- 1 | {"command": "FROM splitgraph/domestic_us_flights:latest IMPORT {SELECT fly_month, SUM(passengers) AS total_passengers FROM flights WHERE origin_airport = 'RDU' OR destination_airport = 'RDU' GROUP BY fly_month ORDER BY fly_month} AS flights", 2 | "tag": "with_flight_data"} 3 | 4 | -------------------------------------------------------------------------------- /examples/splitgraph-cloud/request_2.json: -------------------------------------------------------------------------------- 1 | {"command": "SQL CREATE TABLE rdu_flights_weather AS ( WITH monthly_summary AS (SELECT date_trunc ('month', date) AS month, AVG(temperaturemin) AS tmin_avg, AVG(temperaturemax) AS tmax_avg FROM rdu GROUP BY month) SELECT w.month, w.tmin_avg, w.tmax_avg, f.total_passengers FROM monthly_summary w JOIN flights f ON w.month = f.fly_month)", 2 | "tag": "joined_data"} 3 | 4 | -------------------------------------------------------------------------------- /examples/template/.sgconfig: -------------------------------------------------------------------------------- 1 | [defaults] 2 | SG_LOGLEVEL=INFO 3 | SG_ENGINE_HOST=localhost 4 | SG_ENGINE_PORT=5432 5 | SG_ENGINE_USER=sgr 6 | SG_ENGINE_PWD=supersecure 7 | 8 | [data_sources] 9 | mongo_fdw=splitgraph.hooks.data_source.MongoDataSource 10 | 11 | ; Add needed configuration here -------------------------------------------------------------------------------- /examples/template/README.md: -------------------------------------------------------------------------------- 1 | # Template for Splitgraph examples 2 | 3 | In general, you want: 4 | 5 | * A docker-compose file with the engine and any extra components required (e.g. MongoDB) 6 | * Change the .sgconfig file to have the required configuration of the engine (e.g extra mount handlers) 7 | * An example.yaml file that runs the example (see the file in the current directory for inspiration): 8 | * Clean up (`docker-compose down -v`) before and after running the example 9 | * Examples are supposed to be run from the same directory that the YAML is located in 10 | * If one command in a block fails (returns nonzero), the whole example fails 11 | * There's a test suite in `test/` that automatically scans through all directories with example.yaml and 12 | runs them without pausing (to check that the example can be run through and doesn't crash). Run `pytest` 13 | from the `examples/` directory to run all tests or `pytest -k folder_name` to run just one. 
14 | 15 | ## Running the example 16 | 17 | `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. 18 | -------------------------------------------------------------------------------- /examples/template/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | expose: 13 | - 5432 14 | # Add extra required services here. -------------------------------------------------------------------------------- /examples/template/example.yaml: -------------------------------------------------------------------------------- 1 | - commands: 2 | - "# Build and start the containers -- use the same project name so that" 3 | - "# containers from other examples are removed" 4 | - docker-compose --project-name splitgraph_example down -v --remove-orphans 5 | - docker-compose --project-name splitgraph_example build 6 | - docker-compose --project-name splitgraph_example up -d 7 | - commands: 8 | - "# This block will print out" 9 | - "# wait for the user to press ENTER" 10 | - "# and then execute" 11 | - echo Hello! 12 | - commands: 13 | - "# This one won't pause after running the command" 14 | - echo Hello! 15 | wait: False 16 | - commands: 17 | - "# This one will show a different fake 'prompt'" 18 | prompt: root@localhost 19 | - commands: 20 | - echo "This one won't echo commands before running them (good for behind-the-scenes setup)" 21 | echo: False 22 | - commands: 23 | - echo "This one will suppress stderr" 1>&2 24 | stderr: False 25 | - commands: 26 | - "# Delete the docker-compose project and cleanup" 27 | - docker-compose --project-name splitgraph_example down -v --remove-orphans -------------------------------------------------------------------------------- /examples/test/conftest.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | # List of examples to ignore: these require registration or are difficult to test. These are all run on release when we record Asciinemas. 
4 | _DO_NOT_TEST = ["us-election", "splitgraph-cloud", "bloom-filter"] 5 | 6 | 7 | def pytest_generate_tests(metafunc): 8 | """Generate test cases (one for each subdirectory in examples/ apart from "test" itself""" 9 | if "example_path" in metafunc.fixturenames: 10 | basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) 11 | 12 | subfolders = [] 13 | for f in os.scandir(basedir): 14 | if ( 15 | not f.is_dir() 16 | or f.name in ["htmlcov", "test"] 17 | or f.name in _DO_NOT_TEST 18 | or not os.path.exists(os.path.join(f.path, "example.yaml")) 19 | ): 20 | continue 21 | subfolders.append(f.path) 22 | 23 | metafunc.parametrize("example_path", subfolders) 24 | -------------------------------------------------------------------------------- /examples/test/test_examples.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import pytest 5 | 6 | 7 | @pytest.mark.example 8 | def test_example(example_path): 9 | """ 10 | Run a test for a single example directory 11 | 12 | :param example_path: Path to the example directory 13 | """ 14 | 15 | # Remove SG-specific envvars that were set in tests (to simulate run_example.py being 16 | # actually run in a clean environment, e.g. without SG_CONFIG_FILE set.) 17 | env = {k: v for k, v in os.environ.items() if not k.startswith("SG_")} 18 | # Disable the update check 19 | env["SG_UPDATE_FREQUENCY"] = "0" 20 | 21 | result = subprocess.run( 22 | args=["../run_example.py", "example.yaml", "--no-pause"], 23 | cwd=example_path, 24 | stderr=subprocess.STDOUT, 25 | env=env, 26 | ) 27 | 28 | if result.returncode != 0: 29 | raise AssertionError("Example exited with code %d!" % result.returncode) 30 | -------------------------------------------------------------------------------- /examples/update_example_versions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Update poetry.lock files in all examples that use Poetry. 4 | 5 | while IFS= read -r -d '' file 6 | do 7 | path=$(dirname "$file") 8 | echo "Updating $path..." 9 | pushd "$path" 10 | poetry update --lock 11 | popd 12 | done < <(find . -name "poetry.lock" -print0) 13 | -------------------------------------------------------------------------------- /examples/us-election/README.md: -------------------------------------------------------------------------------- 1 | # Joining multiple Splitgraph tables: 2016 US Election 2 | 3 | This example will: 4 | 5 | * use a Splitfile to build a dataset that joins several datasets together: 6 | * [US Census](https://www.splitgraph.com/splitgraph/census) demographic data ([source](https://www.kaggle.com/muonneutrino/us-census-demographic-data/download)) 7 | * Census tracts designated as [Qualified Opportunity Zones](https://www.splitgraph.com/splitgraph/qoz) ([source](https://www.cdfifund.gov/Documents/Designated%20QOZs.12.14.18.xlsx)) 8 | * [2016 US Presidential Election precinct-level returns](https://www.splitgraph.com/splitgraph/2016_election/) ([source](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/LYWX3D)) 9 | * Run a quick linear regression against the resultant dataset to see if there is a 10 | correlation between the voting patterns in a given county and the fraction of QOZ-qualified 11 | census tracts in that county. 
12 | 13 | ## Running the example 14 | 15 | Install this package with [Poetry](https://github.com/sdispater/poetry): `poetry install` 16 | 17 | Copy your .sgconfig file into this directory (it must contain API credentials to access 18 | data.splitgraph.com). If you don't have them yet, take a look at the 19 | [Splitgraph Cloud reference](https://www.splitgraph.com/docs/splitgraph_cloud/introduction) or register using `sgr cloud register`. 20 | 21 | Then, run `../run_example.py example.yaml` and press ENTER when prompted to go through the steps. -------------------------------------------------------------------------------- /examples/us-election/analyze.py: -------------------------------------------------------------------------------- 1 | import scipy.stats as ss 2 | 3 | from splitgraph.core.repository import Repository 4 | from splitgraph.ingestion.pandas import sql_to_df 5 | 6 | # Load the dataset we created into Pandas 7 | image = Repository("", "qoz_vote_fraction").images["latest"] 8 | df = sql_to_df("SELECT * FROM qoz_vote_fraction", image=image, use_lq=True) 9 | print(df) 10 | 11 | # Is there a correlation between the Trump vote fraction and the fraction of 12 | # QOZ-qualified tracts in every county? 13 | print(ss.linregress(df["trump_vote_fraction"], df["qoz_tract_fraction"])) 14 | -------------------------------------------------------------------------------- /examples/us-election/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | engine: 4 | image: splitgraph/engine:${DOCKER_TAG-stable} 5 | ports: 6 | - '0.0.0.0:5432:5432' 7 | environment: 8 | - POSTGRES_USER=sgr 9 | - POSTGRES_PASSWORD=supersecure 10 | - POSTGRES_DB=splitgraph 11 | - SG_LOGLEVEL=INFO 12 | - SG_CONFIG_FILE=/.sgconfig 13 | expose: 14 | - 5432 15 | # Need to mount this into the engine so that access credentials to 16 | # data.splitgraph.com propagate into it. 17 | volumes: 18 | - ${SG_CONFIG_FILE-./.sgconfig}:/.sgconfig 19 | -------------------------------------------------------------------------------- /examples/us-election/example.yaml: -------------------------------------------------------------------------------- 1 | - commands: 2 | - "# Build and start the engine" 3 | - docker-compose --project-name splitgraph_example down -v --remove-orphans 4 | - docker-compose --project-name splitgraph_example build 5 | - docker-compose --project-name splitgraph_example up -d 6 | - sgr init 7 | record: False 8 | - commands: 9 | - "# This example will use Splitgraph to build a dataset that correlates, for every US county," 10 | - "# its voting pattern in the 2016 US Presidential election with the amount of" 11 | - "# Qualified Opportunity Zone areas in it." 12 | - "#" 13 | - "# First, examine the Splitfile used to build the dataset." 14 | - cat qoz_vote_fraction.splitfile 15 | - commands: 16 | - "# Now, build the dataset: the Splitgraph engine will automatically download the required data" 17 | - "# and run the queries in the Splitfile to produce the final data image." 
18 | - sgr build qoz_vote_fraction.splitfile -o qoz_vote_fraction 19 | stderr: False 20 | - commands: 21 | - "# Inspect the dataset metadata" 22 | - sgr show qoz_vote_fraction 23 | - sgr table qoz_vote_fraction qoz_vote_fraction 24 | - commands: 25 | - "# Get the data for New York County (FIPS 36061)" 26 | - | 27 | sgr sql -s qoz_vote_fraction \ 28 | "SELECT row_to_json(d) FROM (SELECT * FROM qoz_vote_fraction WHERE county_id = '36061') d" 29 | - commands: 30 | - "# Do a quick analysis of the dataset." 31 | - cat analyze.py 32 | - python analyze.py 33 | - commands: 34 | - "# Delete the docker-compose project and cleanup" 35 | - docker-compose --project-name splitgraph_example down -v 36 | record: False 37 | -------------------------------------------------------------------------------- /examples/us-election/pyproject.toml: -------------------------------------------------------------------------------- 1 | # These are the requirements to run the analyze.py file. 2 | 3 | [tool.poetry] 4 | name = "splitgraph-us-election" 5 | version = "0.0.0" 6 | description = "Splitgraph US Election dataset example" 7 | license = "Apache 2.0" 8 | authors = ["Splitgraph Limited"] 9 | readme = "README.md" 10 | homepage = "https://www.splitgraph.com" 11 | repository = "https://github.com/splitgraph/sgr" 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.7,<4.0" 15 | splitgraph = { path = "../.." } 16 | pandas = ">=0.24" 17 | scipy = ">=1.0" 18 | 19 | [build-system] 20 | requires = ["poetry>=0.12"] 21 | build-backend = "poetry.masonry.api" 22 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.8 3 | warn_return_any = True 4 | warn_unused_configs = True 5 | plugins = pydantic.mypy 6 | 7 | # Ignore no stubs for modules like psycopg2 etc 8 | ignore_missing_imports = True 9 | 10 | # Allow reusing the same variable with multiple assignments of different types. 11 | allow_redefinition = True 12 | 13 | strict_optional = True 14 | warn_redundant_casts = True 15 | warn_unreachable = True 16 | # Disable since there's some weird mismatch between my mypy, the mypy in the pre-commit hook and the mypy in GH Actions 17 | # warn_unused_ignores = True 18 | 19 | check_untyped_defs = True 20 | 21 | # Disallow importing c from a.b if it imports c itself (unless specified in __all__) 22 | no_implicit_reexport = True 23 | 24 | exclude = ^(engine|examples|docs|test) 25 | 26 | [pydantic-mypy] 27 | # Include the field types as type hints in the generated signature for the __init__ method 28 | # (e.g. disallow doing Model(int_val="not_an_int") 29 | init_typed = True 30 | 31 | # Disallow extra arguments to __init__ (guard against typos when initializing models) 32 | init_forbid_extra = True 33 | 34 | # Raise a mypy error whenever a model is created for which calls to its __init__ or construct 35 | # methods require the use of aliases that cannot be statically determined. 
36 | warn_required_dynamic_aliases = True 37 | 38 | # Disallow untyped Pydantic fields 39 | warn_untyped_fields = True 40 | -------------------------------------------------------------------------------- /pics/splitfile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/pics/splitfile.png -------------------------------------------------------------------------------- /pics/splitfiles.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/pics/splitfiles.gif -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [tool:pytest] 5 | addopts = --cov splitgraph --cov-branch --cov-report term --cov-report html 6 | env = 7 | SG_CONFIG_FILE = test/resources/.sgconfig 8 | SG_LOGLEVEL = INFO 9 | markers = 10 | mounting: Requires one of the databases in mounting.yml to be up (testing FDW mounting for Mongo/MySQL/Postgres) 11 | registry: Tests that use a remote engine and that can be run against the registry instead (run as an unprivileged user and don't require object storage or checkouts to work) 12 | example: Tests Splitgraph examples in examples/, requires the .core.yml docker-compose test project to be down (as it spins up its own Splitgraph engines). 13 | -------------------------------------------------------------------------------- /splitgraph/.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Bulk run of Black formatting 2 | 4e580529e0b7d12e6496e4e446451b03793502fd 3 | 4 | # Bulk run of isort 5 | 88088df8c8769eeb848707c256cdec710d4da202 -------------------------------------------------------------------------------- /splitgraph/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | 5 | pex_binary( 6 | name="sgr", 7 | entry_point="splitgraph.commandline:cli", 8 | # Add dynamically loaded hooks/plugins 9 | dependencies=[ 10 | "src/py/splitgraph/splitgraph/ingestion/airbyte", 11 | "src/py/splitgraph/splitgraph/ingestion/dbt", 12 | "src/py/splitgraph/splitgraph/ingestion/singer", 13 | "src/py/splitgraph/splitgraph/ingestion/socrata", 14 | "src/py/splitgraph/splitgraph/ingestion/snowflake", 15 | "src/py/splitgraph/splitgraph/ingestion/sqlite", 16 | "src/py/splitgraph/splitgraph/ingestion/athena", 17 | "src/py/splitgraph/splitgraph/ingestion/bigquery", 18 | "src/py/splitgraph/splitgraph/hooks", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /splitgraph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/__init__.py -------------------------------------------------------------------------------- /splitgraph/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.12" 2 | -------------------------------------------------------------------------------- /splitgraph/cloud/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | 
-------------------------------------------------------------------------------- /splitgraph/cloud/project/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/cloud/project/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/cloud/project/__init__.py -------------------------------------------------------------------------------- /splitgraph/commandline/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/config/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/config/default_config.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from .keys import DEFAULTS 4 | 5 | 6 | def get_default_config_value(key: str, default_return: Optional[str] = None) -> Any: 7 | """Get the hard-coded default value of a config key. 8 | Otherwise return default_return. 9 | """ 10 | 11 | return DEFAULTS.get(key, default_return) 12 | -------------------------------------------------------------------------------- /splitgraph/config/environment_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | 5 | def get_environment_config_value(key: str, default_return: Optional[str] = None) -> Optional[str]: 6 | """Get the environment variable value of the environment variable matching key. 7 | Otherwise return default_return. 8 | """ 9 | 10 | return os.environ.get(key, default_return) 11 | -------------------------------------------------------------------------------- /splitgraph/config/management.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from typing import cast 5 | 6 | from splitgraph.config.config import patch_config 7 | from splitgraph.config.export import overwrite_config 8 | from splitgraph.config.system_config import HOME_SUB_DIR 9 | 10 | 11 | def patch_and_save_config(config, patch): 12 | 13 | config_path = config["SG_CONFIG_FILE"] 14 | if not config_path: 15 | # Default to creating a config in the user's homedir rather than local. 
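# The path built below is <home directory>/<HOME_SUB_DIR>/.sgconfig, with HOME_SUB_DIR
# coming from splitgraph.config.system_config (imported above).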
16 | homedir = os.environ.get("HOME") 17 | # on Windows, HOME is not a standard env var 18 | if homedir is None and os.name == "nt": 19 | homedir = f"{os.environ['HOMEDRIVE']}{os.environ['HOMEPATH']}" 20 | config_dir = Path(cast(str, homedir)) / Path(HOME_SUB_DIR) 21 | config_path = config_dir / Path(".sgconfig") 22 | logging.debug("No config file detected, creating one at %s" % config_path) 23 | config_dir.mkdir(exist_ok=True, parents=True) 24 | else: 25 | logging.debug("Updating the existing config file at %s" % config_path) 26 | new_config = patch_config(config, patch) 27 | overwrite_config(new_config, config_path) 28 | return str(config_path) 29 | -------------------------------------------------------------------------------- /splitgraph/core/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core Splitgraph functionality: versioning and sharing tables. 2 | 3 | The main point of interaction with the Splitgraph API is a :class:`splitgraph.core.repository.Repository` object 4 | representing a local or a remote Splitgraph repository. Repositories can be created using one of the 5 | following methods: 6 | 7 | * Directly by invoking `Repository(namespace, name, engine)` where `engine` is the engine that the repository 8 | belongs to (that can be gotten with `get_engine(engine_name)`. If the created repository doesn't actually exist 9 | on the engine, it must first be initialized with `repository.init()`. 10 | * By using :func:`splitgraph.core.engine.lookup_repository` which will search for the repository on the current 11 | lookup path. 
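For example, a minimal sketch of the first approach, using the default local engine
(repository and namespace names here are placeholders)::

    from splitgraph.core.repository import Repository

    repo = Repository("my_namespace", "my_repo")
    repo.init()  # create the repository on the engine if it doesn't exist yet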
12 | """ 13 | -------------------------------------------------------------------------------- /splitgraph/core/indexing/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/core/indexing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/core/indexing/__init__.py -------------------------------------------------------------------------------- /splitgraph/core/sql/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/core/sql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/core/sql/__init__.py -------------------------------------------------------------------------------- /splitgraph/engine/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/engine/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines the interface for a Splitgraph engine (a backing database), including running basic SQL commands, 3 | tracking tables for changes and uploading/downloading tables to other remote engines. 4 | 5 | By default, Splitgraph is backed by Postgres: see :mod:`splitgraph.engine.postgres` for an example of how to 6 | implement a different engine. 7 | """ 8 | from enum import Enum 9 | 10 | 11 | class ResultShape(Enum): 12 | """Shape that the result of a query will be coerced to""" 13 | 14 | NONE = 0 # No result expected 15 | ONE_ONE = 1 # e.g. "row1_val1" 16 | ONE_MANY = 2 # e.g. ("row1_val1", "row1_val_2") 17 | MANY_ONE = 3 # e.g. ["row1_val1", "row2_val_1", ...] 18 | MANY_MANY = 4 # e.g. [("row1_val1", "row1_val_2"), ("row2_val1", "row2_val_2"), ...] 
19 | -------------------------------------------------------------------------------- /splitgraph/engine/postgres/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | dependencies=[ 4 | "src/py/splitgraph/splitgraph/resources/splitgraph_meta", 5 | "src/py/splitgraph/splitgraph/resources/static", 6 | ], 7 | ) 8 | -------------------------------------------------------------------------------- /splitgraph/engine/postgres/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/engine/postgres/__init__.py -------------------------------------------------------------------------------- /splitgraph/engine/utils.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | from psycopg2.sql import SQL, Identifier 4 | 5 | if TYPE_CHECKING: 6 | from splitgraph.engine.postgres.psycopg import PsycopgEngine 7 | 8 | 9 | def unmount_schema(engine: "PsycopgEngine", schema: str) -> None: 10 | engine.run_sql( 11 | SQL("DROP SERVER IF EXISTS {} CASCADE").format(Identifier("%s_lq_checkout_server" % schema)) 12 | ) 13 | engine.run_sql(SQL("DROP SERVER IF EXISTS {} CASCADE").format(Identifier(schema + "_server"))) 14 | engine.run_sql(SQL("DROP SCHEMA IF EXISTS {} CASCADE").format(Identifier(schema))) 15 | -------------------------------------------------------------------------------- /splitgraph/hooks/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, dependencies=["src/py/splitgraph/splitgraph/hooks/s3.py"] 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various hooks for extending Splitgraph, including: 3 | 4 | * External object handlers (:mod:`splitgraph.hooks.external_objects`) allowing to download/upload objects 5 | to locations other than the remote Splitgraph engine. 6 | * Data sources (:mod:`splitgraph.hooks.data_sources`) that allow to add data to Splitgraph, e.g. 7 | using the Postgres engine's FDW interface to mount other external databases on the engine. 8 | * Splitfile commands (:mod:`splitgraph.hooks.splitfile_commands`) to define custom data transformation steps 9 | compatible with the Splitfile framework. 
10 | """ 11 | -------------------------------------------------------------------------------- /splitgraph/hooks/data_source/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | dependencies=[ 4 | "src/py/splitgraph/splitgraph/resources/icons", 5 | ], 6 | ) 7 | -------------------------------------------------------------------------------- /splitgraph/hooks/data_source/utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import Any, Dict 3 | 4 | 5 | def merge_jsonschema(left: Dict[str, Any], right: Dict[str, Any]) -> Dict[str, Any]: 6 | result = deepcopy(left) 7 | result["properties"] = {**result["properties"], **right.get("properties", {})} 8 | result["required"] = result.get("required", []) + [ 9 | r for r in right.get("required", []) if r not in result.get("required", []) 10 | ] 11 | return result 12 | -------------------------------------------------------------------------------- /splitgraph/ingestion/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/ingestion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/ingestion/__init__.py -------------------------------------------------------------------------------- /splitgraph/ingestion/airbyte/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/ingestion/airbyte/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/ingestion/airbyte/__init__.py -------------------------------------------------------------------------------- /splitgraph/ingestion/athena/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | dependencies=[ 4 | "src/py/splitgraph/splitgraph/resources/icons", 5 | ], 6 | ) 7 | -------------------------------------------------------------------------------- /splitgraph/ingestion/bigquery/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | dependencies=[ 4 | "src/py/splitgraph/splitgraph/resources/icons", 5 | ], 6 | ) 7 | -------------------------------------------------------------------------------- /splitgraph/ingestion/csv/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | dependencies=[ 4 | "src/py/splitgraph/splitgraph/resources/icons", 5 | ], 6 | ) 7 | -------------------------------------------------------------------------------- /splitgraph/ingestion/dbt/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/ingestion/dbt/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/ingestion/dbt/__init__.py -------------------------------------------------------------------------------- /splitgraph/ingestion/singer/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/ingestion/singer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/ingestion/singer/__init__.py -------------------------------------------------------------------------------- /splitgraph/ingestion/singer/commandline/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/ingestion/snowflake/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | dependencies=[ 4 | "src/py/splitgraph/splitgraph/resources/icons", 5 | ], 6 | ) 7 | -------------------------------------------------------------------------------- /splitgraph/ingestion/socrata/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/ingestion/socrata/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/ingestion/socrata/__init__.py -------------------------------------------------------------------------------- /splitgraph/ingestion/sqlite/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | dependencies=[ 4 | "src/py/splitgraph/splitgraph/resources/icons", 5 | ], 6 | ) 7 | -------------------------------------------------------------------------------- /splitgraph/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/py.typed -------------------------------------------------------------------------------- /splitgraph/resources/BUILD: -------------------------------------------------------------------------------- 1 | resources(sources=["**/*.sql", "**/*.py", "**/*.svg"]) 2 | -------------------------------------------------------------------------------- /splitgraph/resources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/resources/__init__.py -------------------------------------------------------------------------------- /splitgraph/resources/icons/BUILD: -------------------------------------------------------------------------------- 1 | resources(sources=["*.py", "*.svg"]) 2 | -------------------------------------------------------------------------------- /splitgraph/resources/icons/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/resources/icons/__init__.py -------------------------------------------------------------------------------- /splitgraph/resources/icons/bigquery.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /splitgraph/resources/splitgraph_meta/BUILD: -------------------------------------------------------------------------------- 1 | resources(sources=["*.sql", "*.py"]) 2 | -------------------------------------------------------------------------------- /splitgraph/resources/splitgraph_meta/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/resources/splitgraph_meta/__init__.py -------------------------------------------------------------------------------- /splitgraph/resources/splitgraph_meta/splitgraph_meta--0.0.1--0.0.2.sql: -------------------------------------------------------------------------------- 1 | -- Migration to add the rows added/deleted metadata to every object in object_meta 2 | ALTER TABLE splitgraph_meta.objects 3 | ADD COLUMN rows_inserted integer DEFAULT 0 CHECK (rows_inserted >= 0); 4 | 5 | ALTER TABLE splitgraph_meta.objects 6 | ADD COLUMN rows_deleted integer DEFAULT 0 CHECK (rows_deleted >= 0); 7 | -------------------------------------------------------------------------------- /splitgraph/resources/splitgraph_meta/splitgraph_meta--0.0.2--0.0.3.sql: -------------------------------------------------------------------------------- 1 | -- Add restrictions on maximum repository length and names 2 | ALTER TABLE splitgraph_meta.images 3 | ALTER COLUMN repository SET DATA TYPE varchar(64); 4 | ALTER TABLE splitgraph_meta.images 5 | ALTER COLUMN namespace SET DATA TYPE varchar(64); 6 | ALTER TABLE splitgraph_meta.images 7 | ADD CONSTRAINT images_repository CHECK (repository ~ '^[-A-Za-z0-9_]+$'); 8 | ALTER TABLE splitgraph_meta.images 9 | ADD CONSTRAINT images_namespace CHECK (namespace ~ '^[-A-Za-z0-9_]*$'); 10 | 11 | ALTER TABLE splitgraph_meta.objects 12 | ALTER COLUMN namespace SET DATA TYPE varchar(64); 13 | ALTER TABLE splitgraph_meta.objects 14 | ADD CONSTRAINT objects_namespace CHECK (namespace ~ '^[-A-Za-z0-9_]*$'); 15 | -------------------------------------------------------------------------------- /splitgraph/resources/splitgraph_meta/splitgraph_meta--0.0.3--0.0.4.sql: -------------------------------------------------------------------------------- 1 | CREATE INDEX idx_table_objects ON splitgraph_meta.tables USING GIN(object_ids); 2 | -------------------------------------------------------------------------------- /splitgraph/resources/static/BUILD: -------------------------------------------------------------------------------- 1 | resources(sources=["*.sql", "*.py"]) 2 | -------------------------------------------------------------------------------- /splitgraph/resources/static/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/resources/static/__init__.py -------------------------------------------------------------------------------- 
/splitgraph/splitfile/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/splitfile/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Splitfile interpreter: a set of tools on top of the core Splitgraph versioning and image management to give 3 | the user a Dockerfile-like experience for building Splitgraph images (caching, consistent hashing, a declarative 4 | language). 5 | """ 6 | -------------------------------------------------------------------------------- /splitgraph/splitfile/generation/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/splitfile/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/splitfile/generation/__init__.py -------------------------------------------------------------------------------- /splitgraph/splitfile/generation/replacement.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict, List 2 | 3 | from splitgraph.core.sql.splitfile_validation import prepare_splitfile_sql 4 | from splitgraph.core.types import ProvenanceLine 5 | from splitgraph.splitfile.generation.common import reconstruct_splitfile 6 | 7 | if TYPE_CHECKING: 8 | from splitgraph.core.repository import Repository 9 | 10 | 11 | def reconstruct_splitfile_with_replacement( 12 | provenance_data: List[ProvenanceLine], 13 | source_replacement: Dict["Repository", str], 14 | ignore_irreproducible: bool = False, 15 | ) -> List[str]: 16 | # circular import 17 | from splitgraph.core.repository import Repository 18 | 19 | def _postprocess_sql(sql: str): 20 | # Use the SQL validator/replacer to rewrite old image hashes into new hashes/tags. 
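# For instance, with source_replacement = {Repository("ns", "repo"): "new_tag"}, a reference
# like "ns/repo:<old_image_hash>" in the original SQL is rewritten to "ns/repo:new_tag" below.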
21 | def image_mapper(repository: "Repository", image_hash: str): 22 | new_image = ( 23 | repository.to_schema() + ":" + source_replacement.get(repository, image_hash) 24 | ) 25 | return new_image, new_image 26 | 27 | _, replaced_sql = prepare_splitfile_sql(sql, image_mapper) 28 | return replaced_sql 29 | 30 | return reconstruct_splitfile( 31 | provenance_data, 32 | ignore_irreproducible, 33 | postprocess_sql=_postprocess_sql, 34 | postprocess_repo=lambda n, r: source_replacement.get(Repository(n, r)), 35 | ) 36 | -------------------------------------------------------------------------------- /splitgraph/utils/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_black=True, 3 | ) 4 | -------------------------------------------------------------------------------- /splitgraph/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/splitgraph/utils/__init__.py -------------------------------------------------------------------------------- /splitgraph/utils/yaml.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from ruamel.yaml.main import YAML 4 | 5 | 6 | def safe_load(stream) -> Any: 7 | yaml = YAML(typ="safe") 8 | return yaml.load(stream) 9 | 10 | 11 | def safe_dump(obj: Any, stream, **kwargs) -> None: 12 | yaml = YAML(typ="safe") 13 | yaml.default_flow_style = False 14 | yaml.dump(obj, stream, **kwargs) 15 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/__init__.py -------------------------------------------------------------------------------- /test/architecture/data/mongoorigin/setup.js: -------------------------------------------------------------------------------- 1 | db.stuff.drop() 2 | 3 | db.stuff.insertMany([ 4 | {"name": "Alex", "duration": 12, "happy": false}, 5 | {"name": "James", "duration": 2, "happy": true}, 6 | {"name": "Alice", "duration": 98765, "happy": true}]); 7 | 8 | -------------------------------------------------------------------------------- /test/architecture/data/mysqlorigin/setup.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS mushrooms; 2 | CREATE TABLE mushrooms ( 3 | mushroom_id int, 4 | name varchar(20), 5 | discovery datetime, 6 | friendly bool, 7 | binary_data binary(7), 8 | varbinary_data varbinary(16), 9 | PRIMARY KEY (mushroom_id) 10 | ); 11 | 12 | INSERT INTO mushrooms VALUES (1, 'portobello', STR_TO_DATE('11/11/2012 8:06:26 AM', '%e/%c/%Y %r'), true, 'bintst', INET6_ATON('127.0.0.1')); 13 | INSERT INTO mushrooms VALUES (2, 'deathcap', STR_TO_DATE('17/3/2018 8:06:26 AM', '%e/%c/%Y %r'), false, '\0\0\1\2\3', INET6_ATON('127.0.0.1')); 14 | -------------------------------------------------------------------------------- /test/architecture/data/objectstorage/test_csv/some_prefix/encoding-win-1252.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/architecture/data/objectstorage/test_csv/some_prefix/encoding-win-1252.csv 
-------------------------------------------------------------------------------- /test/architecture/data/objectstorage/test_csv/some_prefix/fruits.csv: -------------------------------------------------------------------------------- 1 | fruit_id,timestamp,name,number,bignumber,vbignumber 2 | 1,2018-01-01 00:11:11,apple,1,42,42 3 | 2,2018-01-02 00:22:22,orange,2,1248284263629426688,12482842636294266881248284263629426688 4 | 3,2018-01-03 00:33:33,mayonnaise,"",9223372036854775807,12482842636294266881248284263629426688 5 | 4,2018-01-04 00:44:44,mustard,4,-9223372036854775808,12345 -------------------------------------------------------------------------------- /test/architecture/data/objectstorage/test_csv/some_prefix/not_a_csv.txt: -------------------------------------------------------------------------------- 1 | This is actually not a CSV file -------------------------------------------------------------------------------- /test/architecture/data/objectstorage/test_csv/some_prefix/percentage_sign.csv: -------------------------------------------------------------------------------- 1 | "Id","Submit time","Profile","Status","Source currency","Amount paid by","Fee","Amount converted","Excess refund","Target currency","Converted and sent to","Exchange rate","Exchange Rate Date","Payout time","Name","Account details","Reference","VAT (10%)" 2 | "123456789","2022/02/19 18:52:18","business","transferred","USD","15000.20","75.00","15000.00","0.0","GBP","12000.0","0.7336","2022/02/21 19:00:20","2022/02/21 19:00:36","Some Company","","", 3 | -------------------------------------------------------------------------------- /test/architecture/data/pgorigin/load_account_data.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS account; 2 | CREATE TABLE account ( 3 | account_number integer, 4 | balance integer, 5 | firstname character varying(20), 6 | lastname character varying(20), 7 | age integer, 8 | gender character varying(1), 9 | address text, 10 | employer character varying(20), 11 | email text, 12 | city character varying(20), 13 | state character varying(5) 14 | ); 15 | 16 | COPY account from '/src/accounts.csv' DELIMITER ',' CSV HEADER; 17 | -------------------------------------------------------------------------------- /test/architecture/data/pgorigin/setup.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS fruits; 2 | CREATE TABLE fruits ( 3 | fruit_id serial, 4 | name varchar 5 | ); 6 | 7 | DROP TABLE IF EXISTS vegetables; 8 | CREATE TABLE vegetables ( 9 | vegetable_id serial, 10 | name varchar 11 | ); 12 | 13 | INSERT INTO fruits (name) VALUES ('apple'); 14 | INSERT INTO fruits (name) VALUES ('orange'); 15 | 16 | INSERT INTO vegetables (name) VALUES ('potato'); 17 | INSERT INTO vegetables (name) VALUES ('carrot'); 18 | -------------------------------------------------------------------------------- /test/architecture/data/remote_engine/setup.sql: -------------------------------------------------------------------------------- 1 | -- Add an unprivileged role that we can use to test RLS on the remote. 
2 | CREATE ROLE testuser LOGIN PASSWORD 'testpassword'; -------------------------------------------------------------------------------- /test/architecture/dev/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | # The source code for splitgraph goes here (root of the repository) 4 | VOLUME /src/splitgraph 5 | 6 | # The .sgconfig goes here, and its location should be set via env var 7 | VOLUME /sgconfig 8 | 9 | RUN apt-get update \ 10 | && apt-get install -y curl git \ 11 | && ( curl -sSL https://raw.githubusercontent.com/sdispater/poetry/master/get-poetry.py | python ) \ 12 | && mkdir -p /src/splitgraph \ 13 | && mkdir -p /sgconfig \ 14 | && ( echo "alias poetry='/root/.poetry/bin/poetry'" >> /root/.bashrc ) \ 15 | && ( echo "alias sgr='/root/.poetry/bin/poetry run sgr'" >> /root/.bashrc ) \ 16 | && ( echo "alias pytest='/root/.poetry/bin/poetry run pytest -c /sgconfig/pytest.dev.ini'" >> /root/.bashrc ) 17 | 18 | ADD docker-entrypoint-dev.sh /src/entrypoint.sh 19 | 20 | WORKDIR /src/splitgraph 21 | 22 | ENTRYPOINT ["sh", "/src/entrypoint.sh"] 23 | -------------------------------------------------------------------------------- /test/architecture/dev/docker-config/pytest.dev.ini: -------------------------------------------------------------------------------- 1 | # pytest configuration file for running pytest in sgr container 2 | 3 | [pytest] 4 | addopts = --cov splitgraph --cov-branch --cov-report term --cov-report html 5 | env = 6 | SG_CONFIG_FILE = /sgconfig/.sgconfig 7 | -------------------------------------------------------------------------------- /test/architecture/docker-compose.ci.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | ci_pgorigin: 4 | image: splitgraphci/pgorigin 5 | build: ./src/pgorigin 6 | ci_mongoorigin: 7 | image: splitgraphci/mongoorigin 8 | build: ./src/mongoorigin 9 | ci_esorigin: 10 | image: splitgraphci/esorigin 11 | build: ./src/esorigin 12 | -------------------------------------------------------------------------------- /test/architecture/docker-compose.mounting.yml: -------------------------------------------------------------------------------- 1 | # Compose file with just the architecture required for FDW mounting tests. 
2 | 3 | version: '3' 4 | services: 5 | pgorigin: 6 | image: splitgraphci/pgorigin 7 | environment: 8 | - ORIGIN_USER=originro 9 | - ORIGIN_PASS=originpass 10 | - ORIGIN_PG_DB=origindb 11 | expose: 12 | - 5432 13 | volumes: 14 | - ./data/pgorigin:/src 15 | mongoorigin: 16 | image: splitgraphci/mongoorigin 17 | ports: 18 | - '0.0.0.0:27017:27017' 19 | environment: 20 | - ORIGIN_USER=originro 21 | - ORIGIN_PASS=originpass 22 | - ORIGIN_MONGO_DB=origindb 23 | expose: 24 | - 27017 25 | volumes: 26 | - ./data/mongoorigin:/src 27 | mysqlorigin: 28 | image: mysql:8.0.13 29 | # image: mariadb:10.5.8-focal 30 | ports: 31 | - '0.0.0.0:3306:3306' 32 | environment: 33 | - MYSQL_DATABASE=mysqlschema 34 | - MYSQL_ROOT_PASSWORD=supersecure 35 | - MYSQL_USER=originuser 36 | - MYSQL_PASSWORD=originpass 37 | expose: 38 | - 3306 39 | volumes: 40 | - ./data/mysqlorigin/setup.sql:/docker-entrypoint-initdb.d/setup.sql:ro 41 | esorigin: 42 | image: splitgraphci/esorigin 43 | expose: 44 | - 9200 45 | -------------------------------------------------------------------------------- /test/architecture/scripts/build_and_publish_ci_images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 4 | 5 | usage() { 6 | echo 7 | echo "./scripts/build_and_publish_ci_images.sh" 8 | echo 9 | echo "1. Build images via scripts/build_ci_images.sh" 10 | echo 11 | echo 12 | 13 | "$DIR"/build_ci_images.sh --help 14 | 15 | echo 16 | echo 17 | echo "2. Publish images via scripts/publish_ci_images.sh" 18 | echo 19 | echo 20 | 21 | "$DIR"/publish_ci_images.sh --help 22 | 23 | echo 24 | echo 25 | echo 26 | } 27 | 28 | test "$1" == "--help" && { usage ; exit 1 ; } 29 | 30 | "$DIR"/build_ci_images.sh && "$DIR"/publish_ci_images.sh 31 | -------------------------------------------------------------------------------- /test/architecture/scripts/build_ci_images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage() { 4 | echo 5 | echo "./scripts/build_ci_images.sh" 6 | echo 7 | echo "Build the services in the docker-compose.ci.yml file As long as they " 8 | echo "have an image: directive to tag them with splitgraphci/imgname, they " 9 | echo "will be tagged so that scripts/publish_ci_images.sh can detect and" 10 | echo "publish them." 
11 | echo 12 | } 13 | 14 | test "$1" == "--help" && { usage ; exit 1 ; } 15 | 16 | set -eo pipefail 17 | 18 | _log_error() { 19 | echo "$@" 1>&2 20 | } 21 | 22 | fatal_error() { 23 | _log_error "Fatal:" "$@" 24 | exit 1 25 | } 26 | 27 | _log() { 28 | echo "$@" 29 | } 30 | 31 | DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 32 | 33 | ARCH_DIR="$DIR"/../ 34 | 35 | cd "$ARCH_DIR" || { 36 | fatal_error "Failed to cd to architecture directory at $ARCH_DIR" ; 37 | } 38 | 39 | which docker-compose >/dev/null 2>&1|| { 40 | fatal_error "docker-compose is not installed or not in \$PATH"; 41 | } 42 | 43 | docker-compose -f docker-compose.ci.yml build && exit 0 44 | 45 | fatal_error "Build failed" 46 | -------------------------------------------------------------------------------- /test/architecture/src/esorigin/Dockerfile: -------------------------------------------------------------------------------- 1 | ## 2 | # esorigin 3 | # Build seed data into our image for tests; appropriated from: 4 | # https://stackoverflow.com/questions/35526532/how-to-add-an-elasticsearch-index-during-docker-build 5 | ## 6 | FROM elasticsearch:7.16.1 7 | 8 | RUN mkdir /data && \ 9 | chown -R elasticsearch:elasticsearch /data && \ 10 | echo 'path.data: /data' >> config/elasticsearch.yml && \ 11 | echo 'discovery.type: "single-node"' >> config/elasticsearch.yml && \ 12 | echo "xpack.security.enabled: false" >> config/elasticsearch.yml && \ 13 | echo 'cluster.routing.allocation.disk.watermark.flood_stage: "99%"' >> config/elasticsearch.yml && \ 14 | echo 'cluster.routing.allocation.disk.watermark.high: "99%"' >> config/elasticsearch.yml 15 | 16 | ADD https://raw.githubusercontent.com/vishnubob/wait-for-it/e1f115e4ca285c3c24e847c4dd4be955e0ed51c2/wait-for-it.sh /utils/wait-for-it.sh 17 | 18 | COPY accounts.json /accounts.json 19 | COPY init-data.sh /init-data.sh 20 | RUN chmod a+x /init-data.sh 21 | RUN /usr/local/bin/docker-entrypoint.sh elasticsearch -p /tmp/epid & /bin/bash /utils/wait-for-it.sh -t 0 localhost:9200 -- /init-data.sh; kill $(cat /tmp/epid) && wait $(cat /tmp/epid); exit 0; 22 | -------------------------------------------------------------------------------- /test/architecture/src/esorigin/init-data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -XPUT localhost:9200/account?pretty -H 'Content-Type: application/json' -d' 4 | { 5 | "mappings": { 6 | "properties": { 7 | "account_number": {"type": "integer"}, 8 | "balance": {"type": "integer"}, 9 | "firstname": {"type": "keyword"}, 10 | "lastname": {"type": "keyword"}, 11 | "age": {"type": "integer"}, 12 | "gender": {"type": "keyword"}, 13 | "address": {"type": "keyword"}, 14 | "employer": {"type": "keyword"}, 15 | "email": {"type": "keyword"}, 16 | "city": {"type": "keyword"}, 17 | "state": {"type": "keyword"} 18 | } 19 | } 20 | } 21 | ' 22 | 23 | curl -XPUT http://localhost:9200/account/_bulk -H 'Content-Type: application/json' \ 24 | --data-binary @/accounts.json 25 | -------------------------------------------------------------------------------- /test/architecture/src/mongoorigin/Dockerfile: -------------------------------------------------------------------------------- 1 | ## 2 | # mongoorigin 3 | ## 4 | FROM mongo:3.6.5-jessie 5 | 6 | ENV ORIGIN_USER docker 7 | ENV ORIGIN_PASS docker 8 | ENV ORIGIN_MONGO_DB origindb 9 | 10 | ADD start.sh /start.sh 11 | RUN chmod a+x /start.sh 12 | 13 | VOLUME /src 14 | 15 | CMD ["/start.sh"] 16 | 17 | 
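A minimal sketch of checking the build-time seeding that the esorigin Dockerfile above performs (it starts Elasticsearch during the image build, waits for it, runs init-data.sh to create and bulk-load the "account" index, then stops it). Assumptions: the esorigin container is reachable on localhost:9200, and the standard Elasticsearch _count endpoint is used; this snippet is not part of the test suite:

import requests

# Count the documents that init-data.sh bulk-loaded into the "account" index.
resp = requests.get("http://localhost:9200/account/_count")
resp.raise_for_status()
print("documents in 'account' index:", resp.json()["count"])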
-------------------------------------------------------------------------------- /test/architecture/src/mongoorigin/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ORIGIN_USER=${ORIGIN_USER} 4 | ORIGIN_PASS=${ORIGIN_PASS} 5 | ORIGIN_MONGO_DB=${ORIGIN_MONGO_DB} 6 | 7 | mongod --fork --logpath /var/log/mongodb/mongod.log --bind_ip_all 8 | 9 | if [ ! -e '/done_setup' ]; then 10 | mongo ${ORIGIN_MONGO_DB} --eval "db.createUser({\"user\": \"${ORIGIN_USER}\", \"pwd\": \"${ORIGIN_PASS}\", \"roles\": [\"readWrite\"]})" 11 | 12 | if [ -e '/src/setup.js' ]; then 13 | mongo ${ORIGIN_MONGO_DB} < /src/setup.js 14 | fi 15 | 16 | echo 1 > /done_setup 17 | fi 18 | 19 | tail -F /var/log/mongodb/mongod.log 20 | -------------------------------------------------------------------------------- /test/architecture/src/pgorigin/Dockerfile: -------------------------------------------------------------------------------- 1 | ## 2 | # pgorigin 3 | ## 4 | FROM ubuntu:18.04 5 | 6 | RUN apt-get update -qq && apt-get install -y postgresql-10 postgresql-server-dev-10 && apt-get clean 7 | 8 | ENV ORIGIN_USER docker 9 | ENV ORIGIN_PASS docker 10 | ENV ORIGIN_PG_DB origindb 11 | 12 | # Open Postgres ports 13 | RUN mkdir -p /etc/postgresql/10/main 14 | RUN echo "host all all 0.0.0.0/0 md5" >> /etc/postgresql/10/main/pg_hba.conf 15 | RUN echo "listen_addresses = '*'" >> /etc/postgresql/10/main/postgresql.conf 16 | RUN echo "port = 5432" >> /etc/postgresql/10/main/postgresql.conf 17 | 18 | EXPOSE 5432 19 | 20 | ADD start.sh /start.sh 21 | RUN chmod a+x /start.sh 22 | 23 | VOLUME /src 24 | 25 | CMD ["/start.sh"] 26 | 27 | -------------------------------------------------------------------------------- /test/architecture/src/pgorigin/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ORIGIN_USER=${ORIGIN_USER} 4 | ORIGIN_PASS=${ORIGIN_PASS} 5 | ORIGIN_PG_DB=${ORIGIN_PG_DB} 6 | 7 | SU='/bin/su postgres -c' 8 | 9 | rm /var/lib/postgresql/10/main/postmaster.pid /var/run/postgresql/.*.lock 10 | service postgresql start 11 | 12 | if [ ! -e '/done_setup' ]; then 13 | ${SU} "createdb ${ORIGIN_PG_DB}" 14 | ${SU} "createuser -d -s -r -l ${ORIGIN_USER}" 15 | ${SU} "psql postgres -c \"ALTER USER ${ORIGIN_USER} WITH ENCRYPTED PASSWORD '${ORIGIN_PASS}'\"" 16 | 17 | if [ -e '/src/setup.sql' ]; then 18 | ${SU} "psql ${ORIGIN_PG_DB} < /src/setup.sql" 19 | ${SU} "psql ${ORIGIN_PG_DB} < /src/load_account_data.sql" 20 | fi 21 | 22 | echo 1 > /done_setup 23 | fi 24 | 25 | tail -f /var/log/postgresql/postgresql-10-main.log 26 | -------------------------------------------------------------------------------- /test/architecture/src/remote_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG DOCKER_REPO 2 | ARG DOCKER_TAG 3 | FROM ${DOCKER_REPO}/engine:${DOCKER_TAG} 4 | 5 | RUN echo "port = 5431" >> /etc/postgresql/postgresql.conf 6 | CMD ["postgres", "-c", "config_file=/etc/postgresql/postgresql.conf"] 7 | -------------------------------------------------------------------------------- /test/architecture/wait-for-architecture.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | THIS_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 4 | TEST_DIR="${THIS_DIR}/.." 5 | REPO_ROOT_DIR="${TEST_DIR}/.." 
6 | DEFAULT_SG_CONFIG_FILE="${TEST_DIR}/resources/.sgconfig" 7 | 8 | if [[ "$1" == "--mounting" ]]; then 9 | echo "Wait for mounting architecture to be up" 10 | HEALTHCHECK="import test.splitgraph.conftest as c; c.healthcheck_mounting()" 11 | else 12 | echo "Wait for core architecture to be up" 13 | HEALTHCHECK="import test.splitgraph.conftest as c; c.healthcheck()" 14 | fi 15 | 16 | export SG_CONFIG_FILE=${SG_CONFIG_FILE-"${DEFAULT_SG_CONFIG_FILE}"} 17 | 18 | echo "Using config file at $SG_CONFIG_FILE ..." 19 | 20 | _run_health_check() { 21 | pushd "$REPO_ROOT_DIR" \ 22 | && python -c "$HEALTHCHECK" \ 23 | && return 0 24 | 25 | return 1 26 | } 27 | 28 | 29 | _wait_for_test_architecture() { 30 | local counter=0 31 | while true ; do 32 | 33 | if test $counter -eq 20 ; then 34 | echo 35 | echo "FATAL: Could not connect to test-architecture after 20 tries" 36 | exit 1 37 | fi 38 | 39 | if _run_health_check ; then 40 | echo 41 | echo "Architecture is ready" 42 | break; 43 | else 44 | echo -n "." 45 | let counter=counter+1 46 | sleep 5 47 | fi 48 | done 49 | 50 | return 0 51 | } 52 | 53 | _wait_for_test_architecture && exit 0 54 | exit 1 55 | -------------------------------------------------------------------------------- /test/clean_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # ./clean_test.sh 4 | # 5 | # Run a "clean" test. Intended for running in local development to give a close 6 | # simulation of the test suite running in travis. Also accounts for the 7 | # possibility that some e.g. splitgraphci images are outdated. 8 | # 9 | # - Destroy and recreate docker environment (using down -v) 10 | # - Run tests from within container 11 | # - Exit 0 on all tests pass 12 | # - Exit 1 otherwise 13 | 14 | TEST_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 15 | ARCHITECTURE_DIR="${TEST_DIR}/architecture" 16 | REPO_ROOT_DIR="${TEST_DIR}/.." 17 | CORE_ARCHITECTURE="docker-compose.core.yml" 18 | MOUNTING_ARCHITECTURE="docker-compose.mounting.yml" 19 | 20 | pushd "$REPO_ROOT_DIR" \ 21 | && pushd "${ARCHITECTURE_DIR}" \ 22 | && docker-compose -f $CORE_ARCHITECTURE -f $MOUNTING_ARCHITECTURE pull \ 23 | && docker-compose -f $CORE_ARCHITECTURE -f $MOUNTING_ARCHITECTURE down -v \ 24 | && docker-compose -f $CORE_ARCHITECTURE -f $MOUNTING_ARCHITECTURE build \ 25 | && docker-compose -f $CORE_ARCHITECTURE -f $MOUNTING_ARCHITECTURE up -d --force-recreate --remove-orphans \ 26 | && popd \ 27 | && echo "Wait for test architecture..." 
\ 28 | && pushd "${ARCHITECTURE_DIR}" \ 29 | && ./wait-for-architecture.sh --mounting \ 30 | && docker-compose -f $CORE_ARCHITECTURE -f $MOUNTING_ARCHITECTURE -f docker-compose.dev.yml run test \ 31 | && echo "Tests passed" \ 32 | && popd \ 33 | && exit 0 34 | 35 | popd 36 | 37 | echo "Tests (or something) failed" 38 | exit 1 39 | -------------------------------------------------------------------------------- /test/resources/create_table.splitfile: -------------------------------------------------------------------------------- 1 | SQL CREATE TABLE my_fruits (id integer, name varchar) 2 | 3 | SQL INSERT INTO my_fruits VALUES (1, 'pineapple') 4 | SQL INSERT INTO my_fruits VALUES (2, 'banana') 5 | -------------------------------------------------------------------------------- /test/resources/custom_command_calc_hash.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount IMPORT fruits 2 | 3 | BOBBYTABLES fruits 4 | -------------------------------------------------------------------------------- /test/resources/custom_command_dummy.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount IMPORT fruits 2 | 3 | # Doesn't do anything but returns None -- duplicates the previous image (no changes, random hash) 4 | DUMMY arg1 --arg2 "argument three" 5 | -------------------------------------------------------------------------------- /test/resources/custom_plugin_dir/some_plugin/plugin.py: -------------------------------------------------------------------------------- 1 | from splitgraph.core.types import IntrospectionResult 2 | from splitgraph.hooks.data_source import DataSource 3 | 4 | 5 | class TestDataSource(DataSource): 6 | def introspect(self) -> IntrospectionResult: 7 | return {"some_table": ([], {})} 8 | 9 | @classmethod 10 | def get_name(cls) -> str: 11 | return "Test Data Source" 12 | 13 | @classmethod 14 | def get_description(cls) -> str: 15 | return "Data source for testing" 16 | 17 | credentials_schema = { 18 | "type": "object", 19 | "properties": {"access_token": {"type": "string"}}, 20 | "required": ["access_token"], 21 | } 22 | 23 | params_schema = { 24 | "type": "object", 25 | "properties": {"some_field": {"type": "string"}}, 26 | "required": ["some_field"], 27 | } 28 | 29 | 30 | __plugin__ = TestDataSource 31 | -------------------------------------------------------------------------------- /test/resources/external_sql.splitfile: -------------------------------------------------------------------------------- 1 | FROM otheruser/pg_mount:latest IMPORT fruits AS my_fruits, vegetables 2 | 3 | SQL FILE ${EXTERNAL_SQL_FILE} 4 | -------------------------------------------------------------------------------- /test/resources/external_sql.sql: -------------------------------------------------------------------------------- 1 | -- external SQL file that gets called from within an splitfile with the default schema set to the mountpoint 2 | -- being produced. Uses SQL-style comments since this whole file gets sent to the engine and doesn't need things 3 | -- like line escapes etc. 
4 | CREATE TABLE join_table AS SELECT fruit_id AS id, my_fruits.name AS fruit, vegetables.name AS vegetable 5 | FROM my_fruits JOIN vegetables 6 | ON fruit_id = vegetable_id -------------------------------------------------------------------------------- /test/resources/from_local.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount:latest 2 | # Same idea as from_remote but here we clone the _local_ repo into the output and create the join table. 3 | 4 | SQL CREATE TABLE join_table AS SELECT fruit_id AS id, fruits.name AS fruit, vegetables.name AS vegetable \ 5 | FROM fruits JOIN vegetables\ 6 | ON fruit_id = vegetable_id 7 | -------------------------------------------------------------------------------- /test/resources/from_remote.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount:${TAG} 2 | # This is supposed to import the remote test/pg_mount repo locally as output and base derivations off of it. 3 | 4 | SQL CREATE TABLE join_table AS SELECT fruit_id AS id, fruits.name AS fruit, vegetables.name AS vegetable \ 5 | FROM fruits JOIN vegetables\ 6 | ON fruit_id = vegetable_id 7 | 8 | # We're now supposed to have 3 tables here: the original two fruits/vegetables and the derived join table. -------------------------------------------------------------------------------- /test/resources/from_remote_multistage.splitfile: -------------------------------------------------------------------------------- 1 | # Stage 1: get both tables from the remote and join them 2 | 3 | FROM test/pg_mount:${TAG} AS output 4 | 5 | SQL CREATE TABLE join_table AS SELECT fruit_id AS id, fruits.name AS fruit, vegetables.name AS vegetable \ 6 | FROM fruits JOIN vegetables\ 7 | ON fruit_id = vegetable_id 8 | 9 | 10 | # Stage 2: from the first stage, import just the join table and make sure to use an import query to produce 11 | # a snapshot. 
12 | 13 | FROM EMPTY AS output_stage_2 14 | FROM output IMPORT {SELECT id, fruit, vegetable FROM join_table} AS balanced_diet -------------------------------------------------------------------------------- /test/resources/import_all_from_mounted.splitfile: -------------------------------------------------------------------------------- 1 | FROM MOUNT postgres_fdw originro:originpass@pgorigin:5432 '{"dbname": "origindb", "remote_schema": "public"}' 2 | IMPORT ALL -------------------------------------------------------------------------------- /test/resources/import_and_update.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount IMPORT fruits AS my_fruits, vegetables 2 | 3 | SQL INSERT INTO my_fruits VALUES (3, 'mayonnaise') 4 | 5 | SQL UPDATE vegetables SET name = 'cucumber' WHERE vegetable_id = 1 -------------------------------------------------------------------------------- /test/resources/import_from_mounted_db.splitfile: -------------------------------------------------------------------------------- 1 | FROM MOUNT postgres_fdw originro:originpass@pgorigin:5432 '{"dbname": "origindb", "remote_schema": "public"}' 2 | IMPORT {SELECT * FROM fruits WHERE name = 'orange'} AS my_fruits, 3 | {SELECT * FROM vegetables WHERE name LIKE '%o'} AS o_vegetables, 4 | vegetables, 5 | fruits AS all_fruits 6 | -------------------------------------------------------------------------------- /test/resources/import_from_mounted_db_with_sql.splitfile: -------------------------------------------------------------------------------- 1 | FROM MOUNT postgres_fdw originro:originpass@pgorigin:5432 '{"dbname": "origindb", "remote_schema": "public"}' 2 | IMPORT {SELECT * FROM fruits WHERE name = 'orange'} AS my_fruits, 3 | {SELECT * FROM vegetables WHERE name LIKE '%o'} AS o_vegetables, 4 | vegetables, 5 | fruits AS all_fruits 6 | 7 | SQL CREATE TABLE new_table AS SELECT * FROM all_fruits -------------------------------------------------------------------------------- /test/resources/import_from_preuploaded_remote.splitfile: -------------------------------------------------------------------------------- 1 | # Stage 2 that runs against the output (pre-pushed to the remote) defined in import_remote_multiple.splitfile 2 | 3 | FROM output:latest IMPORT join_table, my_fruits 4 | FROM test_mg_mount:latest IMPORT stuff 5 | 6 | SQL CREATE TABLE diet AS SELECT id, name, fruit, vegetable FROM join_table JOIN stuff \ 7 | ON join_table.id = stuff.duration -------------------------------------------------------------------------------- /test/resources/import_local.splitfile: -------------------------------------------------------------------------------- 1 | # We don't allow SOURCE any more -- all tables have to be imported. 
2 | FROM test/pg_mount IMPORT fruits AS my_fruits 3 | -------------------------------------------------------------------------------- /test/resources/import_local_multiple_with_queries.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount IMPORT fruits AS my_fruits, vegetables 2 | 3 | SQL DELETE FROM my_fruits WHERE fruit_id = 1 4 | 5 | SQL CREATE TABLE join_table AS SELECT fruit_id AS id, my_fruits.name AS fruit, vegetables.name AS vegetable \ 6 | FROM my_fruits JOIN vegetables on fruit_id = vegetable_id -------------------------------------------------------------------------------- /test/resources/import_remote_broken_stage_2.splitfile: -------------------------------------------------------------------------------- 1 | # Second line references a table that doesn't exist. 2 | 3 | FROM test/pg_mount:v1 IMPORT fruits AS my_fruits, vegetables 4 | 5 | SQL {CREATE TABLE join_table AS 6 | SELECT fruit_id AS id, my_fruits.name AS fruit, 7 | vegetables.name AS vegetable 8 | FROM nonexistent_fruits_table JOIN vegetables 9 | ON fruit_id = vegetable_id 10 | } 11 | -------------------------------------------------------------------------------- /test/resources/import_remote_multiple.splitfile: -------------------------------------------------------------------------------- 1 | # The preprocessor replaces $ + {...} with params passed to it from via args or from the commandline (currently 2 | # everywhere, even in the comments). 3 | # Escaping $ works too: \${ESCAPED} doesn't get changed. 4 | 5 | FROM test/pg_mount:${TAG} IMPORT fruits AS my_fruits, vegetables 6 | 7 | SQL {CREATE TABLE join_table AS 8 | SELECT fruit_id AS id, my_fruits.name AS fruit, 9 | vegetables.name AS vegetable 10 | FROM my_fruits JOIN vegetables 11 | ON fruit_id = vegetable_id 12 | } 13 | -------------------------------------------------------------------------------- /test/resources/import_with_custom_query.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount IMPORT {SELECT * FROM fruits WHERE name = 'orange'} AS my_fruits, 2 | {SELECT * FROM vegetables WHERE name LIKE '%o'} AS o_vegetables, 3 | vegetables, 4 | fruits AS all_fruits 5 | -------------------------------------------------------------------------------- /test/resources/import_with_custom_query_and_sql.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount:${TAG} IMPORT {SELECT * FROM fruits WHERE name = 'orange'} AS my_fruits, 2 | {SELECT * FROM vegetables WHERE name LIKE '%o'} AS o_vegetables, 3 | vegetables, 4 | fruits AS all_fruits 5 | 6 | SQL CREATE TABLE test_table AS SELECT * FROM all_fruits -------------------------------------------------------------------------------- /test/resources/ingestion/bigquery/dummy_credentials.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "service_account", 3 | "project_id": "project_id", 4 | "private_key_id": "private_key_id", 5 | "private_key": "private_key", 6 | "client_email": "client_email", 7 | "client_id": "client_id", 8 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 9 | "token_uri": "https://oauth2.googleapis.com/token", 10 | "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", 11 | "client_x509_cert_url": "client_x509_cert_url" 12 | } -------------------------------------------------------------------------------- 
/test/resources/ingestion/csv/base_df.csv: -------------------------------------------------------------------------------- 1 | fruit_id,timestamp,name 2 | 1,2018-01-01 00:11:11,apple 3 | 2,2018-01-02 00:22:22,orange 4 | 3,2018-01-03 00:33:33,mayonnaise 5 | 4,2018-01-04 00:44:44,mustard -------------------------------------------------------------------------------- /test/resources/ingestion/csv/base_df_kv.csv: -------------------------------------------------------------------------------- 1 | key,value 2 | 1,banana 3 | 2,apple 4 | -------------------------------------------------------------------------------- /test/resources/ingestion/csv/encoding-win-1252.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/resources/ingestion/csv/encoding-win-1252.csv -------------------------------------------------------------------------------- /test/resources/ingestion/csv/evil_df.csv: -------------------------------------------------------------------------------- 1 | id,job_title,some_number 2 | "1","PRESIDENT\","25" 3 | "2"," ","26" 4 | -------------------------------------------------------------------------------- /test/resources/ingestion/csv/grades.csv: -------------------------------------------------------------------------------- 1 | "Last name", "First name", "SSN", "Test1", "Test2", "Test3", "Test4", "Final", "Grade" 2 | "Alfalfa", "Aloysius", "123-45-6789", 40.0, 90.0, 100.0, 83.0, 49.0, "D-" 3 | "Alfred", "University", "123-12-1234", 41.0, 97.0, 96.0, 97.0, 48.0, "D+" 4 | "Gerty", "Gramma", "567-89-0123", 41.0, 80.0, 60.0, 40.0, 44.0, "C" 5 | "Android", "Electric", "087-65-4321", 42.0, 23.0, 36.0, 45.0, 47.0, "B-" 6 | "Bumpkin", "Fred", "456-78-9012", 43.0, 78.0, 88.0, 77.0, 45.0, "A-" 7 | "Rubble", "Betty", "234-56-7890", 44.0, 90.0, 80.0, 90.0, 46.0, "C-" 8 | "Noshow", "Cecil", "345-67-8901", 45.0, 11.0, -1.0, 4.0, 43.0, "F" 9 | "Buff", "Bif", "632-79-9939", 46.0, 20.0, 30.0, 40.0, 50.0, "B+" 10 | "Airpump", "Andrew", "223-45-6789", 49.0 1.0, 90.0, 100.0, 83.0, "A" 11 | "Backus", "Jim", "143-12-1234", 48.0, 1.0, 97.0, 96.0, 97.0, "A+" 12 | "Carnivore", "Art", "565-89-0123", 44.0, 1.0, 80.0, 60.0, 40.0, "D+" 13 | "Dandy", "Jim", "087-75-4321", 47.0, 1.0, 23.0, 36.0, 45.0, "C+" 14 | "Elephant", "Ima", "456-71-9012", 45.0, 1.0, 78.0, 88.0, 77.0, "B-" 15 | "Franklin", "Benny", "234-56-2890", 50.0, 1.0, 90.0, 80.0, 90.0, "B-" 16 | "George", "Boy", "345-67-3901", 40.0, 1.0, 11.0, -1.0, 4.0, "B" 17 | "Heffalump", "Harvey", "632-79-9439", 30.0, 1.0, 20.0, 30.0, 40.0, "C" 18 | -------------------------------------------------------------------------------- /test/resources/ingestion/csv/mac_newlines.csv: -------------------------------------------------------------------------------- 1 | fruit_id,timestamp,name 1,2018-01-01 00:11:11,apple 2,2018-01-02 00:22:22,orange 3,2018-01-03 00:33:33,mayonnaise 4,2018-01-04 00:44:44,mustard -------------------------------------------------------------------------------- /test/resources/ingestion/csv/patch_df.csv: -------------------------------------------------------------------------------- 1 | fruit_id,timestamp,name 2 | 2,2018-01-02 00:22:22,orange 3 | 3,2018-12-31 23:59:49,mayonnaise 4 | 4,2018-12-30 00:00:00,chandelier -------------------------------------------------------------------------------- /test/resources/ingestion/csv/patch_df_kv.csv: 
-------------------------------------------------------------------------------- 1 | key,value 2 | 2,kumquat 3 | 3,pendulum 4 | -------------------------------------------------------------------------------- /test/resources/ingestion/csv/separator_df.csv: -------------------------------------------------------------------------------- 1 | fruit_id;timestamp;name 2 | 1;2018-01-01 00:11:11;apple 3 | 2;2018-01-02 00:22:22;orange 4 | 3;2018-01-03 00:33:33;mayonnaise 5 | 4;2018-01-04 00:44:44;mustard -------------------------------------------------------------------------------- /test/resources/ingestion/dbt/airbyte_normalization/README.md: -------------------------------------------------------------------------------- 1 | This is a dbt project used in an integration test. -------------------------------------------------------------------------------- /test/resources/ingestion/dbt/airbyte_normalization/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'sg_dbt_integration_test' 2 | version: '1.0' 3 | config-version: 2 4 | 5 | profile: 'some_profile' 6 | 7 | clean-targets: # directories to be removed by `dbt clean` 8 | - "build" 9 | - "dbt_modules" 10 | 11 | quoting: 12 | database: true 13 | schema: false 14 | identifier: true 15 | 16 | models: 17 | sg_dbt_integration_test: 18 | +materialized: table 19 | +unlogged: true 20 | -------------------------------------------------------------------------------- /test/resources/ingestion/dbt/airbyte_normalization/models/dim_mushrooms.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | (_airbyte_data ->> 'name')::text AS mushroom_name, 3 | (_airbyte_data ->> 'discovery')::timestamp AS discovered_on 4 | FROM {{ source('airbyte_raw', '_airbyte_raw_mushrooms') }} -------------------------------------------------------------------------------- /test/resources/ingestion/dbt/airbyte_normalization/models/sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | sources: 3 | - name: airbyte_raw 4 | schema: deliberately_wrong_schema_to_make_sure_we_change_it 5 | quoting: 6 | database: true 7 | schema: false 8 | identifier: false 9 | tables: 10 | - name: _airbyte_raw_mushrooms 11 | -------------------------------------------------------------------------------- /test/resources/ingestion/dbt/jaffle_csv/README.md: -------------------------------------------------------------------------------- 1 | # Sample CSV files for the jaffle_shop project 2 | 3 | Taken from https://github.com/dbt-labs/jaffle_shop/tree/main/data -------------------------------------------------------------------------------- /test/resources/ingestion/singer/fake_tap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import json 3 | import os 4 | 5 | import click 6 | 7 | 8 | @click.command(name="tap") 9 | @click.option("-c", "--config", type=click.File("r")) 10 | @click.option("-s", "--state", type=click.File("r")) 11 | @click.option("--catalog", type=click.File("r")) 12 | @click.option("-p", "--properties", type=click.File("r")) 13 | @click.option("-d", "--discover", is_flag=True) 14 | def tap(state, config, catalog, properties, discover): 15 | basepath = os.path.dirname(__file__) 16 | json.load(config) 17 | 18 | if discover: 19 | with open(os.path.join(basepath, "discover.json")) as f: 20 | click.echo(f.read(), nl=False) 21 | return 22 | 23 | assert catalog or 
properties 24 | catalog_j = json.load(catalog or properties) 25 | assert len(catalog_j["streams"]) == 2 26 | 27 | if state: 28 | json.load(state) 29 | with open(os.path.join(basepath, "update.json")) as f: 30 | click.echo(f.read(), nl=False) 31 | return 32 | 33 | with open(os.path.join(basepath, "initial.json")) as f: 34 | click.echo(f.read(), nl=False) 35 | 36 | 37 | if __name__ == "__main__": 38 | tap() 39 | -------------------------------------------------------------------------------- /test/resources/ingestion/singer/schema_change.json: -------------------------------------------------------------------------------- 1 | {"type": "STATE", "value": {}} 2 | {"type": "SCHEMA", "stream": "stargazers", "schema": {"selected": true, "type": ["null", "object"], "additionalProperties": false, "properties": {"_sdc_repository": {"type": ["string"]}, "user": {"type": ["null", "object"], "additionalProperties": false, "properties": {"id": {"type": ["null", "string"]}}}, "starred_at": {"type": ["null", "string"], "format": "date-time"}, "user_id": {"type": ["null", "string"]}}}, "key_properties": ["user_id"]} 3 | {"type": "RECORD", "stream": "stargazers", "record": {"starred_at": "2020-10-11T21:09:30.000000Z", "user": {"id": 100004}, "_sdc_repository": "splitgraph/splitgraph", "user_id": "100004"}, "time_extracted": "2020-10-14T11:06:42.565793Z"} 4 | {"type": "RECORD", "stream": "stargazers", "record": {"starred_at": "2019-04-18T02:40:47.000000Z", "user": {"id": "string_user_id"}, "_sdc_repository": "splitgraph/splitgraph", "user_id": "string_user_id"}, "time_extracted": "2020-10-14T11:06:40.852311Z"} 5 | {"type": "STATE", "value": {"bookmarks": {"splitgraph/splitgraph": {"stargazers": {"since": "2020-10-14T11:06:42.565793Z"}}}}} 6 | -------------------------------------------------------------------------------- /test/resources/inline_sql.splitfile: -------------------------------------------------------------------------------- 1 | # Test SQL commands that import data directly from other images 2 | 3 | SQL { 4 | CREATE TABLE balanced_diet AS 5 | SELECT 6 | fruits.fruit_id AS id, 7 | fruits.name AS fruit, 8 | my_fruits.timestamp AS timestamp, 9 | vegetables.name AS vegetable 10 | FROM "otheruser/pg_mount".fruits fruits 11 | JOIN "otheruser/pg_mount".vegetables vegetables 12 | ON fruits.fruit_id = vegetable_id 13 | LEFT JOIN "test/pg_mount:v2".fruits my_fruits 14 | ON my_fruits.fruit_id = fruits.fruit_id; 15 | ALTER TABLE balanced_diet ADD PRIMARY KEY (id) 16 | } 17 | -------------------------------------------------------------------------------- /test/resources/multiline_sql.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount 2 | 3 | SQL { 4 | INSERT INTO FRUITS VALUES 5 | (3, 'banana'), 6 | (4, 'pineapple'); 7 | } 8 | -------------------------------------------------------------------------------- /test/resources/schema_changes.splitfile: -------------------------------------------------------------------------------- 1 | FROM test/pg_mount IMPORT fruits 2 | FROM test_mg_mount IMPORT stuff 3 | 4 | SQL CREATE TABLE spirit_fruits AS SELECT fruits.fruit_id, stuff.name, fruits.name AS spirit_fruit\ 5 | FROM fruits JOIN stuff ON fruits.fruit_id = stuff.duration 6 | 7 | # Add a new column, set it to be the old id + 10, make it PK and then delete the old ID. 8 | # Currently this produces a snap for every action (since it's a schema change). 9 | 10 | # NB This used to be several separate SQL commands. 
However, the new_id would be a null in the beginning, which meant 11 | # that it couldn't be a part of a DIFF object (since in absence of a PK the DIFF is PKd on the whole tuple). 12 | # Somehow this bug only showed up when WAL was changed to audit triggers to detect changes. 13 | 14 | SQL { 15 | ALTER TABLE spirit_fruits ADD COLUMN new_id integer; 16 | UPDATE spirit_fruits SET new_id = fruit_id + 10; 17 | ALTER TABLE spirit_fruits ADD PRIMARY KEY (new_id); 18 | ALTER TABLE spirit_fruits DROP COLUMN fruit_id 19 | } 20 | -------------------------------------------------------------------------------- /test/resources/splitgraph_yml/readmes/readme_1.md: -------------------------------------------------------------------------------- 1 | # Readme 1 -------------------------------------------------------------------------------- /test/resources/splitgraph_yml/readmes/readme_2.md: -------------------------------------------------------------------------------- 1 | # Readme 2 -------------------------------------------------------------------------------- /test/resources/update_without_import.splitfile: -------------------------------------------------------------------------------- 1 | SQL CREATE TABLE my_fruits (id integer, name varchar) 2 | 3 | SQL INSERT INTO my_fruits VALUES (1, 'pineapple') 4 | -------------------------------------------------------------------------------- /test/splitgraph/BUILD: -------------------------------------------------------------------------------- 1 | python_tests( 2 | name="tests", 3 | skip_mypy=True, 4 | skip_black=True, 5 | ) 6 | 7 | python_test_utils( 8 | name="test_utils", 9 | overrides={"conftest.py": {"skip_mypy": True, "skip_black": True}}, 10 | ) 11 | 12 | python_sources( 13 | skip_mypy=True, 14 | skip_black=True, 15 | ) 16 | -------------------------------------------------------------------------------- /test/splitgraph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/splitgraph/__init__.py -------------------------------------------------------------------------------- /test/splitgraph/cloud/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/splitgraph/cloud/__init__.py -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/splitgraph/cloud/project/__init__.py -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Sample dbt project referencing data from all ingested/added Splitgraph datasets. 2 | # This is not ready to run, as you'll need to: 3 | # 4 | # * Manually define tables in your sources (see models/staging/sources.yml, "tables" sections) 5 | # * Reference the sources using the source(...) 
macros (see 6 | # models/staging/(source_name)/source_name.sql for an example) 7 | # * Write the actual models 8 | 9 | name: 'splitgraph_template' 10 | version: '1.0.0' 11 | config-version: 2 12 | 13 | # This setting configures which "profile" dbt uses for this project. 14 | # Note that the Splitgraph runner overrides this at runtime, so this is only useful 15 | # if you are running a local Splitgraph engine and are developing this dbt model against it. 16 | profile: 'splitgraph_template' 17 | 18 | target-path: "target" # directory which will store compiled SQL files 19 | clean-targets: # directories to be removed by `dbt clean` 20 | - "target" 21 | - "dbt_packages" 22 | 23 | 24 | # Configuring models 25 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 26 | 27 | models: 28 | splitgraph_template: 29 | # Staging data (materialized as CTEs) that references the source Splitgraph repositories. 30 | # Here as a starting point. You can reference these models downstream in models that actually 31 | # materialize as tables. 32 | staging: 33 | +materialized: ephemeral 34 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/models/staging/and_third_data/and_third_data.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM {{ source('and_third_data', 'some_table') }} 4 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/models/staging/some_data_source/some_data_source.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM {{ source('some_data_source', 'some_table') }} 4 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/models/staging/some_other_data_raw/some_other_data_raw.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | * 3 | FROM {{ source('some_other_data_raw', 'some_table') }} 4 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/models/staging/sources.yml: -------------------------------------------------------------------------------- 1 | # This file defines all data sources referenced by this model. The mapping 2 | # between the data source name and the Splitgraph repository is in the settings of the dbt plugin 3 | # in splitgraph.yml (see params -> sources) 4 | version: 2 5 | sources: 6 | - name: some_data_source 7 | # Splitgraph will use a different temporary schema for this source by patching this project 8 | # at runtime, so this is for informational purposes only. 9 | schema: some-data/source 10 | # We can't currently infer the tables produced by a data source at project generation time, 11 | # so for now you'll need to define the tables manually. 
12 | tables: 13 | - name: some_table 14 | - name: some_other_data_raw 15 | schema: some-other/data-raw 16 | tables: 17 | - name: some_table 18 | - name: and_third_data 19 | schema: and-third/data 20 | tables: 21 | - name: some_table 22 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_no_dbt/generate_project/splitgraph.credentials.yml: -------------------------------------------------------------------------------- 1 | credentials: 2 | postgres_fdw: # This is the name of this credential that "external" sections can reference. 3 | plugin: postgres_fdw 4 | # Credential-specific data matching the plugin's credential schema 5 | data: 6 | username: '' # REQUIRED 7 | password: '' # REQUIRED 8 | airbyte-postgres: # This is the name of this credential that "external" sections can reference. 9 | plugin: airbyte-postgres 10 | # Credential-specific data matching the plugin's credential schema 11 | data: 12 | normalization_git_url: '' # dbt model Git URL. For `custom` normalization, a URL to the Git repo with the dbt project, for example,`https://uname:pass_or_token@github.com/organisation/repository.git`. 13 | password: '' # Password. Password associated with the username. 14 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/dbt_project.yml: -------------------------------------------------------------------------------- 1 | # Sample dbt project referencing data from all ingested/added Splitgraph datasets. 2 | # This is not ready to run, as you'll need to: 3 | # 4 | # * Manually define tables in your sources (see models/staging/sources.yml, "tables" sections) 5 | # * Reference the sources using the source(...) macros (see 6 | # models/staging/(source_name)/source_name.sql for an example) 7 | # * Write the actual models 8 | 9 | name: 'splitgraph_template' 10 | version: '1.0.0' 11 | config-version: 2 12 | 13 | # This setting configures which "profile" dbt uses for this project. 14 | # Note that the Splitgraph runner overrides this at runtime, so this is only useful 15 | # if you are running a local Splitgraph engine and are developing this dbt model against it. 16 | profile: 'splitgraph_template' 17 | 18 | target-path: "target" # directory which will store compiled SQL files 19 | clean-targets: # directories to be removed by `dbt clean` 20 | - "target" 21 | - "dbt_packages" 22 | 23 | 24 | # Configuring models 25 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 26 | 27 | models: 28 | splitgraph_template: 29 | # Staging data (materialized as CTEs) that references the source Splitgraph repositories. 30 | # Here as a starting point. You can reference these models downstream in models that actually 31 | # materialize as tables. 32 | staging: 33 | +materialized: ephemeral 34 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/models/staging/sources.yml: -------------------------------------------------------------------------------- 1 | # This file defines all data sources referenced by this model. 
The mapping 2 | # between the data source name and the Splitgraph repository is in the settings of the dbt plugin 3 | # in splitgraph.yml (see params -> sources) 4 | version: 2 5 | sources: 6 | - name: myns_postgres_fdw 7 | # Splitgraph will use a different temporary schema for this source by patching this project 8 | # at runtime, so this is for informational purposes only. 9 | schema: myns/postgres_fdw 10 | # We can't currently infer the tables produced by a data source at project generation time, 11 | # so for now you'll need to define the tables manually. 12 | tables: 13 | - name: some_table 14 | - name: myns_airbyte_postgres 15 | schema: myns/airbyte-postgres 16 | tables: 17 | - name: some_table 18 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/splitgraph.credentials.yml: -------------------------------------------------------------------------------- 1 | credentials: 2 | postgres_fdw: # This is the name of this credential that "external" sections can reference. 3 | plugin: postgres_fdw 4 | # Credential-specific data matching the plugin's credential schema 5 | data: 6 | username: '' # REQUIRED 7 | password: '' # REQUIRED 8 | airbyte-postgres: # This is the name of this credential that "external" sections can reference. 9 | plugin: airbyte-postgres 10 | # Credential-specific data matching the plugin's credential schema 11 | data: 12 | normalization_git_url: '' # dbt model Git URL. For `custom` normalization, a URL to the Git repo with the dbt project, for example,`https://uname:pass_or_token@github.com/organisation/repository.git`. 13 | password: '' # Password. Password associated with the username. 14 | dbt-sample: 15 | plugin: dbt 16 | data: 17 | git_url: $THIS_REPO_URL 18 | -------------------------------------------------------------------------------- /test/splitgraph/cloud/project/test_merging.py: -------------------------------------------------------------------------------- 1 | import os 2 | from io import StringIO 3 | from test.splitgraph.conftest import RESOURCES 4 | 5 | from splitgraph.cloud.project.models import SplitgraphYAML 6 | from splitgraph.cloud.project.utils import merge_project_files 7 | from splitgraph.utils.yaml import safe_dump, safe_load 8 | 9 | 10 | def test_project_merging(snapshot): 11 | with open(os.path.join(RESOURCES, "splitgraph_yml", "splitgraph.yml")) as f: 12 | left = SplitgraphYAML.parse_obj(safe_load(f)) 13 | with open(os.path.join(RESOURCES, "splitgraph_yml", "splitgraph.override.yml")) as f: 14 | right = SplitgraphYAML.parse_obj(safe_load(f)) 15 | 16 | merged = merge_project_files(left, right) 17 | 18 | result = StringIO() 19 | safe_dump(merged.dict(by_alias=True, exclude_unset=True), result) 20 | result.seek(0) 21 | snapshot.assert_match(result.read(), "repositories.merged.yml") 22 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/BUILD: -------------------------------------------------------------------------------- 1 | python_sources( 2 | skip_mypy=True, 3 | skip_black=True, 4 | ) 5 | 6 | python_tests( 7 | name="tests", 8 | skip_mypy=True, 9 | skip_black=True, 10 | ) 11 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/splitgraph/commandline/__init__.py -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud/test_commandline_plugins/sgr_cloud_plugins.txt: -------------------------------------------------------------------------------- 1 | ID Name Description 2 | ---------------- ------------------ --------------------------------------------------------------------------------------------------------------- 3 | postgres_fdw PostgreSQL Data source for PostgreSQL databases that supports live querying, based on postgres_fdw 4 | airbyte-postgres Postgres (Airbyte) Airbyte connector for Postgres. For more information, see https://docs.airbyte.io/integrations/sources/postgres 5 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud/test_commandline_plugins/sgr_cloud_plugins_filter.txt: -------------------------------------------------------------------------------- 1 | ID Name Description 2 | ------------ ---------- --------------------------------------------------------------------------------------- 3 | postgres_fdw PostgreSQL Data source for PostgreSQL databases that supports live querying, based on postgres_fdw 4 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_jobs/test_csv_download/False/sgr_cloud_download_failure.txt: -------------------------------------------------------------------------------- 1 | (FAILURE) Waiting for task ID export_task 2 | 3 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_jobs/test_csv_download/True/sgr_cloud_download_success.txt: -------------------------------------------------------------------------------- 1 | (SUCCESS) Waiting for task ID export_task 2 | 3 | Downloaded query results to some-file.csv.gz. 4 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_jobs/test_csv_upload/False/sgr_cloud_upload_failure.txt: -------------------------------------------------------------------------------- 1 | Uploading the files... 2 | (STARTED) Waiting for task ID ingest_task 3 | (FAILURE) Waiting for task ID ingest_task 4 | 5 | Logs for /someuser/somerepo_1/ingest_task 6 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_jobs/test_csv_upload/True/sgr_cloud_upload_success.txt: -------------------------------------------------------------------------------- 1 | Uploading the files... 2 | (STARTED) Waiting for task ID ingest_task 3 | (SUCCESS) Waiting for task ID ingest_task 4 | 5 | 6 | Success. 
See the repository at http://www.example.com/someuser/somerepo_1/latest/-/tables or query it with: 7 | sgr cloud sql 'SELECT * FROM "someuser/somerepo_1"."base_df"' 8 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_jobs/test_job_status_explicit_repos/sgr_cloud_status_explicit.txt: -------------------------------------------------------------------------------- 1 | Repository Task ID Started Finished Manual Status 2 | -------------------- -------------- ------------------- ---------- -------- -------- 3 | someuser/somerepo_1 somerepo1_task 2020-01-01 00:00:00 False STARTED 4 | otheruser/somerepo_2 5 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_jobs/test_job_status_yaml/sgr_cloud_status_yml.txt: -------------------------------------------------------------------------------- 1 | Repository Task ID Started Finished Manual Status 2 | -------------------- -------------- ------------------- ------------------- -------- -------- 3 | otheruser/somerepo_2 4 | someuser/somerepo_1 somerepo1_task 2020-01-01 00:00:00 False STARTED 5 | someuser/somerepo_2 somerepo2_task 2021-01-01 00:00:00 2021-01-01 01:00:00 False SUCCESS 6 | -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_metadata/test_commandline_dump/sgr_cloud_dump_multiple/readmes/otheruser-somerepo_2.fe37.md: -------------------------------------------------------------------------------- 1 | Test Repo 2 Readme -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_metadata/test_commandline_dump/sgr_cloud_dump_multiple/readmes/someuser-somerepo_1.b7f3.md: -------------------------------------------------------------------------------- 1 | Test Repo 1 Readme -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_metadata/test_commandline_dump/sgr_cloud_dump_single/readmes/someuser-somerepo_1.b7f3.md: -------------------------------------------------------------------------------- 1 | Test Repo 1 Readme -------------------------------------------------------------------------------- /test/splitgraph/commandline/snapshots/test_cloud_metadata/test_commandline_dump/sgr_cloud_dump_single/splitgraph.yml: -------------------------------------------------------------------------------- 1 | repositories: 2 | - external: null 3 | metadata: 4 | description: Repository Description 1 5 | extra_metadata: 6 | key_1: 7 | key_2: value_1 8 | license: Public Domain 9 | readme: 10 | file: someuser-somerepo_1.b7f3.md 11 | sources: 12 | - anchor: test data source 13 | href: https://example.com 14 | isCreator: true 15 | isSameAs: false 16 | topics: [] 17 | namespace: someuser 18 | repository: somerepo_1 19 | -------------------------------------------------------------------------------- /test/splitgraph/commands/BUILD: -------------------------------------------------------------------------------- 1 | python_tests( 2 | name="tests", 3 | skip_mypy=True, 4 | skip_black=True, 5 | ) 6 | -------------------------------------------------------------------------------- /test/splitgraph/commands/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/splitgraph/commands/__init__.py -------------------------------------------------------------------------------- /test/splitgraph/commands/snapshots/test_multicorn_fdws/test_aggregations_join_combinations/es/account_join_sub_aggs.yml: -------------------------------------------------------------------------------- 1 | - !!python/tuple 2 | - 20 3 | - 49568.0 4 | - 1650.0 5 | - !!python/tuple 6 | - 21 7 | - 49433.0 8 | - 2213.0 9 | - !!python/tuple 10 | - 22 11 | - 49404.0 12 | - 1133.0 13 | - !!python/tuple 14 | - 23 15 | - 49587.0 16 | - 1464.0 17 | - !!python/tuple 18 | - 24 19 | - 48745.0 20 | - 1011.0 21 | - !!python/tuple 22 | - 25 23 | - 49795.0 24 | - 1956.0 25 | - !!python/tuple 26 | - 26 27 | - 48466.0 28 | - 1447.0 29 | - !!python/tuple 30 | - 27 31 | - 46868.0 32 | - 1110.0 33 | - !!python/tuple 34 | - 28 35 | - 49222.0 36 | - 2889.0 37 | - !!python/tuple 38 | - 29 39 | - 49119.0 40 | - 3596.0 41 | - !!python/tuple 42 | - 30 43 | - 49334.0 44 | - 2726.0 45 | - !!python/tuple 46 | - 31 47 | - 48758.0 48 | - 2384.0 49 | - !!python/tuple 50 | - 32 51 | - 48294.0 52 | - 1031.0 53 | - !!python/tuple 54 | - 33 55 | - 48734.0 56 | - 1314.0 57 | - !!python/tuple 58 | - 34 59 | - 48997.0 60 | - 3001.0 61 | - !!python/tuple 62 | - 35 63 | - 49741.0 64 | - 1481.0 65 | - !!python/tuple 66 | - 36 67 | - 49989.0 68 | - 1249.0 69 | - !!python/tuple 70 | - 37 71 | - 47546.0 72 | - 1360.0 73 | - !!python/tuple 74 | - 38 75 | - 49339.0 76 | - 3022.0 77 | - !!python/tuple 78 | - 39 79 | - 47257.0 80 | - 3589.0 81 | - !!python/tuple 82 | - 40 83 | - 49671.0 84 | - 1932.0 85 | -------------------------------------------------------------------------------- /test/splitgraph/commands/snapshots/test_multicorn_fdws/test_aggregations_join_combinations/pg/account_join_sub_aggs.yml: -------------------------------------------------------------------------------- 1 | - !!python/tuple 2 | - 20 3 | - 49568 4 | - 1650 5 | - !!python/tuple 6 | - 21 7 | - 49433 8 | - 2213 9 | - !!python/tuple 10 | - 22 11 | - 49404 12 | - 1133 13 | - !!python/tuple 14 | - 23 15 | - 49587 16 | - 1464 17 | - !!python/tuple 18 | - 24 19 | - 48745 20 | - 1011 21 | - !!python/tuple 22 | - 25 23 | - 49795 24 | - 1956 25 | - !!python/tuple 26 | - 26 27 | - 48466 28 | - 1447 29 | - !!python/tuple 30 | - 27 31 | - 46868 32 | - 1110 33 | - !!python/tuple 34 | - 28 35 | - 49222 36 | - 2889 37 | - !!python/tuple 38 | - 29 39 | - 49119 40 | - 3596 41 | - !!python/tuple 42 | - 30 43 | - 49334 44 | - 2726 45 | - !!python/tuple 46 | - 31 47 | - 48758 48 | - 2384 49 | - !!python/tuple 50 | - 32 51 | - 48294 52 | - 1031 53 | - !!python/tuple 54 | - 33 55 | - 48734 56 | - 1314 57 | - !!python/tuple 58 | - 34 59 | - 48997 60 | - 3001 61 | - !!python/tuple 62 | - 35 63 | - 49741 64 | - 1481 65 | - !!python/tuple 66 | - 36 67 | - 49989 68 | - 1249 69 | - !!python/tuple 70 | - 37 71 | - 47546 72 | - 1360 73 | - !!python/tuple 74 | - 38 75 | - 49339 76 | - 3022 77 | - !!python/tuple 78 | - 39 79 | - 47257 80 | - 3589 81 | - !!python/tuple 82 | - 40 83 | - 49671 84 | - 1932 85 | -------------------------------------------------------------------------------- /test/splitgraph/commands/snapshots/test_multicorn_fdws/test_grouping_and_aggregations_bare/es/account_count_by_age.yml: -------------------------------------------------------------------------------- 1 | - !!python/tuple 2 | - 40 3 | - 45 4 | - 1932.0 5 | - !!python/tuple 6 | - 39 7 | - 60 8 | - 3589.0 9 
| - !!python/tuple 10 | - 38 11 | - 39 12 | - 3022.0 13 | - !!python/tuple 14 | - 37 15 | - 42 16 | - 1360.0 17 | - !!python/tuple 18 | - 36 19 | - 52 20 | - 1249.0 21 | - !!python/tuple 22 | - 35 23 | - 52 24 | - 1481.0 25 | - !!python/tuple 26 | - 34 27 | - 49 28 | - 3001.0 29 | - !!python/tuple 30 | - 33 31 | - 50 32 | - 1314.0 33 | - !!python/tuple 34 | - 32 35 | - 52 36 | - 1031.0 37 | - !!python/tuple 38 | - 31 39 | - 61 40 | - 2384.0 41 | - !!python/tuple 42 | - 30 43 | - 47 44 | - 2726.0 45 | - !!python/tuple 46 | - 29 47 | - 35 48 | - 3596.0 49 | - !!python/tuple 50 | - 28 51 | - 51 52 | - 2889.0 53 | - !!python/tuple 54 | - 27 55 | - 39 56 | - 1110.0 57 | - !!python/tuple 58 | - 26 59 | - 59 60 | - 1447.0 61 | - !!python/tuple 62 | - 25 63 | - 42 64 | - 1956.0 65 | - !!python/tuple 66 | - 24 67 | - 42 68 | - 1011.0 69 | - !!python/tuple 70 | - 23 71 | - 42 72 | - 1464.0 73 | - !!python/tuple 74 | - 22 75 | - 51 76 | - 1133.0 77 | - !!python/tuple 78 | - 21 79 | - 46 80 | - 2213.0 81 | - !!python/tuple 82 | - 20 83 | - 44 84 | - 1650.0 85 | -------------------------------------------------------------------------------- /test/splitgraph/commands/snapshots/test_multicorn_fdws/test_grouping_and_aggregations_bare/pg/account_count_by_age.yml: -------------------------------------------------------------------------------- 1 | - !!python/tuple 2 | - 40 3 | - 45 4 | - 1932 5 | - !!python/tuple 6 | - 39 7 | - 60 8 | - 3589 9 | - !!python/tuple 10 | - 38 11 | - 39 12 | - 3022 13 | - !!python/tuple 14 | - 37 15 | - 42 16 | - 1360 17 | - !!python/tuple 18 | - 36 19 | - 52 20 | - 1249 21 | - !!python/tuple 22 | - 35 23 | - 52 24 | - 1481 25 | - !!python/tuple 26 | - 34 27 | - 49 28 | - 3001 29 | - !!python/tuple 30 | - 33 31 | - 50 32 | - 1314 33 | - !!python/tuple 34 | - 32 35 | - 52 36 | - 1031 37 | - !!python/tuple 38 | - 31 39 | - 61 40 | - 2384 41 | - !!python/tuple 42 | - 30 43 | - 47 44 | - 2726 45 | - !!python/tuple 46 | - 29 47 | - 35 48 | - 3596 49 | - !!python/tuple 50 | - 28 51 | - 51 52 | - 2889 53 | - !!python/tuple 54 | - 27 55 | - 39 56 | - 1110 57 | - !!python/tuple 58 | - 26 59 | - 59 60 | - 1447 61 | - !!python/tuple 62 | - 25 63 | - 42 64 | - 1956 65 | - !!python/tuple 66 | - 24 67 | - 42 68 | - 1011 69 | - !!python/tuple 70 | - 23 71 | - 42 72 | - 1464 73 | - !!python/tuple 74 | - 22 75 | - 51 76 | - 1133 77 | - !!python/tuple 78 | - 21 79 | - 46 80 | - 2213 81 | - !!python/tuple 82 | - 20 83 | - 44 84 | - 1650 85 | -------------------------------------------------------------------------------- /test/splitgraph/commands/snapshots/test_multicorn_fdws/test_simple_grouping_clauses/es/account_states.yml: -------------------------------------------------------------------------------- 1 | - AK 2 | - AL 3 | - AR 4 | - AZ 5 | - CA 6 | - CO 7 | - CT 8 | - DC 9 | - DE 10 | - FL 11 | - GA 12 | - HI 13 | - IA 14 | - ID 15 | - IL 16 | - IN 17 | - KS 18 | - KY 19 | - LA 20 | - MA 21 | - MD 22 | - ME 23 | - MI 24 | - MN 25 | - MO 26 | - MS 27 | - MT 28 | - NC 29 | - ND 30 | - NE 31 | - NH 32 | - NJ 33 | - NM 34 | - NV 35 | - NY 36 | - OH 37 | - OK 38 | - OR 39 | - PA 40 | - RI 41 | - SC 42 | - SD 43 | - TN 44 | - TX 45 | - UT 46 | - VA 47 | - VT 48 | - WA 49 | - WI 50 | - WV 51 | - WY 52 | -------------------------------------------------------------------------------- /test/splitgraph/commands/snapshots/test_multicorn_fdws/test_simple_grouping_clauses/pg/account_states.yml: -------------------------------------------------------------------------------- 1 | - AK 2 | - AL 3 | - AR 4 
| - AZ 5 | - CA 6 | - CO 7 | - CT 8 | - DC 9 | - DE 10 | - FL 11 | - GA 12 | - HI 13 | - IA 14 | - ID 15 | - IL 16 | - IN 17 | - KS 18 | - KY 19 | - LA 20 | - MA 21 | - MD 22 | - ME 23 | - MI 24 | - MN 25 | - MO 26 | - MS 27 | - MT 28 | - NC 29 | - ND 30 | - NE 31 | - NH 32 | - NJ 33 | - NM 34 | - NV 35 | - NY 36 | - OH 37 | - OK 38 | - OR 39 | - PA 40 | - RI 41 | - SC 42 | - SD 43 | - TN 44 | - TX 45 | - UT 46 | - VA 47 | - VT 48 | - WA 49 | - WI 50 | - WV 51 | - WY 52 | -------------------------------------------------------------------------------- /test/splitgraph/commands/test_range_indexing.py: -------------------------------------------------------------------------------- 1 | from test.splitgraph.conftest import OUTPUT 2 | 3 | 4 | def test_range_index_ordering_collation(local_engine_empty): 5 | # Test that range index gets min/max values of text columns using the "C" collation 6 | # (sort by byte values of characters) rather than anything else (e.g. in en-US 7 | # "a" comes before "B" even though "B" has a smaller ASCII code). 8 | 9 | OUTPUT.init() 10 | OUTPUT.run_sql( 11 | "CREATE TABLE test (key_1 INTEGER, key_2 VARCHAR," 12 | " value_1 VARCHAR, value_2 INTEGER, PRIMARY KEY (key_1, key_2))" 13 | ) 14 | 15 | OUTPUT.engine.run_sql_batch( 16 | "INSERT INTO test VALUES (%s, %s, %s, %s)", 17 | [ 18 | (1, "ONE", "apple", 4), 19 | (1, "one", "ORANGE", 3), 20 | (2, "two", "banana", 2), 21 | (2, "TWO", "CUCUMBER", 1), 22 | ], 23 | schema=OUTPUT.to_schema(), 24 | ) 25 | 26 | head = OUTPUT.commit() 27 | object_id = head.get_table("test").objects[0] 28 | 29 | assert OUTPUT.objects.get_object_meta([object_id])[object_id].object_index == { 30 | "range": { 31 | "$pk": [[1, "ONE"], [2, "two"]], 32 | "key_1": [1, 2], 33 | "key_2": ["ONE", "two"], 34 | "value_1": ["CUCUMBER", "banana"], 35 | "value_2": [1, 4], 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /test/splitgraph/ingestion/BUILD: -------------------------------------------------------------------------------- 1 | python_tests( 2 | name="tests", 3 | skip_mypy=True, 4 | skip_black=True, 5 | ) 6 | -------------------------------------------------------------------------------- /test/splitgraph/ingestion/test_athena.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | import pytest 4 | from psycopg2 import DatabaseError 5 | 6 | from splitgraph.core.types import Credentials, Params 7 | from splitgraph.hooks.mount_handlers import mount 8 | from splitgraph.ingestion.athena import AmazonAthenaDataSource 9 | 10 | 11 | def test_athena_data_source_options(): 12 | source = AmazonAthenaDataSource( 13 | Mock(), 14 | credentials=Credentials({"aws_access_key_id": "key", "aws_secret_access_key": "secret"}), 15 | params=Params( 16 | { 17 | "region_name": "eu-west-3", 18 | "schema_name": "mydb", 19 | "s3_staging_dir": "s3://athena/results/", 20 | } 21 | ), 22 | ) 23 | 24 | assert source.get_server_options() == { 25 | "db_url": "awsathena+rest://key:secret@athena.eu-west-3.amazonaws.com:443/mydb?s3_staging_dir=s3://athena/results/", 26 | "wrapper": "multicorn.sqlalchemyfdw.SqlAlchemyFdw", 27 | "cast_quals": "true", 28 | } 29 | 30 | 31 | @pytest.mark.mounting 32 | def test_athena_mount_expected_error(): 33 | with pytest.raises( 34 | DatabaseError, match="The security token included in the request is invalid" 35 | ): 36 | mount( 37 | "aws", 38 | "athena", 39 | { 40 | "aws_access_key_id": "key", 41 | "aws_secret_access_key": "secret", 
42 | "region_name": "eu-west-3", 43 | "schema_name": "mydb", 44 | "s3_staging_dir": "s3://athena/results/", 45 | }, 46 | ) 47 | -------------------------------------------------------------------------------- /test/splitgraph/ingestion/test_inference.py: -------------------------------------------------------------------------------- 1 | from splitgraph.ingestion.inference import _infer_column_schema 2 | 3 | 4 | def test_inference(): 5 | assert _infer_column_schema(["1", "2", "3"]) == "integer" 6 | assert _infer_column_schema(["1.1", "2.4", "3"]) == "numeric" 7 | assert _infer_column_schema(["true", "TRUE", "f"]) == "boolean" 8 | assert _infer_column_schema(['{"a": 42}']) == "json" 9 | assert _infer_column_schema(["1", "", "", "4"]) == "integer" 10 | assert ( 11 | _infer_column_schema(["2020-01-01 12:34:56", "2020-01-02 00:00:00.123", ""]) == "timestamp" 12 | ) 13 | assert _infer_column_schema([""]) == "character varying" 14 | -------------------------------------------------------------------------------- /test/splitgraph/splitfile/BUILD: -------------------------------------------------------------------------------- 1 | python_tests( 2 | name="tests", 3 | skip_mypy=True, 4 | skip_black=True, 5 | ) 6 | -------------------------------------------------------------------------------- /test/splitgraph/splitfile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/splitgraph/sgr/0c6523b0cbf53b5886447a77e2f67ef49d36dde5/test/splitgraph/splitfile/__init__.py -------------------------------------------------------------------------------- /test/splitgraph/test_drawing.py: -------------------------------------------------------------------------------- 1 | from test.splitgraph.conftest import OUTPUT, load_splitfile 2 | 3 | from splitgraph.core._drawing import render_tree 4 | from splitgraph.splitfile.execution import execute_commands, rebuild_image 5 | 6 | 7 | def test_drawing(pg_repo_local): 8 | # Doesn't really check anything, mostly used to make sure the tree drawing code doesn't throw. 9 | execute_commands(load_splitfile("import_local.splitfile"), output=OUTPUT) 10 | 11 | # Make another branch to check multi-branch repositories can render. 12 | pg_repo_local.images()[1].checkout() 13 | pg_repo_local.run_sql("INSERT INTO fruits VALUES (3, 'kiwi')") 14 | pg_repo_local.commit() 15 | 16 | rebuild_image(OUTPUT.head, {pg_repo_local: pg_repo_local.head.image_hash}) 17 | 18 | render_tree(OUTPUT) 19 | -------------------------------------------------------------------------------- /test/splitgraph/utils.py: -------------------------------------------------------------------------------- 1 | from splitgraph.core.types import TableColumn, TableSchema 2 | 3 | 4 | def reassign_ordinals(schema: TableSchema) -> TableSchema: 5 | # When a table is created anew, its ordinals are made consecutive again. 6 | return [ 7 | TableColumn(i + 1, col.name, col.pg_type, col.is_pk, col.comment) 8 | for i, col in enumerate(schema) 9 | ] 10 | 11 | 12 | def drop_comments(schema: TableSchema) -> TableSchema: 13 | # For storing object schemata in JSON in /var/lib/splitgraph/objects, 14 | # we don't store the comments (they're a feature of the table, not the object). 
15 | return [TableColumn(*(t[:4])) for t in schema] 16 | -------------------------------------------------------------------------------- /wait-for-test-architecture.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | REPO_ROOT_DIR=$(cd -P -- "$(dirname -- "$0")" && pwd -P) 4 | TEST_DIR="${REPO_ROOT_DIR}/test" 5 | 6 | exec "$TEST_DIR"/architecture/wait-for-architecture.sh "$@" 7 | --------------------------------------------------------------------------------
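
A short illustrative note on the two schema helpers in test/splitgraph/utils.py above: the sketch below shows how they might be exercised on a hand-built schema. It is a minimal example and not part of the repository. It assumes TableColumn (imported from splitgraph.core.types, as in the file above) is a named tuple with the five fields ordinal, name, pg_type, is_pk, comment, with comment optional (implied by the four-argument construction in drop_comments), and that the test package is importable from the repository root, as the repo's own tests assume when importing from test.splitgraph.conftest. The column definitions themselves are hypothetical.

from test.splitgraph.utils import drop_comments, reassign_ordinals

from splitgraph.core.types import TableColumn

# A schema whose ordinals are no longer consecutive, e.g. after columns were
# dropped from the source table at some point.
schema = [
    TableColumn(2, "id", "integer", True, "primary key"),
    TableColumn(5, "name", "character varying", False, "customer name"),
]

# reassign_ordinals renumbers the columns 1..N, matching what happens when the
# table is created anew; every other field is left untouched.
print(reassign_ordinals(schema))
# -> ordinals become 1 and 2, names/types/PK flags/comments unchanged

# drop_comments keeps only the first four fields, mirroring how object schemata
# are stored in /var/lib/splitgraph/objects without table comments.
print(drop_comments(schema))
# -> comment falls back to its default (assumed None) on both columns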