├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md └── workflows │ ├── build-dev.yml │ ├── build-release.yml │ ├── build-test.yml │ ├── bump.yml │ ├── test-docker-build.yml │ ├── test-sling-action.yml │ └── test-version.yml ├── .gitignore ├── .goreleaser.linux.amd64.yaml ├── .goreleaser.linux.arm64.yaml ├── .goreleaser.mac.yaml ├── .goreleaser.notes.md ├── .goreleaser.windows.yaml ├── .infisical.json ├── LICENSE ├── README.md ├── api ├── README.md ├── llms.md └── specs │ ├── attio.yaml │ ├── dbt_cloud.yaml │ ├── shopify_graphql.yaml │ ├── shopify_legacy.yaml │ ├── sling_platform.yaml │ ├── stripe.yaml │ └── uberall.yaml ├── cmd └── sling │ ├── .gitignore │ ├── Dockerfile │ ├── Dockerfile.arm64 │ ├── resource │ └── examples.sh │ ├── sling_cli.go │ ├── sling_cli_test.go │ ├── sling_cloud.go │ ├── sling_conns.go │ ├── sling_prompt.go │ ├── sling_run.go │ ├── sling_test.go │ ├── sling_update.go │ └── tests │ ├── files │ ├── binary │ │ └── test.bytes.csv │ ├── parquet │ │ ├── test1.1.parquet │ │ └── test1.parquet │ ├── test.wide.csv │ ├── test1.1.csv │ ├── test1.1.csv.gz │ ├── test1.2.csv │ ├── test1.csv │ ├── test1.parquet │ ├── test1.result.csv │ ├── test1.upsert.csv │ ├── test2.csv │ ├── test3.json │ ├── test4.csv │ ├── test5.csv │ ├── test6.csv │ ├── test7.csv │ └── test8.csv │ ├── pipelines │ ├── p.01.yaml │ └── p.02.yaml │ ├── replications │ ├── apis │ │ ├── r.61.stripe.yaml │ │ └── r.62.shopify.yaml │ ├── r.00.yaml │ ├── r.01.yaml │ ├── r.02.yaml │ ├── r.03.yaml │ ├── r.04.yaml │ ├── r.05.yaml │ ├── r.06.yaml │ ├── r.07.yaml │ ├── r.08.yaml │ ├── r.09.yaml │ ├── r.10.yaml │ ├── r.11.yaml │ ├── r.12.yaml │ ├── r.13.yaml │ ├── r.14.yaml │ ├── r.15.yaml │ ├── r.16.yaml │ ├── r.17.yaml │ ├── r.18.yaml │ ├── r.19.yaml │ ├── r.20.yaml │ └── r.21.yaml │ ├── suite.cli.tsv │ ├── suite.db.template.tsv │ ├── suite.file.template.tsv │ ├── task.yaml │ ├── test1.json │ └── test1.yaml ├── core ├── dbio │ ├── .envkey │ ├── .gitignore │ ├── README.md │ ├── api │ │ ├── api.go │ │ ├── api_test.go │ │ ├── functions.go │ │ ├── functions_test.go │ │ ├── queue.go │ │ ├── queue_test.go │ │ └── spec.go │ ├── connection │ │ ├── connection.go │ │ ├── connection_discover.go │ │ ├── connection_local.go │ │ └── connection_test.go │ ├── database │ │ ├── analyzer.go │ │ ├── analyzer_test.go │ │ ├── clickhouse_test.go │ │ ├── database.go │ │ ├── database_athena.go │ │ ├── database_bigquery.go │ │ ├── database_bigtable.go │ │ ├── database_clickhouse.go │ │ ├── database_d1.go │ │ ├── database_duckdb.go │ │ ├── database_duckdb_unix.go │ │ ├── database_duckdb_windows.go │ │ ├── database_elasticsearch.go │ │ ├── database_mongo.go │ │ ├── database_mysql.go │ │ ├── database_oracle.go │ │ ├── database_postgres.go │ │ ├── database_prometheus.go │ │ ├── database_proton.go │ │ ├── database_redshift.go │ │ ├── database_snowflake.go │ │ ├── database_sqlite.go │ │ ├── database_sqlserver.go │ │ ├── database_starrocks.go │ │ ├── database_test.go │ │ ├── database_trino.go │ │ ├── dbx.go │ │ ├── dbx_test.go │ │ ├── schemata.go │ │ ├── schemata_test.go │ │ ├── test │ │ │ ├── test1.1.csv │ │ │ ├── test1.1.csv.gz │ │ │ ├── test1.csv │ │ │ ├── test1.csv.gz │ │ │ └── test1.pipe.csv │ │ └── transaction.go │ ├── dbio.go │ ├── dbio_types.go │ ├── filesys │ │ ├── fs.go │ │ ├── fs_azure.go │ │ ├── fs_file_node.go │ │ ├── fs_ftp.go │ │ ├── fs_google.go │ │ ├── fs_http.go │ │ ├── fs_local.go │ │ ├── fs_s3.go │ │ ├── fs_sftp.go │ │ ├── fs_test.go │ │ ├── sheet_test.go │ │ └── test │ │ │ ├── 
delta │ │ │ ├── _delta_log │ │ │ │ ├── .00000000000000000000.json.crc │ │ │ │ └── 00000000000000000000.json │ │ │ ├── country=Argentina │ │ │ │ ├── .part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc │ │ │ │ └── part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet │ │ │ ├── country=China │ │ │ │ ├── .part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc │ │ │ │ └── part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet │ │ │ └── country=Germany │ │ │ │ ├── .part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc │ │ │ │ └── part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet │ │ │ ├── lineitem_iceberg │ │ │ ├── README.md │ │ │ ├── data │ │ │ │ ├── .00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc │ │ │ │ ├── .00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc │ │ │ │ ├── 00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet │ │ │ │ └── 00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet │ │ │ └── metadata │ │ │ │ ├── .10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc │ │ │ │ ├── .10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc │ │ │ │ ├── .cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc │ │ │ │ ├── .snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc │ │ │ │ ├── .snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc │ │ │ │ ├── .v1.metadata.json.crc │ │ │ │ ├── .v2.metadata.json.crc │ │ │ │ ├── .version-hint.text.crc │ │ │ │ ├── 10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro │ │ │ │ ├── 10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro │ │ │ │ ├── cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro │ │ │ │ ├── snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro │ │ │ │ ├── snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro │ │ │ │ ├── v1.metadata.json │ │ │ │ ├── v2.metadata.json │ │ │ │ └── version-hint.text │ │ │ ├── test.excel1.xlsx │ │ │ ├── test.excel2.xlsx │ │ │ ├── test1 │ │ │ ├── avro │ │ │ │ ├── train.avro │ │ │ │ └── twitter.avro │ │ │ ├── csv │ │ │ │ ├── test1.1.csv │ │ │ │ ├── test1.1.csv.gz │ │ │ │ └── test1.csv │ │ │ ├── json │ │ │ │ ├── test1.1.json │ │ │ │ ├── test1.2.json │ │ │ │ ├── test1.json │ │ │ │ └── test4.json │ │ │ ├── parquet │ │ │ │ ├── test1.1.parquet │ │ │ │ └── test1.parquet │ │ │ └── xml │ │ │ │ ├── test1.1.xml │ │ │ │ └── test1.xml │ │ │ └── test2 │ │ │ ├── json │ │ │ ├── test1.1.json │ │ │ └── test1.2.json │ │ │ ├── test2.1.noheader.csv │ │ │ └── test2.pipe.csv │ ├── iop │ │ ├── README.md │ │ ├── avro.go │ │ ├── compression.go │ │ ├── compression_test.go │ │ ├── csv.go │ │ ├── csv_duckdb.go │ │ ├── csv_test.go │ │ ├── dataflow.go │ │ ├── dataset.go │ │ ├── dataset_test.go │ │ ├── datastream.go │ │ ├── datastream_batch.go │ │ ├── datastream_test.go │ │ ├── datatype.go │ │ ├── datatype_test.go │ │ ├── delta.go │ │ ├── delta_test.go │ │ ├── duckdb.go │ │ ├── duckdb_test.go │ │ ├── iceberg.go │ │ ├── iceberg_test.go │ │ ├── json.go │ │ ├── parquet.go │ │ ├── parquet_arrow.go │ │ ├── parquet_arrow_test.go │ │ ├── parquet_duckdb.go │ │ ├── parquet_test.go │ │ ├── partition.go │ │ ├── partition_test.go │ │ ├── sas7bdat.go │ │ ├── sheet.go │ │ ├── sheet_excel.go │ │ ├── sheet_excel_test.go │ │ ├── sheet_google.go │ │ ├── ssh.go │ │ ├── stream_processor.go │ │ ├── templates │ │ │ └── fix_mapping.tsv │ │ ├── test │ │ │ ├── delta │ │ │ │ ├── _delta_log │ │ │ │ │ ├── .00000000000000000000.json.crc │ │ │ │ │ └── 00000000000000000000.json │ │ │ │ ├── country=Argentina 
│ │ │ │ │ ├── .part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc │ │ │ │ │ └── part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet │ │ │ │ ├── country=China │ │ │ │ │ ├── .part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc │ │ │ │ │ └── part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet │ │ │ │ └── country=Germany │ │ │ │ │ ├── .part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc │ │ │ │ │ └── part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet │ │ │ ├── lineitem_iceberg │ │ │ │ ├── README.md │ │ │ │ ├── data │ │ │ │ │ ├── .00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc │ │ │ │ │ ├── .00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc │ │ │ │ │ ├── 00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet │ │ │ │ │ └── 00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet │ │ │ │ └── metadata │ │ │ │ │ ├── .10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc │ │ │ │ │ ├── .10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc │ │ │ │ │ ├── .cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc │ │ │ │ │ ├── .snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc │ │ │ │ │ ├── .snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc │ │ │ │ │ ├── .v1.metadata.json.crc │ │ │ │ │ ├── .v2.metadata.json.crc │ │ │ │ │ ├── .version-hint.text.crc │ │ │ │ │ ├── 10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro │ │ │ │ │ ├── 10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro │ │ │ │ │ ├── cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro │ │ │ │ │ ├── snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro │ │ │ │ │ ├── snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro │ │ │ │ │ ├── v1.metadata.json │ │ │ │ │ ├── v2.metadata.json │ │ │ │ │ └── version-hint.text │ │ │ ├── my_file.utf16.csv │ │ │ ├── test.excel1.xlsx │ │ │ ├── test.excel2.xlsx │ │ │ ├── test1.1.csv │ │ │ ├── test1.1.csv.gz │ │ │ ├── test1.1.parquet │ │ │ ├── test1.csv │ │ │ ├── test1.parquet │ │ │ ├── test2.1.noheader.csv │ │ │ └── test2.pipe.csv │ │ ├── transforms.go │ │ └── transforms_test.go │ ├── local │ │ ├── local.go │ │ └── local_test.go │ ├── scripts │ │ ├── check_parquet.py │ │ └── test.sh │ └── templates │ │ ├── _properties.yaml │ │ ├── athena.yaml │ │ ├── azuredwh.yaml │ │ ├── azuresql.yaml │ │ ├── base.yaml │ │ ├── bigquery.yaml │ │ ├── bigtable.yaml │ │ ├── clickhouse.yaml │ │ ├── d1.yaml │ │ ├── duckdb.yaml │ │ ├── elasticsearch.yaml │ │ ├── hive.yaml │ │ ├── mariadb.yaml │ │ ├── mongodb.yaml │ │ ├── motherduck.yaml │ │ ├── mysql.yaml │ │ ├── oracle.yaml │ │ ├── postgres.yaml │ │ ├── profile.def.yaml │ │ ├── profile.yaml │ │ ├── prometheus.yaml │ │ ├── proton.yaml │ │ ├── redshift.yaml │ │ ├── schema.pipeline.json │ │ ├── snowflake.yaml │ │ ├── spark.yaml │ │ ├── sqlite.yaml │ │ ├── sqlserver.yaml │ │ ├── starrocks.yaml │ │ ├── trino.yaml │ │ ├── types_general_to_native.tsv │ │ ├── types_native_to_general.tsv │ │ └── vertica.yaml ├── env │ ├── default.env.yaml │ ├── env.go │ ├── envfile.go │ └── vars.go ├── sling │ ├── config.go │ ├── config_test.go │ ├── hooks.go │ ├── http.go │ ├── pbar.go │ ├── pipeline.go │ ├── project.go │ ├── replication.go │ ├── replication_test.go │ ├── sling.go │ ├── task.go │ ├── task_func.go │ ├── task_run.go │ ├── task_run_read.go │ ├── task_run_write.go │ ├── task_state.go │ └── types.go ├── store │ ├── db.go │ └── store.go └── version.go ├── examples └── example.go ├── go.mod ├── go.sum ├── 
logo-with-text.png └── scripts ├── build.ps1 ├── build.sh ├── build.test.sh ├── ci ├── build.linux.sh ├── build.mac.sh ├── build.win.ps1 ├── prep.linux.sh ├── prep.mac.sh ├── prep.win.ps1 └── windows_ci.py ├── prep.gomod.sh └── test.sh /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | # github: ['flarco'] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | # custom: ['https://www.paypal.com/donate/?hosted_button_id=98DL44Z6JJVWS'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Report a problem 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Issue Description 10 | 11 | - Description of the issue: 12 | 13 | - Sling version (`sling --version`): 14 | 15 | - Operating System (`linux`, `mac`, `windows`): 16 | 17 | - Replication Configuration: 18 | 19 | ```yaml 20 | source: 21 | target: 22 | streams: 23 | ... 24 | ``` 25 | 26 | - Log Output (please run command with `-d`): 27 | 28 | ``` 29 | Paste log here. 30 | ``` 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Sling Community 4 | url: https://discord.gg/q5xtaSNDvp 5 | about: Get support on Discord -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for Sling 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | --- 8 | 9 | ## Feature Description 10 | 11 | What would you like to see added to Sling? 12 | -------------------------------------------------------------------------------- /.github/workflows/build-test.yml: -------------------------------------------------------------------------------- 1 | name: Build & Test 2 | 3 | on: 4 | workflow_dispatch: 5 | repository_dispatch: 6 | types: [build-test] 7 | 8 | env: 9 | GITHUB_TOKEN: ${{ secrets.REPO_ACCESS_TOKEN }} 10 | GOPRIVATE: github.com/slingdata-io/* 11 | 12 | jobs: 13 | 14 | build-test-sling: 15 | if: "! 
(contains(github.event.head_commit.message, '[skip ci]') || contains(github.event.head_commit.message, '[bump]'))" 16 | 17 | runs-on: [self-hosted, linux, ubuntu-20] 18 | timeout-minutes: 40 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | 23 | - uses: myrotvorets/set-commit-status-action@master 24 | with: 25 | token: ${{ secrets.GITHUB_TOKEN }} 26 | sha: ${{ github.sha }} 27 | status: pending 28 | 29 | - name: Set up GoLang 30 | uses: actions/setup-go@v3 31 | with: 32 | go-version: "1.22" 33 | cache: false 34 | 35 | - name: Load Secrets 36 | uses: flarco/infisical-action@v3 37 | with: 38 | version: 0.28.1 39 | client_id: ${{ secrets.INFISICAL_CLIENT_ID }} 40 | client_secret: ${{ secrets.INFISICAL_CLIENT_SECRET }} 41 | 42 | - name: Load Secrets (dbio) 43 | uses: flarco/infisical-action@v3 44 | with: 45 | version: 0.28.1 46 | client_id: ${{ secrets.INFISICAL_CLIENT_ID }} 47 | client_secret: ${{ secrets.INFISICAL_CLIENT_SECRET }} 48 | path: /dbio 49 | env: dev 50 | 51 | - name: Build Binary 52 | run: | 53 | # Prep 54 | bash scripts/ci/prep.linux.sh 55 | 56 | # build 57 | bash scripts/ci/build.linux.sh dev 58 | 59 | - name: Run Go Tests (sling) 60 | run: | 61 | export DEBUG='' 62 | 63 | # Oracle env 64 | export LD_LIBRARY_PATH=$ORACLE_HOME/lib:$LD_LIBRARY_PATH 65 | export PATH="$PATH:$ORACLE_HOME/bin" 66 | 67 | bash scripts/test.sh 68 | 69 | build-test-dbio: 70 | if: "! (contains(github.event.head_commit.message, '[skip ci]') || contains(github.event.head_commit.message, '[bump]'))" 71 | 72 | runs-on: [self-hosted, linux, ubuntu-20] 73 | timeout-minutes: 40 74 | 75 | steps: 76 | - uses: actions/checkout@v2 77 | 78 | - name: Set up GoLang 79 | uses: actions/setup-go@v3 80 | with: 81 | go-version: "1.22" 82 | cache: false 83 | 84 | - name: Load Secrets 85 | uses: flarco/infisical-action@v3 86 | with: 87 | version: 0.28.1 88 | client_id: ${{ secrets.INFISICAL_CLIENT_ID }} 89 | client_secret: ${{ secrets.INFISICAL_CLIENT_SECRET }} 90 | 91 | - name: Load Secrets (dbio) 92 | uses: flarco/infisical-action@v3 93 | with: 94 | version: 0.28.1 95 | client_id: ${{ secrets.INFISICAL_CLIENT_ID }} 96 | client_secret: ${{ secrets.INFISICAL_CLIENT_SECRET }} 97 | path: /dbio 98 | env: dev 99 | 100 | - name: Build Binary 101 | run: | 102 | # Prep 103 | bash scripts/ci/prep.linux.sh 104 | 105 | # build 106 | bash scripts/ci/build.linux.sh dev 107 | 108 | - name: Run Go Tests (dbio) 109 | run: | 110 | export DEBUG='' 111 | 112 | cd core/dbio 113 | bash scripts/test.sh 114 | 115 | build-test-success: 116 | 117 | runs-on: [self-hosted, linux, ubuntu-20] 118 | needs: [ build-test-sling, build-test-dbio ] 119 | 120 | steps: 121 | - uses: actions/checkout@v2 122 | 123 | - uses: myrotvorets/set-commit-status-action@master 124 | if: always() 125 | with: 126 | token: ${{ secrets.GITHUB_TOKEN }} 127 | sha: ${{ github.sha }} 128 | status: ${{ job.status }} -------------------------------------------------------------------------------- /.github/workflows/bump.yml: -------------------------------------------------------------------------------- 1 | name: Bump 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - closed 7 | branches: 8 | - main 9 | paths-ignore: 10 | - '.github/**' 11 | - 'core/dbio/api/specs/**' 12 | - 'api/**' 13 | - 'core/dbio/api/llms.md' 14 | - 'README.md' 15 | 16 | jobs: 17 | bump: 18 | if: github.event.pull_request.merged == true && (! 
contains(github.event.head_commit.message, '[skip ci]')) 19 | runs-on: ubuntu-latest 20 | outputs: 21 | new_tag: ${{ steps.tag_version.outputs.new_tag }} 22 | new_version: ${{ steps.tag_version.outputs.new_version }} 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Bump version and push tag 26 | id: tag_version 27 | uses: mathieudutour/github-tag-action@v6.0 28 | with: 29 | github_token: ${{ secrets.GITHUB_TOKEN }} 30 | custom_tag: ${{ github.event.inputs.new_tag }} -------------------------------------------------------------------------------- /.github/workflows/test-sling-action.yml: -------------------------------------------------------------------------------- 1 | name: Test Sling Action 2 | 3 | on: 4 | workflow_dispatch: 5 | repository_dispatch: 6 | types: [test-sling-action] 7 | 8 | jobs: 9 | test-sling-action: 10 | runs-on: ubuntu-latest 11 | timeout-minutes: 5 12 | env: 13 | LOCAL: LOCAL 14 | SQLITE: sqlite:///tmp/test.db 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - name: List Connections 20 | uses: slingdata-io/sling-action@v1 21 | with: 22 | command: conns list 23 | 24 | - name: Run Replication 25 | uses: slingdata-io/sling-action@v1 26 | with: 27 | command: run -r cmd/sling/tests/replications/r.00.yaml -------------------------------------------------------------------------------- /.github/workflows/test-version.yml: -------------------------------------------------------------------------------- 1 | name: Test Version 2 | 3 | on: [workflow_dispatch] 4 | 5 | jobs: 6 | test1: 7 | runs-on: [self-hosted, linux] 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | 12 | - name: Get the version 13 | id: get_version 14 | run: | 15 | TAG=$(git tag -l --sort=-creatordate | head -n 1) 16 | VERSION=$(echo $TAG | sed 's/v//') 17 | echo ::set-output name=version::$VERSION 18 | 19 | - name: Get the version 20 | env: 21 | VERSION: ${{ steps.get_version.outputs.VERSION }} 22 | run: | 23 | echo "VERSION -> $VERSION" 24 | echo "TAG2 -> $(git tag -l --sort=-creatordate | head -n 1)" 25 | 26 | test-mac: 27 | runs-on: [self-hosted, macOS, ARM64] 28 | timeout-minutes: 20 29 | 30 | steps: 31 | # - name: Git Fetch (to fix (would clobber existing tag) 32 | # run: git fetch --tags --force 33 | 34 | - name: Checkout 35 | uses: actions/checkout@v1 36 | with: 37 | fetch-depth: 0 38 | 39 | test-windows: 40 | # runs-on: [self-hosted, Windows] 41 | runs-on: windows-latest 42 | timeout-minutes: 20 43 | 44 | steps: 45 | - name: Checkout 46 | uses: actions/checkout@v3 47 | with: 48 | fetch-depth: 0 49 | 50 | test-linux-arm64: 51 | runs-on: [self-hosted, Linux, ARM64] 52 | timeout-minutes: 20 53 | 54 | steps: 55 | - name: Checkout 56 | uses: actions/checkout@v3 57 | with: 58 | fetch-depth: 0 59 | 60 | test-linux-amd64: 61 | # runs-on: ubuntu-20.04 62 | runs-on: [self-hosted, Linux, X64, ubuntu-16] 63 | timeout-minutes: 20 64 | 65 | steps: 66 | - name: Checkout 67 | uses: actions/checkout@v3 68 | with: 69 | fetch-depth: 0 70 | 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | python/sling/bin/ 17 | 18 | sling-mac 19 | 
sling-linux 20 | sling-win.exe 21 | dist/ 22 | .secrets 23 | .DS_Store 24 | demo/sling_commands_demo.workflow 25 | *.screenstudio 26 | ./sling 27 | core/dbio/filesys/test/dataset1M.csv 28 | core/dbio/filesys/test/dataset100k.csv 29 | cmd/sling/tests/suite/ 30 | cmd/sling/tests/replications/r.test.yaml 31 | cmd/sling/tests/pipelines/p.test.yaml 32 | 33 | *..go 34 | *..md 35 | *__test.go 36 | 37 | appicon.png 38 | wails.json 39 | 40 | cmd/sling/build/ 41 | cmd/sling/frontend 42 | .cursorignore 43 | .sling.json 44 | .python-version 45 | pyproject.toml 46 | uv.lock 47 | -------------------------------------------------------------------------------- /.goreleaser.linux.amd64.yaml: -------------------------------------------------------------------------------- 1 | # This is an example .goreleaser.yml file with some sensible defaults. 2 | # Make sure to check the documentation at https://goreleaser.com 3 | project_name: sling 4 | 5 | before: 6 | hooks: 7 | - go mod edit -dropreplace='github.com/flarco/g' go.mod 8 | - go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 9 | - go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 10 | - go mod tidy 11 | 12 | builds: 13 | - main: ./cmd/sling 14 | 15 | env: 16 | - CGO_ENABLED=1 17 | 18 | tags: 19 | - fts5 20 | 21 | goarch: 22 | - amd64 23 | 24 | goos: 25 | - linux 26 | ldflags: 27 | - "-X 'github.com/slingdata-io/sling-cli/core.Version={{.Version}}' -X 'github.com/slingdata-io/sling-cli/core/env.PlausibleURL={{.Env.PLAUSIBLE_URL}}' -X 'github.com/slingdata-io/sling-cli/core/env.SentryDsn={{.Env.SENTRY_DSN}}' -X 'github.com/slingdata-io/sling/agent.Version={{.Version}}'" 28 | 29 | snapshot: 30 | name_template: "{{ incpatch .Version }}" 31 | 32 | archives: 33 | - name_template: '{{ .ProjectName }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}' 34 | 35 | checksum: 36 | name_template: '{{ .Runtime.Goos }}.{{ .Runtime.Goarch }}.checksums.txt' 37 | 38 | dockers: 39 | - dockerfile: 'cmd/sling/Dockerfile' 40 | image_templates: 41 | - "slingdata/sling:{{ .Tag }}" 42 | - "slingdata/sling:{{ .Tag }}-amd64" 43 | - "slingdata/sling:latest-amd64" 44 | - "slingdata/sling:latest" 45 | 46 | # https://goreleaser.com/cookbooks/multi-platform-docker-images/#creating-multi-platform-docker-images-with-goreleaser 47 | # docker_manifests: 48 | # - name_template: 'slingdata/sling:latest' 49 | # image_templates: 50 | # - 'slingdata/sling:{{ .Tag }}-amd64' 51 | # - 'slingdata/sling:{{ .Tag }}-arm64' 52 | 53 | # - name_template: 'slingdata/sling:{{ .Tag }}' 54 | # image_templates: 55 | # - 'slingdata/sling:{{ .Tag }}-amd64' 56 | # - 'slingdata/sling:{{ .Tag }}-arm64' 57 | 58 | release: 59 | mode: replace 60 | header: | 61 | ## Sling {{ .Tag }} ({{ .Date }}) -------------------------------------------------------------------------------- /.goreleaser.linux.arm64.yaml: -------------------------------------------------------------------------------- 1 | # This is an example .goreleaser.yml file with some sensible defaults. 
2 | # Make sure to check the documentation at https://goreleaser.com 3 | project_name: sling 4 | 5 | before: 6 | hooks: 7 | - go mod edit -dropreplace='github.com/flarco/g' go.mod 8 | - go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 9 | - go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 10 | - go mod tidy 11 | 12 | builds: 13 | - main: ./cmd/sling 14 | 15 | env: 16 | - CGO_ENABLED=1 17 | 18 | tags: 19 | - fts5 20 | 21 | goarch: 22 | - arm64 23 | 24 | goos: 25 | - linux 26 | ldflags: 27 | - "-X 'github.com/slingdata-io/sling-cli/core.Version={{.Version}}' -X 'github.com/slingdata-io/sling-cli/core/env.PlausibleURL={{.Env.PLAUSIBLE_URL}}' -X 'github.com/slingdata-io/sling-cli/core/env.SentryDsn={{.Env.SENTRY_DSN}}' -X 'github.com/slingdata-io/sling/agent.Version={{.Version}}'" 28 | 29 | snapshot: 30 | name_template: "{{ incpatch .Version }}" 31 | 32 | archives: 33 | - name_template: '{{ .ProjectName }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}' 34 | 35 | checksum: 36 | name_template: '{{ .Runtime.Goos }}.{{ .Runtime.Goarch }}.checksums.txt' 37 | 38 | # Could figure out how to make arm64 work, pushing manually 39 | # dockers: 40 | # - dockerfile: 'cmd/sling/Dockerfile.arm64' 41 | # image_templates: 42 | # - "slingdata/sling:{{ .Tag }}-arm64" 43 | 44 | release: 45 | mode: replace 46 | header: | 47 | ## Sling {{ .Tag }} ({{ .Date }}) -------------------------------------------------------------------------------- /.goreleaser.mac.yaml: -------------------------------------------------------------------------------- 1 | # This is an example .goreleaser.yml file with some sensible defaults. 2 | # Make sure to check the documentation at https://goreleaser.com 3 | project_name: sling 4 | 5 | before: 6 | hooks: 7 | - go mod edit -dropreplace='github.com/flarco/g' go.mod 8 | - go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 9 | - go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 10 | - go mod tidy 11 | 12 | builds: 13 | - main: ./cmd/sling 14 | 15 | env: 16 | - CGO_ENABLED=1 17 | 18 | tags: 19 | - fts5 20 | 21 | goarch: 22 | - amd64 23 | - arm64 24 | 25 | goos: 26 | - darwin 27 | 28 | ldflags: 29 | - "-X 'github.com/slingdata-io/sling-cli/core.Version={{.Version}}' -X 'github.com/slingdata-io/sling-cli/core/env.PlausibleURL={{.Env.PLAUSIBLE_URL}}' -X 'github.com/slingdata-io/sling-cli/core/env.SentryDsn={{.Env.SENTRY_DSN}}' -X 'github.com/slingdata-io/sling/agent.Version={{.Version}}'" 30 | 31 | universal_binaries: 32 | - id: sling 33 | replace: false 34 | 35 | snapshot: 36 | name_template: "{{ incpatch .Version }}" 37 | 38 | checksum: 39 | name_template: '{{ .Runtime.Goos }}.{{ .Runtime.Goarch }}.checksums.txt' 40 | 41 | archives: 42 | - name_template: '{{ .ProjectName }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}' 43 | 44 | release: 45 | mode: replace 46 | header: | 47 | ## Sling {{ .Tag }} ({{ .Date }}) 48 | 49 | brews: 50 | - name: sling 51 | repository: 52 | owner: slingdata-io 53 | name: homebrew-sling 54 | branch: main 55 | 56 | homepage: https://slingdata.io/ 57 | description: "Data Integration made simple, from the command line. Extract and load data from popular data sources to destinations with high performance and ease." 
-------------------------------------------------------------------------------- /.goreleaser.notes.md: -------------------------------------------------------------------------------- 1 | 2 | See https://github.com/slingdata-io/sling-cli/ for more details. -------------------------------------------------------------------------------- /.goreleaser.windows.yaml: -------------------------------------------------------------------------------- 1 | # This is an example .goreleaser.yml file with some sensible defaults. 2 | # Make sure to check the documentation at https://goreleaser.com 3 | project_name: sling 4 | 5 | before: 6 | hooks: 7 | - go mod edit -dropreplace='github.com/flarco/g' go.mod 8 | - go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 9 | - go mod tidy 10 | 11 | builds: 12 | - main: ./cmd/sling 13 | 14 | env: 15 | - CGO_ENABLED=1 16 | 17 | tags: 18 | - fts5 19 | 20 | goarch: 21 | - amd64 22 | 23 | goos: 24 | - windows 25 | 26 | ldflags: 27 | - "-X 'github.com/slingdata-io/sling-cli/core.Version={{.Version}}' -X 'github.com/slingdata-io/sling-cli/core/env.PlausibleURL={{.Env.PLAUSIBLE_URL}}' -X 'github.com/slingdata-io/sling-cli/core/env.SentryDsn={{.Env.SENTRY_DSN}}' -X 'github.com/slingdata-io/sling/agent.Version={{.Version}}'" 28 | 29 | snapshot: 30 | name_template: "{{ incpatch .Version }}" 31 | 32 | checksum: 33 | name_template: '{{ .Runtime.Goos }}.{{ .Runtime.Goarch }}.checksums.txt' 34 | 35 | archives: 36 | - name_template: '{{ .ProjectName }}_{{ .Os }}_{{ .Arch }}{{ with .Arm }}v{{ . }}{{ end }}{{ with .Mips }}_{{ . }}{{ end }}{{ if not (eq .Amd64 "v1") }}{{ .Amd64 }}{{ end }}' 37 | 38 | release: 39 | mode: replace 40 | header: | 41 | ## Sling {{ .Tag }} ({{ .Date }}) 42 | 43 | scoops: 44 | - name: sling 45 | repository: 46 | owner: slingdata-io 47 | name: scoop-sling 48 | branch: main 49 | 50 | homepage: https://slingdata.io/ 51 | 52 | description: "Data Integration made simple, from the command line. Extract and load data from popular data sources to destinations with high performance and ease." 
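The four .goreleaser.*.yaml files above share the same core build settings: CGO enabled, the `fts5` build tag, `./cmd/sling` as the main package, and `-X` ldflags that stamp `core.Version` (plus the Plausible/Sentry endpoints) at link time. As a point of reference, here is a minimal local build sketch under those assumptions; it is not the project's actual release pipeline, which goes through GoReleaser and the scripts/ci/* helpers, and the version value below is illustrative only.

```bash
# Hedged sketch: assumes Go 1.22+ (per the CI workflow) and a working CGO toolchain.
VERSION=dev
CGO_ENABLED=1 go build -tags fts5 \
  -ldflags "-X 'github.com/slingdata-io/sling-cli/core.Version=${VERSION}'" \
  -o sling ./cmd/sling

# Smoke-test the resulting binary (flag taken from the bug-report template above).
./sling --version
```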
-------------------------------------------------------------------------------- /.infisical.json: -------------------------------------------------------------------------------- 1 | { 2 | "workspaceId": "273c6e9c-9e49-4d97-971c-3b89e6366b8c", 3 | "defaultEnvironment": "", 4 | "secretPath": "/cli", 5 | "gitBranchToEnvironmentMapping": null 6 | } -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | # Sling API Specs -------------------------------------------------------------------------------- /api/specs/attio.yaml: -------------------------------------------------------------------------------- 1 | name: attio 2 | 3 | defaults: 4 | state: 5 | base_url: https://api.attio.com/v2 6 | 7 | request: 8 | method: POST 9 | url: ${state.base_url}/objects/${state.object_id}/records/query 10 | 11 | headers: 12 | Authorization: 'Bearer ${secrets.api_key}' 13 | Content-Type: application/json 14 | 15 | concurrency: 10 16 | rate: 10 17 | 18 | response: 19 | records: 20 | jmespath: data[*] 21 | 22 | 23 | endpoints: 24 | people: 25 | 26 | state: 27 | object_id: ${secrets.people_object_id} 28 | limit: 250 29 | offset: 0 30 | 31 | request: 32 | payload: 33 | limit: ${state.limit} 34 | offset: ${state.offset} 35 | 36 | pagination: 37 | # stop if condition is met 38 | stop_condition: length(records) == 0 || state.offset > 2500 39 | 40 | # set state for next page 41 | next_state: 42 | offset: ${state.offset + state.limit} 43 | 44 | response: 45 | 46 | processors: 47 | - expression: record.id.record_id 48 | output: record.record_id 49 | 50 | # - expression: log(record, "warn") 51 | 52 | 53 | -------------------------------------------------------------------------------- /api/specs/dbt_cloud.yaml: -------------------------------------------------------------------------------- 1 | name: "dbt Cloud API" 2 | description: "API for interacting with dbt Cloud to retrieve jobs, runs, and users" 3 | 4 | defaults: 5 | state: 6 | base_url: https://cloud.getdbt.com/api/v2 7 | account_id: ${ require(secrets.account_id) } # error if account_id is not provided 8 | 9 | request: 10 | method: "GET" 11 | headers: 12 | Accept: "application/json" 13 | Content-Type: "application/json" 14 | Authorization: Token ${secrets.api_token} 15 | 16 | endpoints: 17 | jobs: 18 | description: "Retrieve a list of jobs from dbt Cloud" 19 | state: 20 | limit: 100 21 | offset: 0 22 | 23 | request: 24 | url: "${state.base_url}/accounts/${state.account_id}/jobs/" 25 | method: "GET" 26 | parameters: 27 | limit: "${state.limit}" 28 | offset: "${state.offset}" 29 | 30 | pagination: 31 | next_state: 32 | offset: ${state.offset + state.limit} 33 | stop_condition: length(response.records) < state.limit 34 | 35 | response: 36 | records: 37 | jmespath: "data[]" 38 | primary_key: ["id"] 39 | 40 | runs: 41 | description: "Retrieve a list of job runs from dbt Cloud" 42 | 43 | state: 44 | limit: 100 45 | offset: 0 46 | id__gt: ${ coalesce(env.RUN_START_ID, sync.last_max_id, 0) } 47 | 48 | sync: [ last_max_id ] 49 | 50 | request: 51 | url: "${state.base_url}/accounts/${state.account_id}/runs/" 52 | method: "GET" 53 | parameters: 54 | limit: "${state.limit}" 55 | offset: "${state.offset}" 56 | id__gt: "${state.id__gt}" 57 | 58 | pagination: 59 | next_state: 60 | offset: "${state.offset + state.limit}" 61 | stop_condition: length(response.records) < state.limit 62 | 63 | response: 64 | records: 65 | jmespath: "data[]" 66 | primary_key: 
["id"] 67 | limit: 2000 68 | 69 | processors: 70 | - expression: record.id 71 | output: state.last_max_id # will be synced 72 | aggregation: maximum 73 | 74 | users: 75 | description: "Retrieve a list of users from dbt Cloud" 76 | state: 77 | limit: 100 78 | offset: 0 79 | 80 | request: 81 | url: "${state.base_url}/accounts/${state.account_id}/users/" 82 | method: "GET" 83 | parameters: 84 | limit: "${state.limit}" 85 | offset: "${state.offset}" 86 | 87 | pagination: 88 | next_state: 89 | offset: "${state.offset + state.limit}" 90 | stop_condition: length(response.records) < state.limit 91 | 92 | response: 93 | records: 94 | jmespath: "data[]" 95 | primary_key: ["id"] 96 | 97 | processors: 98 | - expression: nil 99 | output: record.licenses # remove license data -------------------------------------------------------------------------------- /api/specs/sling_platform.yaml: -------------------------------------------------------------------------------- 1 | name: sling-platform 2 | 3 | defaults: 4 | state: 5 | base_url: https://api.slingdata.io 6 | 7 | request: 8 | headers: 9 | Authorization: 'Sling-Project-Token ${secrets.api_token}' 10 | Content-Type: application/json 11 | 12 | 13 | endpoints: 14 | connections: 15 | 16 | request: 17 | url: ${state.base_url}/connection/list 18 | 19 | response: 20 | records: 21 | jmespath: connections[*] 22 | primary_key: [name] 23 | 24 | processors: 25 | - expression: date_format(now(), "%Y") 26 | output: record.loaded_year 27 | - expression: log(record) 28 | 29 | executions: 30 | 31 | request: 32 | url: ${state.base_url}/execution/list 33 | 34 | response: 35 | records: 36 | jmespath: executions[*] 37 | primary_key: [exec_id] 38 | 39 | # processors: 40 | # - expression: log(record) 41 | 42 | executions_many: 43 | 44 | state: 45 | start_date: ${ coalesce(env.START_DATE, state.max_start_time, "2025-01-01") } 46 | end_date: ${ coalesce(env.END_DATE, date_format(now(), "%Y-%m-%dT%H:%M:%S.%fZ")) } 47 | 48 | # external sync on start/end of replication. null on first run. 49 | state_sync: [ max_start_time ] 50 | 51 | request: 52 | url: ${state.base_url}/execution/list 53 | 54 | parameters: 55 | limit: 500 56 | filters: > 57 | {"period":["${state.start_date}","${state.end_date}"]} 58 | 59 | response: 60 | records: 61 | jmespath: > 62 | executions[*].{ 63 | exec_id: exec_id, 64 | rows: rows, 65 | replication_md5: replication_md5, 66 | task_md5: task_md5, 67 | start_time: start_time 68 | } 69 | primary_key: [exec_id] 70 | # limit: 3 71 | 72 | processors: 73 | - aggregation: maximum 74 | expression: | 75 | date_format( 76 | date_parse(record.start_time), 77 | "%Y-%m-%dT%H:%M:%S.%fZ" 78 | ) 79 | output: state.max_start_time 80 | 81 | # pagination: 82 | # # stop if condition is met 83 | # stop_condition: > 84 | # length(response.records) == 0 || 85 | # state.max_start_time == state.start_date 86 | 87 | # # set state for next page 88 | # next_state: 89 | # start_date: state.max_start_time 90 | # end_date: date_format(now(), "%Y-%m-%dT%H:%M:%S.%fZ") 91 | 92 | -------------------------------------------------------------------------------- /api/specs/uberall.yaml: -------------------------------------------------------------------------------- 1 | name: "Uberall" 2 | description: "API for managing business location data, listings, reviews, and analytics on the Uberall platform." 
3 | 4 | defaults: 5 | request: 6 | headers: 7 | Accept: "application/json" 8 | privateKey: ${secrets.private_key} 9 | 10 | timeout: 60 11 | rate: 5 # Adjust based on observed API limits 12 | concurrency: 5 13 | 14 | endpoints: 15 | 16 | locations: 17 | description: "Get several locations with filtering and pagination." 18 | 19 | state: 20 | offset: 0 21 | limit: 100 22 | location_id: ${ coalesce(env.LOCATION_ID, secrets.location_id) } 23 | 24 | request: 25 | url: https://uberall.com/api/locations/${state.location_id} 26 | method: "GET" 27 | parameters: 28 | max: ${state.limit} 29 | offset: ${state.offset} 30 | 31 | pagination: 32 | next_state: 33 | offset: "${state.offset + state.limit}" 34 | stop_condition: "length(response.records) == 0" 35 | 36 | response: 37 | records: 38 | jmespath: "response" 39 | primary_key: ["id"] -------------------------------------------------------------------------------- /cmd/sling/.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | sling 15 | sling-linux 16 | sling-mac 17 | sling.exe 18 | pkged.go -------------------------------------------------------------------------------- /cmd/sling/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | RUN groupadd -r sling && useradd -r -g sling sling 4 | 5 | RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y unzip libaio1 postgresql-client wget curl gnupg2 && \ 6 | apt-get clean && \ 7 | rm -rf /var/lib/apt/lists /var/cache/apt 8 | 9 | # Install Oracle Instant Client 10 | RUN cd /tmp && \ 11 | wget https://f.slingdata.io/oracle_client_amd64.tar.gz && \ 12 | tar -xf oracle_client_amd64.tar.gz && \ 13 | mkdir -p /usr/lib/oracle/19.3 && mv oracle_client64 /usr/lib/oracle/19.3/client64 && \ 14 | rm -f oracle_client_amd64.tar.gz 15 | 16 | ## Install mssql-tools 17 | ## from https://docs.microsoft.com/en-us/sql/linux/sql-server-linux-setup-tools?view=sql-server-ver15#ubuntu 18 | RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - 19 | 20 | RUN curl https://packages.microsoft.com/config/ubuntu/20.04/prod.list | tee /etc/apt/sources.list.d/msprod.list 21 | 22 | RUN export ACCEPT_EULA=y && apt-get update && apt-get install -y mssql-tools unixodbc-dev && \ 23 | apt-get clean && \ 24 | rm -rf /var/lib/apt/lists /var/cache/apt 25 | 26 | COPY --chown=sling:sling sling /usr/local/bin/sling 27 | RUN chmod 755 /usr/local/bin/sling 28 | 29 | # Switch to non-root user 30 | RUN mkdir -p /home/sling && chmod 755 /home/sling && chown sling:sling /home/sling 31 | USER sling 32 | 33 | # Final ENV updates 34 | ENV DBUS_SESSION_BUS_ADDRESS="/dev/null" 35 | ENV ORACLE_HOME="/usr/lib/oracle/19.3/client64" 36 | ENV LD_LIBRARY_PATH="/usr/lib/oracle/19.3/client64/lib" 37 | ENV PATH="${PATH}:/usr/local/go/bin:/root/go/bin:/opt/mssql-tools/bin:/usr/lib/oracle/19.3/client64/bin" 38 | ENV SLING_PACKAGE="DOCKER" 39 | 40 | ENTRYPOINT ["sling"] -------------------------------------------------------------------------------- /cmd/sling/Dockerfile.arm64: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/arm64 ubuntu:focal 2 | 3 | RUN groupadd -r sling && useradd -r -g sling sling 4 | 5 | RUN apt update || true 
&& DEBIAN_FRONTEND=noninteractive apt install -y libaio1 postgresql-client wget curl && apt clean && rm -rf /var/lib/apt/lists /var/cache/apt 6 | 7 | # Install Oracle Instant Client 8 | RUN cd /tmp && \ 9 | wget https://f.slingdata.io/oracle_client_arm64.tar.gz && \ 10 | tar -xf oracle_client_arm64.tar.gz && \ 11 | rm -f oracle_client_arm64.tar.gz && \ 12 | mkdir -p /opt/oracle && mv instantclient_19_19 /opt/oracle/ && \ 13 | echo /opt/oracle/instantclient_19_19 > /etc/ld.so.conf.d/oracle-instantclient.conf 14 | 15 | RUN echo ' \ 16 | # Oracle Client environment 17 | export ORACLE_HOME=/opt/oracle/instantclient_19_19 \ 18 | export LD_LIBRARY_PATH=$ORACLE_HOME:$LD_LIBRARY_PATH \ 19 | export PATH="$PATH:$ORACLE_HOME" \ 20 | ' >> /root/.bashrc && \ 21 | ldconfig 22 | 23 | ## Install Sql Server BCP 24 | # RUN curl https://packages.microsoft.com/keys/microsoft.asc | tee /etc/apt/trusted.gpg.d/microsoft.asc 25 | # RUN curl https://packages.microsoft.com/config/ubuntu/20.04/prod.list | tee /etc/apt/sources.list.d/mssql-release.list 26 | # RUN apt update || true && DEBIAN_FRONTEND=noninteractive apt install -y mssql-tools18 && apt clean && rm -rf /var/lib/apt/lists /var/cache/apt 27 | 28 | # Copy sling binary 29 | COPY --chown=sling:sling sling /usr/local/bin/sling 30 | RUN chmod 755 /usr/local/bin/sling 31 | 32 | # Switch to non-root user 33 | RUN mkdir -p /home/sling && chmod 755 /home/sling && chown sling:sling /home/sling 34 | USER sling 35 | 36 | # Final ENV updates 37 | ENV DBUS_SESSION_BUS_ADDRESS="/dev/null" 38 | ENV ORACLE_HOME="/opt/oracle/instantclient_19_19" 39 | ENV LD_LIBRARY_PATH="/opt/oracle/instantclient_19_19" 40 | ENV PATH="${PATH}:/usr/local/go/bin:/root/go/bin:/opt/oracle/instantclient_19_19:/opt/mssql-tools18/bin" 41 | ENV SLING_PACKAGE="DOCKER" 42 | 43 | 44 | ENTRYPOINT ["sling"] -------------------------------------------------------------------------------- /cmd/sling/resource/examples.sh: -------------------------------------------------------------------------------- 1 | 2 | ###### Database to Database ###### 3 | # drop / full refresh target table 4 | export PG_DB=postgres://xxxxxxxxxxxxxxxx 5 | export MYSQL_DB=mysql://xxxxxxxxxxxxxxxx 6 | 7 | sling run --src-conn PG_DB --src-stream public.transactions --tgt-conn MYSQL_DB --tgt-object mysql.bank_transactions --mode full-refresh 8 | # OR 9 | sling run -c ' 10 | source: 11 | conn: PG_DB 12 | stream: public.transactions 13 | target: 14 | conn: MYSQL_DB 15 | object: mysql.bank_transactions 16 | mode: full-refresh 17 | ' 18 | 19 | # custom sql in-line 20 | export PG_DB=$POSTGRES_URL 21 | export MYSQL_DB=$MYSQL_URL 22 | 23 | sling run --src-conn PG_DB --src-stream "select date, description, amount from public.transactions where transaction_type = 'debit'" --tgt-conn MYSQL_DB --tgt-object mysql.bank_transactions --mode full-refresh 24 | # OR 25 | sling run -c " 26 | source: 27 | conn: PG_DB 28 | stream: select date, description, amount from public.transactions where transaction_type = 'debit' 29 | target: 30 | conn: MYSQL_DB 31 | object: mysql.bank_transactions 32 | mode: full-refresh 33 | " 34 | 35 | # custom sql file 36 | sling run --src-conn PG_DB --src-stream file:///path/to/query.sql --tgt-conn MYSQL_DB --tgt-object mysql.bank_transactions --mode append 37 | # OR 38 | sling run -c ' 39 | source: 40 | conn: PG_DB 41 | stream: file:///path/to/query.sql 42 | target: 43 | conn: MYSQL_DB 44 | object: mysql.bank_transactions 45 | mode: append 46 | ' 47 | 48 | # incremental 49 | sling run -c ' 50 | source: 51 | conn: PG_DB 52 | 
stream: public.transactions 53 | update_key: modified_at 54 | primary_key: id 55 | target: 56 | conn: MYSQL_DB 57 | object: mysql.bank_transactions 58 | mode: incremental 59 | ' 60 | 61 | 62 | ###### Database to File ###### 63 | # CSV export full table 64 | 65 | sling run --src-conn PG_DB --src-stream public.transactions --tgt-object file:///tmp/public.transactions.csv 66 | # OR 67 | sling run -c ' 68 | source: 69 | conn: PG_DB 70 | stream: public.transactions 71 | target: 72 | object: file:///tmp/public.transactions.csv 73 | ' 74 | 75 | # CSV dump, custom SQL 76 | sling run -c " 77 | source: 78 | conn: PG_DB 79 | stream: select id, created_at, account_id, amount from public.transactions where type = 'A' 80 | target: 81 | object: file:///tmp/public.transactions.csv 82 | " 83 | 84 | # CSV export full table to S3, gzip 85 | export AWS_ACCESS_KEY_ID=xxxxxxxxxxxxx 86 | export AWS_SECRET_ACCESS_KEY=xxxxxxxxx 87 | sling run --src-conn PG_DB --src-stream public.transactions --tgt-object file:///tmp/public.transactions.csv --tgt-options 'compression: gzip' 88 | # OR 89 | sling run -c ' 90 | source: 91 | conn: PG_DB 92 | stream: public.transactions 93 | target: 94 | object: s3://my-bucket/public.transactions.csv.gz 95 | options: 96 | compression: gzip 97 | ' 98 | 99 | ###### File to Database ###### 100 | # local CSV import into table 101 | cat /tmp/public.transactions.csv.gz | sling run --tgt-conn PG_DB --tgt-object public.transactions 102 | # OR 103 | sling run --src-stream file:///tmp/public.transactions.csv.gz --tgt-conn PG_DB --tgt-object public.transactions 104 | # OR 105 | sling run -c ' 106 | source: 107 | stream: file:///tmp/public.transactions.csv.gz 108 | target: 109 | conn: PG_DB 110 | object: public.transactions 111 | mode: append 112 | ' 113 | 114 | 115 | # CSV folder import into table, incremental 116 | export AWS_ACCESS_KEY_ID=xxxxxxxxxxxxx 117 | export AWS_SECRET_ACCESS_KEY=xxxxxxxxx 118 | sling run -c ' 119 | source: 120 | stream: s3://my-bucket/public.transactions/ 121 | update_key: modified_at 122 | primary_key: id 123 | target: 124 | conn: PG_DB 125 | object: public.transactions 126 | mode: incremental 127 | ' -------------------------------------------------------------------------------- /cmd/sling/sling_cloud.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/flarco/g" 4 | 5 | func processCloud(c *g.CliSC) (ok bool, err error) { 6 | return 7 | } 8 | -------------------------------------------------------------------------------- /cmd/sling/sling_prompt.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/c-bata/go-prompt" 9 | "github.com/flarco/g" 10 | "github.com/slingdata-io/sling-cli/core" 11 | "github.com/slingdata-io/sling-cli/core/dbio/connection" 12 | "github.com/spf13/cast" 13 | ) 14 | 15 | var suggestions = []prompt.Suggest{} 16 | 17 | func init() { 18 | suggList := [][]string{ 19 | {"exit", "Exit interactive mode"}, 20 | {cliRun.Name, cliRun.Description}, 21 | {cliConns.Name, cliConns.Description}, 22 | } 23 | for _, sl := range suggList { 24 | suggestions = append(suggestions, prompt.Suggest{Text: sl[0], Description: sl[1]}) 25 | } 26 | } 27 | 28 | func completer(in prompt.Document) []prompt.Suggest { 29 | localSuggestions := []prompt.Suggest{} 30 | 31 | w := in.GetWordBeforeCursor() 32 | blocks := strings.Split(in.Text, " ") 33 | // trimmedBlocks := 
strings.Split(strings.TrimSpace(in.Text), " ") 34 | 35 | lastWord := blocks[len(blocks)-1] 36 | prevWord := lastWord 37 | 38 | if len(blocks) > 1 { 39 | prevWord = blocks[len(blocks)-2] 40 | } 41 | 42 | switch blocks[0] { 43 | case cliRun.Name: 44 | // collect strings flags 45 | stringFlags := []string{} 46 | for _, f := range cliRun.Flags { 47 | if f.Type == "string" { 48 | stringFlags = append(stringFlags, f.Name) 49 | } 50 | } 51 | 52 | // suggestions based on previous word 53 | switch { 54 | case g.In(prevWord, "src-conn", "tgt-conn"): 55 | for _, conn := range connection.GetLocalConns() { 56 | localSuggestions = append(localSuggestions, prompt.Suggest{Text: conn.Name, Description: conn.Description}) 57 | } 58 | return prompt.FilterHasPrefix(localSuggestions, w, true) 59 | case g.In(prevWord, stringFlags...): 60 | return []prompt.Suggest{} 61 | } 62 | 63 | // suggest normal flags 64 | for _, f := range cliRun.Flags { 65 | localSuggestions = append(localSuggestions, prompt.Suggest{Text: f.Name, Description: f.Description}) 66 | } 67 | return prompt.FilterHasPrefix(localSuggestions, w, true) 68 | 69 | case cliConns.Name: 70 | for _, f := range cliConns.Flags { 71 | localSuggestions = append(localSuggestions, prompt.Suggest{Text: f.Name, Description: f.Description}) 72 | } 73 | return prompt.FilterHasPrefix(localSuggestions, w, true) 74 | case "": 75 | return []prompt.Suggest{} 76 | } 77 | return prompt.FilterHasPrefix(suggestions, w, true) 78 | } 79 | 80 | func executor(in string) { 81 | in = strings.TrimSpace(in) 82 | 83 | blocks := strings.Split(in, " ") 84 | switch blocks[0] { 85 | case "exit": 86 | fmt.Println("exiting") 87 | os.Exit(0) 88 | case cliRun.Name: 89 | cliRun.Vals = g.M(cast.ToSlice(blocks[1:])...) 90 | _, err := cliRun.ExecProcess(cliRun) 91 | g.LogError(err) 92 | case cliConns.Name: 93 | if len(blocks) == 1 { 94 | return 95 | } 96 | for _, subCom := range cliConns.SubComs { 97 | if subCom.Name == blocks[1] { 98 | subCom.Vals = g.M(cast.ToSlice(blocks[2:])...) 
99 | _, err := subCom.ExecProcess(subCom) 100 | g.LogError(err) 101 | } 102 | } 103 | } 104 | println(in) 105 | } 106 | 107 | func slingPrompt(c *g.CliSC) (ok bool, err error) { 108 | fmt.Println("sling - An Extract-Load tool") 109 | fmt.Println("Slings data from a data source to a data target.\nVersion " + core.Version) 110 | 111 | p := prompt.New( 112 | executor, 113 | completer, 114 | prompt.OptionPrefix("sling > "), 115 | // prompt.OptionLivePrefix(livePrefix), 116 | prompt.OptionTitle("sling"), 117 | ) 118 | 119 | p.Run() 120 | 121 | return 122 | } 123 | -------------------------------------------------------------------------------- /cmd/sling/tests/files/parquet/test1.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/cmd/sling/tests/files/parquet/test1.1.parquet -------------------------------------------------------------------------------- /cmd/sling/tests/files/parquet/test1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/cmd/sling/tests/files/parquet/test1.parquet -------------------------------------------------------------------------------- /cmd/sling/tests/files/test1.1.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,target,create_dt,rating,code 2 | 1,"AOCG,"" 3 | 883",Lumox,ilumox0@unc.edu,true,2019-02-11 11:23:00.000,35.987,1 4 | 2,Terrell,"EKOZ,989",tmee1@example.com,true,2019-08-19 17:02:09.000,89.983,2 5 | 3,Frayda,Comolli,fcomolli2@cbslocal.com,false,2019-08-07 16:48:12.000,89.401,3 6 | 4,Alphonse,Crookshank,acrookshank3@mapy.cz,true,2019-03-31 14:41:45.000,19.933,4 7 | 5,Mathilda,Deaves,mdeaves4@va.gov,true,2019-07-15 21:20:39.000,90.643,5 8 | 6,Trixi,Livens,tlivens5@cargocollective.com,true,2019-06-20 10:58:40.000,74.341,6 9 | 7,Ida,Domican,idomican6@gravatar.com,false,2019-03-11 09:18:34.000,55.454,7 10 | 8,Bogey,Kears,bkears7@nih.gov,false,2019-09-11 22:08:40.000,3.869,8 11 | 9,Janifer,Melanaphy,jmelanaphy8@alibaba.com,true,2019-02-19 16:23:06.000,48.234,9 12 | 10,Kaela,Illsley,killsley9@feedburner.com,false,2019-04-17 05:05:18.000,86.13,10 13 | 11,Yorgos,Ruprechter,yruprechtera@baidu.com,false,2019-01-03 05:57:01.000,88.736,11 14 | 12,Bunni,Bothams,bbothamsb@amazon.co.jp,false,2019-08-12 14:51:09.000,94.945,12 15 | 13,Sharon,Atwater,satwaterc@webeden.co.uk,false,2019-05-22 09:38:10.000,30.112,13 16 | 14,Ellen,Real,ereald@amazonaws.com,true,2019-02-01 18:47:06.000,6.877,14 17 | 15,Charissa,Ballance,cballancee@amazonaws.com,true,2019-07-25 04:34:02.000,8.514,15 18 | 16,Zorina,Cudiff,zcudifff@jigsy.com,false,2019-10-06 04:13:55.000,27.63,16 19 | 17,Grove,Kenrack,gkenrackg@chronoengine.com,true,2019-09-01 05:11:02.000,18.658,17 20 | 18,Alexis,Fewtrell,afewtrellh@google.cn,true,2019-05-31 14:55:31.000,44.751,18 -------------------------------------------------------------------------------- /cmd/sling/tests/files/test1.1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/cmd/sling/tests/files/test1.1.csv.gz -------------------------------------------------------------------------------- /cmd/sling/tests/files/test1.2.csv: 
-------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,target,create_dt,rating 2 | 1,"AOCG,"" 3 | 883",Lumox,ilumox0@unc.edu,true,2019-02-11 11:23:00.000,35.987 4 | 2,Terrell,"EKOZ,989",tmee1@example.com,true,2019-08-19 17:02:09.000,89.983 5 | 3,Frayda,Comolli,fcomolli2@cbslocal.com,false,2019-08-07 16:48:12.000,89.401 6 | 4,Alphonse,Crookshank,acrookshank3@mapy.cz,true,2019-03-31 14:41:45.000,19.933 7 | 5,Mathilda,Deaves,mdeaves4@va.gov,true,2019-07-15 21:20:39.000,90.643 8 | 6,Trixi,Livens,tlivens5@cargocollective.com,true,2019-06-20 10:58:40.000,74.341 9 | 7,Ida,Domican,idomican6@gravatar.com,false,2019-03-11 09:18:34.000,55.454 10 | 8,Bogey,Kears,bkears7@nih.gov,false,2019-09-11 22:08:40.000,3.869 11 | 9,Janifer,Melanaphy,jmelanaphy8@alibaba.com,true,2019-02-19 16:23:06.000,48.234 12 | 10,Kaela,Illsley,killsley9@feedburner.com,false,2019-04-17 05:05:18.000,86.13 13 | 11,Yorgos,Ruprechter,yruprechtera@baidu.com,false,2019-01-03 05:57:01.000,88.736 14 | 12,Bunni,Bothams,bbothamsb@amazon.co.jp,false,2019-08-12 14:51:09.000,94.945 15 | 13,Sharon,Atwater,satwaterc@webeden.co.uk,false,2019-05-22 09:38:10.000,30.112 16 | 14,Ellen,Real,ereald@amazonaws.com,true,2019-02-01 18:47:06.000,6.877 17 | 15,Charissa,Ballance,cballancee@amazonaws.com,true,2019-07-25 04:34:02.000,8.514 18 | 16,Zorina,Cudiff,zcudifff@jigsy.com,false,2019-10-06 04:13:55.000,27.63 19 | 17,Grove,Kenrack,gkenrackg@chronoengine.com,true,2019-09-01 05:11:02.000,18.658 20 | 18.1,Alexis,Fewtrell,afewtrellh@google.cn,true,2019-05-31 14:55:31.000,44.751 -------------------------------------------------------------------------------- /cmd/sling/tests/files/test1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/cmd/sling/tests/files/test1.parquet -------------------------------------------------------------------------------- /cmd/sling/tests/files/test1.upsert.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,target,create_dt,date,rating,code,json_data 2 | 706,Prue,Hendonson,phendonsonjl@vistaprint.com,false,2020-10-17 19:01:59.000,,1234567,706, 3 | 707,Mattie,Berger,mbergerjm@pinterest.com,true,2020-10-21 07:16:33.000,,1234567,707, 4 | 708,Mada,Iannello,miannellojn@craigslist.org,false,2020-01-17 01:49:58.000,,1234567,708, 5 | 758,Rafa,Lagne,rlagnel1@dailymotion.com,false,2020-11-11 02:01:42.000,,1234567,758, 6 | 759,Garth,Casetta,gcasettal2@wikimedia.org,true,2020-03-25 18:05:43.000,,1234567,759, 7 | 760,Kirby,Storr,kstorrl3@ifeng.com,false,2020-03-22 15:32:24.000,,1234567,760, 8 | 761,Florencia,Gusney,fgusneyl4@soup.io,true,2020-11-10 03:11:04.000,,1234567,761, 9 | 846,Janette,Boobier,jboobiernh@live.com,false,2020-03-11 01:25:12.000,,1234567,846, 10 | 968,Bev,Fligg,bfliggqv@imdb.com,false,2020-08-14 17:55:43.000,,1234567,968,"{""number"":1, ""tags"": [1,2,3]}" 11 | 969,Aviva,Varsey,avarseyqw@i2i.jp,true,2020-04-19 00:53:38.000,,1234567,969, 12 | 997,Phillis,Shier,pshierro@free.fr,true,2020-07-06 22:36:51.000,,1234567,997, 13 | 998,Corny,Greenless,cgreenlessrp@zdnet.com,true,2020-04-09 16:34:37.000,,1234567,998, 14 | 1001,Audric,Larco,aularco@nytimes.com,true,2021-04-23 19:31:25.000,2021-04-23,1234567,1001, 15 | 1002,Adalie,Larco,adlarco@nytimes.com,true,2021-04-23 19:31:25.000,2021-04-23,1234567,1002, 
-------------------------------------------------------------------------------- /cmd/sling/tests/files/test3.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Col1": "--Geschenk-Gutschein 3--", 4 | "Col2": { 5 | "Col3": { 6 | "@Col4": "2", 7 | "Col-5": "G75118_2.jpg" 8 | } 9 | } 10 | } 11 | ] -------------------------------------------------------------------------------- /cmd/sling/tests/files/test4.csv: -------------------------------------------------------------------------------- 1 | col1|header|col3 2 | column1_data|"column2 data with \"space\""|column3_data 3 | "column1_data"|"column2 data with \"space\""|column3_data 4 | "column2 data with \"space\""|"column2 data with \"space\""|column3_data 5 | "column2 data with 6 | heeelo \"space\" -- "|"column2 data with \"space\""|column3_data -------------------------------------------------------------------------------- /cmd/sling/tests/files/test5.csv: -------------------------------------------------------------------------------- 1 | Obj;PropId;Value;TimeStamp;TimeStampISO 2 | BB01;85;45,3828582763672;133245162327228051;2023-03-28T22:30:32Z 3 | BB01;85;40,3816032409668;133245181140278467;2023-03-28T23:01:54Z 4 | BB01;85;45,3858795166016;133245207233952957 5 | BB01;85;50,388298034668;133245209487304477;2023-03-28T23:49:08Z 6 | BB01;85;45,3873443603516;133245215378614197;2023-03-28T23:58:57Z 7 | BB01;85;40,3829345703125;133245217529463186;2023-03-29T00:02:32Z 8 | BB01;85;35,3816719055176;133245220376169720;2023-03-29T00:07:17Z 9 | BB01;85;40,3844985961914;133245230678878369;2023-03-29T00:24:27Z 10 | BB01;85;45,3865814208984;133245234406821951;2023-03-29T00:30:40Z 11 | BB01;85;45,3828582763672;133245162327228051;2023-03-28T22:30:32Z 12 | BB01;85;40,3816032409668;133245181140278467;2023-03-28T23:01:54Z 13 | BB01;85;45,3858795166016;133245207233952957;2023-03-28T23:45:23Z 14 | BB01;85;50,388298034668;133245209487304477;2023-03-28T23:49:08Z 15 | BB01;85;45,3873443603516;133245215378614197;2023-03-28T23:58:57Z 16 | BB01;85;40,3829345703125;133245217529463186;2023-03-29T00:02:32Z 17 | BB01;85;35,3816719055176;133245220376169720;2023-03-29T00:07:17Z 18 | BB01;85;40,3844985961914;133245230678878369;2023-03-29T00:24:27Z 19 | BB01;85;45,3865814208984;133245234406821951;2023-03-29T00:30:40Z 20 | BB01;85;45,3828582763672;133245162327228051;2023-03-28T22:30:32Z 21 | BB01;85;40,3816032409668;133245181140278467;2023-03-28T23:01:54Z 22 | BB01;85;45,3858795166016;133245207233952957;2023-03-28T23:45:23Z 23 | BB01;85;50,388298034668;133245209487304477;2023-03-28T23:49:08Z 24 | BB01;85;45,3873443603516;133245215378614197;2023-03-28T23:58:57Z 25 | BB01;85;40,3829345703125;133245217529463186;2023-03-29T00:02:32Z 26 | BB01;85;35,3816719055176;133245220376169720;2023-03-29T00:07:17Z 27 | BB01;85;40,3844985961914;133245230678878369;2023-03-29T00:24:27Z 28 | BB01;85;45,3865814208984;133245234406821951;2023-03-29T00:30:40Z 29 | BB01;85;45,3828582763672;133245162327228051;2023-03-28T22:30:32Z 30 | BB01;85;40,3816032409668;133245181140278467;2023-03-28T23:01:54Z 31 | BB01;85;45,3858795166016;133245207233952957;2023-03-28T23:45:23Z 32 | BB01;85;50,388298034668;133245209487304477 33 | BB01;85;45,3873443603516;133245215378614197;2023-03-28T23:58:57Z 34 | BB01;85;40,3829345703125;133245217529463186;2023-03-29T00:02:32Z 35 | BB01;85;35,3816719055176;133245220376169720;2023-03-29T00:07:17Z 36 | BB01;85;40,3844985961914;133245230678878369;2023-03-29T00:24:27Z 37 | 
BB01;85;45,3865814208984;133245234406821951;2023-03-29T00:30:40Z -------------------------------------------------------------------------------- /cmd/sling/tests/files/test6.csv: -------------------------------------------------------------------------------- 1 | BB01;85;45,3828582763672;133245162327228051;2023-03-28T22:30:32Z 2 | BB01;85;40,3816032409668;133245181140278467;2023-03-28T23:01:54Z -------------------------------------------------------------------------------- /cmd/sling/tests/files/test7.csv: -------------------------------------------------------------------------------- 1 | col1|header|col3 2 | data1|'data with single quote \' inside'|data3 3 | 'data with pipe | inside'|data2|data3 4 | 'data with backslash \\ inside'|'data with \"escaped\" double quotes'|data3 5 | -------------------------------------------------------------------------------- /cmd/sling/tests/files/test8.csv: -------------------------------------------------------------------------------- 1 | col1|header|col3 2 | data1|$data with single quote \' inside$|data3 3 | $data with pipe | inside$|data2|data3 4 | $data with backslash \\ inside$|$data with \"escaped\" double quotes$|data3 5 | -------------------------------------------------------------------------------- /cmd/sling/tests/pipelines/p.01.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - type: log 3 | message: '{runtime_state}' 4 | 5 | - type: replication 6 | path: cmd/sling/tests/replications/r.19.yaml 7 | 8 | - type: log 9 | message: '{runtime_state}' 10 | level: warn 11 | 12 | - type: command 13 | command: [ls, -l] 14 | print: true -------------------------------------------------------------------------------- /cmd/sling/tests/pipelines/p.02.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - type: log 3 | message: '{runtime_state}' 4 | 5 | - type: group 6 | id: group1 7 | steps: 8 | - type: log 9 | message: 'Hello from group 1' 10 | 11 | - type: group 12 | id: group2 13 | loop: [11, 22, 33] 14 | steps: 15 | - type: log 16 | message: 'Hello from group 2: {runtime_state}' 17 | 18 | - type: log 19 | message: 'value: {state.group2.loop_value} ({state.group2.loop_index})' 20 | 21 | 22 | - type: list 23 | id: s3_list 24 | location: aws_s3/sling_test/files/ 25 | recursive: true 26 | only: files 27 | 28 | - type: group 29 | id: group3 30 | loop: state.s3_list.result 31 | steps: 32 | - type: log 33 | message: 'file: ({loop_index}) {loop_value.location}' 34 | 35 | - type: copy 36 | id: my_copy 37 | from: '{loop_value.location}' 38 | to: azure_storage/sftp-test/{loop_value.name} 39 | 40 | - type: log 41 | message: 'bytes_written: {state.my_copy.bytes_written}' 42 | level: warn 43 | 44 | - type: list 45 | id: sftp_list 46 | location: sftp//tmp/*.csv 47 | only: files 48 | 49 | - type: delete 50 | location: azure_storage/sftp-test/ 51 | 52 | - type: group 53 | loop: state.sftp_list.result 54 | steps: 55 | - type: log 56 | message: 'file: ({loop_index}) {loop_value.location}' 57 | 58 | - type: log 59 | message: 'group3.my_copy.bytes_written: {state.my_copy.bytes_written}' 60 | level: warn 61 | 62 | - type: copy 63 | from: '{loop_value.location}' 64 | to: azure_storage/sftp-test/{loop_value.name} 65 | 66 | - type: log 67 | message: 'group3.my_copy.bytes_written: {state.my_copy.bytes_written}' 68 | level: warn 69 | 70 | # - type: log 71 | # message: '{runtime_state}' 72 | # level: warn 73 | 74 | 
-------------------------------------------------------------------------------- /cmd/sling/tests/replications/apis/r.61.stripe.yaml: -------------------------------------------------------------------------------- 1 | # ./sling run -r cmd/sling/tests/replications/apis/r.61.stripe.yaml --streams customer,customer_balance_transaction -d --mode full-refresh 2 | 3 | source: stripe_sling 4 | target: postgres 5 | 6 | defaults: 7 | mode: incremental 8 | object: apis.{source_name}_{stream_name} 9 | 10 | source_options: 11 | flatten: 1 # flatten records 1 level only 12 | 13 | streams: 14 | '*': 15 | 16 | env: 17 | SLING_STATE: postgres/sling_state.stripe # one state table per replication 18 | SLING_LOADED_AT_COLUMN: timestamp -------------------------------------------------------------------------------- /cmd/sling/tests/replications/apis/r.62.shopify.yaml: -------------------------------------------------------------------------------- 1 | source: shopify 2 | target: postgres 3 | 4 | defaults: 5 | mode: incremental 6 | object: apis.{source_name}_{stream_name} 7 | 8 | streams: 9 | # '*': 10 | 'order': 11 | 12 | env: 13 | SLING_STATE: postgres/sling_state.shopify # one state table per replication 14 | SLING_LOADED_AT_COLUMN: timestamp -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.00.yaml: -------------------------------------------------------------------------------- 1 | source: '{source}' 2 | target: SQLITE 3 | 4 | defaults: 5 | mode: full-refresh 6 | 7 | streams: 8 | file://./cmd/sling/{folder}/files/test1.1.csv: 9 | object: 'main.test' 10 | 11 | env: 12 | folder: tests 13 | source: LOCAL -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.01.yaml: -------------------------------------------------------------------------------- 1 | source: TEST_PG_SLING_MASTER 2 | target: SQLITE 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: 'main.{stream_schema}_{stream_table}' 7 | 8 | streams: 9 | public.food_food_category: -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.02.yaml: -------------------------------------------------------------------------------- 1 | source: TEST_PG_SLING_MASTER 2 | target: OCRAL_R2 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: 's3://{target_bucket}/temp/{stream_schema}.{stream_table}.csv' 7 | 8 | streams: 9 | public.food_food_category: -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.03.yaml: -------------------------------------------------------------------------------- 1 | source: OCRAL_R2 2 | target: TEST_PG_SLING_MASTER 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: '{target_schema}.{stream_file_name}' 7 | 8 | streams: 9 | s3://ocral/temp/public.food_food_category.csv: -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.04.yaml: -------------------------------------------------------------------------------- 1 | source: DO_SPACES 2 | target: PG_BIONIC 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: '{target_schema}.{stream_file_name}' 7 | 8 | streams: 9 | s3://ocral/rudderstack/rudder-logs/1uXKxCrhN2WGAt2fojy6k2fqDSb: 10 | mode: append 11 | update_key: _sling_loaded_at 12 | object: public.rudder_events 13 | -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.05.yaml: 
-------------------------------------------------------------------------------- 1 | source: OCRAL_R2 2 | target: POSTGRES 3 | 4 | defaults: 5 | object: 'public.{stream_file_name}_slack_data' 6 | source_options: 7 | flatten: true 8 | mode: full-refresh 9 | 10 | streams: 11 | s3://ocral/mlo.community.test/channels.json: 12 | s3://ocral/mlo.community.test/integration_logs.json: 13 | s3://ocral/mlo.community.test/users.json: 14 | s3://ocral/mlo.community.test/be-shameless/: 15 | s3://ocral/mlo.community.test/content-archive/: 16 | s3://ocral/mlo.community.test/discussions/: 17 | s3://ocral/mlo.community.test/events/: 18 | s3://ocral/mlo.community.test/fiddler/: 19 | s3://ocral/mlo.community.test/general/: 20 | s3://ocral/mlo.community.test/jobs/: 21 | s3://ocral/mlo.community.test/leadership/: 22 | s3://ocral/mlo.community.test/random/: 23 | 24 | env: 25 | SLING_THREADS: 5 26 | SLING_RETRIES: 2 -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.06.yaml: -------------------------------------------------------------------------------- 1 | source: GOOGLE_STORAGE 2 | target: BIGQUERY 3 | 4 | defaults: 5 | object: 'public.{stream_file_name}_slack_data' 6 | source_options: 7 | flatten: true 8 | target_options: 9 | adjust_column_type: true 10 | mode: full-refresh 11 | 12 | streams: 13 | gs://flarco_us_bucket/mlo.community.test/be-shameless/: 14 | gs://flarco_us_bucket/mlo.community.test/channels.json: 15 | gs://flarco_us_bucket/mlo.community.test/integration_logs.json: -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.07.yaml: -------------------------------------------------------------------------------- 1 | source: LOCAL 2 | target: SQLITE 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: 'main.{stream_file_folder}_{stream_file_name}' 7 | 8 | streams: 9 | file://./cmd/sling/tests/files/parquet/*: 10 | file://./cmd/sling/tests/files/*.csv: 11 | columns: { "*": string } 12 | file://./cmd/sling/tests/files/test?.json: -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.08.yaml: -------------------------------------------------------------------------------- 1 | source: OCRAL_R2 2 | target: SQLITE 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: 'main.do_{stream_file_name}' 7 | 8 | streams: 9 | s3://ocral/test.fs.write/*: 10 | s3://ocral/test.fs.write/*.csv: 11 | single: true -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.09.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: sqlite 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: 'main.pg_{stream_table}' 7 | sql: "select *, 'hi' there from {stream_name} {where_clause} limit 10" 8 | 9 | streams: 10 | public.my_table?: 11 | tags: [ my_table ] 12 | where: '2 > 1' 13 | public.my_table: 14 | hooks: 15 | pre: 16 | - type: query 17 | connection: '{source.name}' 18 | query: select 'heelo' as b 19 | id: hook1 20 | 21 | post: 22 | - type: http 23 | if: run.status == "success" 24 | url: https://webhook.ocral.org/status 25 | id: webhook 26 | 27 | - type: log 28 | message: runtime_state => '{runtime_state}' 29 | 30 | - type: query 31 | if: state.webhook.status == "success" 32 | connection: '{target.name}' 33 | query: | 34 | select 35 | '{state.hook1.connection}' as conn, 36 | '{state.hook1.columns}' as columns1, 37 | 
'{state.hook1.result}' as result, 38 | '{state.webhook.request.url}' as request, 39 | '{state.webhook.response.text}' as response_text, 40 | '{state.webhook.response.json.status}' as response_status 41 | 42 | public.my_table_*: 43 | columns: 44 | there: string | value_len == 3 -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.10.yaml: -------------------------------------------------------------------------------- 1 | source: aws_s3 2 | target: sftp 3 | 4 | defaults: 5 | mode: full-refresh 6 | 7 | streams: 8 | test/parquet/*.parquet: 9 | single: true 10 | object: test/parquet/aws_s3.text 11 | target_options: 12 | format: csv -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.11.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: aws_s3 3 | 4 | defaults: 5 | mode: full-refresh 6 | object: 'test/{stream_schema}.{stream_table}' 7 | target_options: 8 | file_max_rows: 1000000 # multiple files 9 | compression: gzip 10 | 11 | streams: 12 | test1: 13 | sql: | 14 | select * from public.test1k_postgres_pg 15 | where date_trunc('year', create_dt) = '{YEAR}-01-01' 16 | object: 'test1k/{YEAR}' 17 | 18 | public.many_jsons: 19 | 20 | env: 21 | # pass in env vars, e.g. YEAR=2005 22 | YEAR: $YEAR -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.12.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: snowflake 3 | 4 | hooks: 5 | start: 6 | - type: query 7 | connection: snowflake 8 | query: drop table if exists public.public_test1k_postgres_pg 9 | 10 | defaults: 11 | mode: incremental 12 | object: 'public.{stream_schema}_{stream_table}' 13 | target_options: 14 | add_new_columns: true 15 | 16 | streams: 17 | public.test1k_postgres_pg: 18 | primary_key: [id] 19 | update_key: create_dt 20 | target_options: 21 | table_keys: 22 | cluster: [ date(create_dt) ] 23 | -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.13.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: starrocks 3 | 4 | defaults: 5 | mode: incremental 6 | object: 'public.{stream_schema}_{stream_table}' 7 | target_options: 8 | add_new_columns: true 9 | datetime_format: 2006-01-02 15:04:05.000000 -07 10 | 11 | streams: 12 | public.test1k_postgres_pg: 13 | mode: full-refresh 14 | # primary_key: [id] 15 | update_key: create_dt 16 | target_options: 17 | table_keys: 18 | duplicate: [ id, name ] 19 | hash: [ id ] 20 | -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.14.yaml: -------------------------------------------------------------------------------- 1 | source: local 2 | target: postgres 3 | 4 | defaults: 5 | mode: incremental 6 | update_key: create_dt 7 | primary_key: id 8 | object: public.incremental_csv 9 | target_options: 10 | adjust_column_type: true 11 | 12 | streams: 13 | cmd/sling/tests/files/test1.csv: 14 | cmd/sling/tests/files/test1.upsert.csv: 15 | 16 | cmd/sling/tests/files/parquet/test1.parquet: 17 | sql: select id, first_name from {stream_scanner} 18 | object: public.sling_test_parquet_sql 19 | 20 | cmd/sling/tests/files/test2.csv: 21 | mode: full-refresh 22 | sql: select first_name, email from {stream_scanner} 23 | object: 
public.sling_test_csv_sql 24 | 25 | parquet_sql: 26 | mode: full-refresh 27 | sql: select id, first_name from parquet_scan('cmd/sling/tests/files/parquet/test1.parquet') 28 | source_options: 29 | format: parquet 30 | object: public.sling_test_parquet_sql 31 | -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.15.yaml: -------------------------------------------------------------------------------- 1 | source: aws_s3 2 | target: postgres 3 | 4 | defaults: 5 | mode: full-refresh 6 | 7 | streams: 8 | 9 | sling_test/*iceberg: 10 | source_options: 11 | format: iceberg 12 | limit: 100 13 | object: public.sling_test_{stream_file_name} 14 | update_key: l_commitdate 15 | mode: '{iceberg_mode}' # if blank, defaults to full-refresh 16 | 17 | # Duckdb has issues with delta via s3 18 | sling_test/delta: 19 | disabled: true 20 | source_options: 21 | format: delta 22 | object: public.sling_test_{stream_file_name} 23 | 24 | # Duckdb has issues with delta via s3 25 | delta_sql: 26 | disabled: true 27 | sql: select last_name from delta_scan('s3://{source_bucket}/sling_test/delta') 28 | source_options: 29 | format: delta 30 | object: public.sling_test_delta_sql 31 | 32 | iceberg_sql: 33 | sql: select l_commitdate from iceberg_scan('s3://{source_bucket}/sling_test/lineitem_iceberg', allow_moved_paths = true) 34 | source_options: 35 | format: iceberg 36 | object: public.sling_test_iceberg_sql 37 | 38 | test/parquet/test1.parquet: 39 | sql: select id, first_name from {stream_scanner} where id != '100' 40 | object: public.sling_test_parquet_sql 41 | 42 | env: 43 | iceberg_limit: $ICEBERG_LIMIT 44 | iceberg_mode: $ICEBERG_MODE -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.16.yaml: -------------------------------------------------------------------------------- 1 | source: oracle 2 | target: local 3 | 4 | defaults: 5 | mode: full-refresh 6 | 7 | streams: 8 | oracle.TEST1K_ORACLE_WIDE: 9 | object: /tmp/test.csv 10 | where: '2 > 1' -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.17.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: aws_s3 3 | 4 | defaults: 5 | mode: full-refresh 6 | 7 | streams: 8 | public.test1k_postgres_pg: 9 | update_key: update_dt 10 | primary_key: id 11 | mode: incremental 12 | object: test/{stream_name}_{format}/{part_year}/{part_month} 13 | target_options: 14 | format: '{format}' 15 | 16 | env: 17 | SLING_STATE: AWS_S3/state/r.17/${FORMAT} 18 | format: ${FORMAT} -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.18.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: sqlite 3 | 4 | defaults: 5 | mode: full-refresh 6 | 7 | hooks: 8 | start: 9 | - type: log 10 | message: starting replication!!!! 
11 | id: start_msg 12 | 13 | end: 14 | - type: log 15 | level: warn 16 | message: | 17 | ending replication 👍 18 | start_msg: {state.start_msg.message} 19 | first row => { state.table_cnt.result[0] } 20 | all rows => { state.table_cnt.result } 21 | runtime_state => {runtime_state} 22 | 23 | post: 24 | - type: log 25 | message: | 26 | "state.table_cnt.columns" => {state.table_cnt.columns} 27 | 28 | 29 | streams: 30 | public.test1k_postgres_pg: 31 | object: main.test1k_postgres_pg 32 | hooks: 33 | post: 34 | - type: query 35 | id: table_cnt 36 | connection: sqlite 37 | query: select count(1) as cnt, '1' as r2 from main.test1k_postgres_pg 38 | 39 | 40 | public.my_table1: 41 | object: main.my_table1 -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.19.yaml: -------------------------------------------------------------------------------- 1 | source: aws_s3 2 | target: postgres 3 | 4 | env: 5 | SLING_STATE: aws_s3/sling-state/test/r.19 6 | RESET: ${RESET} 7 | 8 | hooks: 9 | start: 10 | - type: log 11 | message: 'runtime_state => {runtime_state}' 12 | 13 | - type: delete 14 | if: env.RESET == "true" 15 | connection: aws_s3 16 | path: sling-state/test/r.19 # delete state on replication start 17 | 18 | - type: inspect 19 | id: inspect_file 20 | location: aws_s3/test/public_test1k_postgres_pg_parquet 21 | message: 'runtime_state ==> {runtime_state}' 22 | 23 | # test check, should fail and warn 24 | - type: check 25 | id: inspect_file_check 26 | check: state.inspect_file.is_dir != true 27 | on_failure: warn 28 | 29 | - type: query 30 | connection: postgres 31 | if: env.RESET == "true" 32 | query: 'drop table public.test1k_postgres_pg_parquet' 33 | 34 | end: 35 | 36 | - type: log 37 | message: | 38 | inspect-output ==> {state.inspect_file} 39 | 40 | runtime_state ==> {runtime_state} 41 | 42 | - type: copy 43 | id: file-copy 44 | from: aws_s3/sling-state/test/r.19 45 | to: local//tmp/test/sling-state/test/r.19 46 | 47 | - type: check 48 | check: state["file-copy"].bytes_written > 0 49 | on_failure: warn 50 | 51 | streams: 52 | test/public_test1k_postgres_pg_parquet/{part_year}/{part_month}/: 53 | id: test1k_postgres_pg_parquet 54 | object: public.test1k_postgres_pg_parquet 55 | mode: incremental 56 | primary_key: [id] 57 | update_key: update_dt 58 | source_options: 59 | format: parquet 60 | hooks: 61 | post: 62 | - type: log 63 | message: | 64 | run-output ==> {run} 65 | -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.20.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: oracle 3 | 4 | defaults: 5 | mode: backfill 6 | object: oracle.{stream_table_lower} 7 | primary_key: [id] 8 | target_options: 9 | use_bulk: false 10 | 11 | streams: 12 | public.test1k_mariadb_pg: 13 | mode: backfill 14 | update_key: update_dt 15 | source_options: 16 | range: '2018-11-01,2018-12-01' 17 | chunk_size: 10d 18 | 19 | public.test1k_sqlserver_pg: 20 | update_key: date 21 | source_options: 22 | range: '2019-01-01,2019-06-01' 23 | chunk_size: 2m 24 | 25 | public.test1k_snowflake_pg: 26 | update_key: id 27 | source_options: 28 | range: '1,800' 29 | chunk_size: 200 30 | -------------------------------------------------------------------------------- /cmd/sling/tests/replications/r.21.yaml: -------------------------------------------------------------------------------- 1 | source: postgres 2 | target: mssql 3 | 4 | streams: 5 | 
dbo.test1k_clickhouse_pg: 6 | sql: > 7 | select id, first_name as "firstName" 8 | from public.test1k_clickhouse_pg 9 | -- where 1=0 10 | object: main.{stream_table} 11 | transforms: 12 | id: [hash_md5] 13 | columns: 14 | id: string(32) 15 | first_name: string(100) 16 | 17 | target_options: 18 | column_casing: snake 19 | table_keys: 20 | primary: [id] 21 | index: [first_name] -------------------------------------------------------------------------------- /cmd/sling/tests/suite.file.template.tsv: -------------------------------------------------------------------------------- 1 | n test_name source_conn source_stream source_options stream_config target_conn target_object mode source_primary_key source_update_key target_options env 2 | 1 csv_pg_full_refresh local file://tests/files/test1.csv {} {} postgres [schema].[table] full-refresh "{""adjust_column_type"":true, ""add_new_columns"":true, ""use_bulk"": true, ""table_keys"": { ""unique"": [ ""id"" ] }}" "{""validation_file"": ""file://tests/files/test1.csv"", ""validation_cols"": ""0,1,2,3,4,6""}" 3 | 2 pg_file_csv postgres [schema].[table] {} [conn] [folder]/files/[table].csv full-refresh 4 | 3 pg_file_json postgres [schema].[table] {} [conn] [folder]/files/[table].json full-refresh 5 | 4 pg_file_parquet postgres [schema].[table] {} [conn] [folder]/files/[table].parquet full-refresh 6 | 5 pg_file_csv_many postgres [schema].[table] {} [conn] [folder]/csv/*.csv full-refresh "{""file_max_rows"": 100}" 7 | 6 discover_root [conn] discover "{""validation_contains"": ""[folder]/"", ""validation_not_contains"": ""[folder]/files/[table]"", ""validation_row_count"": "">0""}" 8 | 7 discover_folder [conn] [folder]/files discover "{""validation_contains"": ""[folder]/files/"", ""validation_row_count"": ""1""}" 9 | 8 discover_folder_files [conn] [folder]/files/ discover "{""validation_contains"": ""[folder]/files/[table].csv,[folder]/files/[table].json,[folder]/files/[table].parquet"", ""validation_not_contains"": ""[folder]/files/[table].parquet/"", ""validation_row_count"": ""3""}" 10 | 9 discover_folder_files_csv_wildcard [conn] [folder]/files/*.csv discover "{""validation_contains"": ""[folder]/files/[table].csv"", ""validation_row_count"": ""1""}" 11 | 10 discover_filter [conn] [folder]/**/*[table].js* discover "{""recursive"": true, ""validation_contains"": ""[folder]/files/[table].json"", ""validation_row_count"": ""1""}" 12 | 11 discover_file [conn] [folder]/files/[table].parquet discover "{""validation_contains"": ""create_dt"", ""validation_row_count"": ""12"", ""level"": ""column""}" 13 | 12 discover_csv_folder_files [conn] [folder]/csv/*.csv discover "{""validation_row_count"": "">5""}" 14 | 13 discover_recursive [conn] [folder] discover "{""recursive"": true, ""validation_row_count"": "">5""}" 15 | 14 csv_to_pg [conn] [folder]/files/[table].csv postgres [schema].[table] full-refresh "{""adjust_column_type"":true, ""add_new_columns"":true, ""use_bulk"": true, ""table_keys"": { ""index"": [ ""id"" ] }}" "{""validation_file"": ""file://tests/files/test1.csv"", ""validation_cols"": ""0,1,2,3,4,6""}" 16 | 15 parquet_to_pg [conn] [folder]/files/[table].parquet postgres [schema].[table] incremental _sling_loaded_at "{""validation_row_count"": ""2000""}" 17 | 16 folder_to_pg [conn] [folder]/csv postgres [schema].[table] full-refresh "{""adjust_column_type"":true, ""add_new_columns"":true, ""use_bulk"": true, ""table_keys"": { ""unique"": [ ""id"" ] }}" "{""validation_file"": ""file://tests/files/test1.csv"", ""validation_cols"": ""0,1,2,3,4,6""}" 
18 | 17 folder_files_to_pg [conn] [folder]/csv/ postgres [schema].[table] full-refresh "{""adjust_column_type"":true, ""add_new_columns"":true, ""use_bulk"": true, ""table_keys"": { ""unique"": [ ""id"" ] }}" "{""validation_file"": ""file://tests/files/test1.csv"", ""validation_cols"": ""0,1,2,3,4,6""}" 19 | 18 files_single_to_pg [conn] [folder]/csv/*.csv "{""single"": true}" postgres [schema].[table] full-refresh "{""adjust_column_type"":true, ""add_new_columns"":true, ""use_bulk"": true, ""table_keys"": { ""unique"": [ ""id"" ] }}" "{""validation_file"": ""file://tests/files/test1.csv"", ""validation_cols"": ""0,1,2,3,4,6""}" 20 | 19 files_single_to_local [conn] [folder]/csv/*.csv "{""single"": true}" local /tmp/sling_test/[folder]/files/[table].csv full-refresh "{""file_max_rows"": 0, ""file_max_bytes"": 0}" 21 | -------------------------------------------------------------------------------- /cmd/sling/tests/task.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | conn: OCRAL_R2 3 | stream: s3://ocral/mlo.community.test/channels.json 4 | 5 | target: 6 | conn: SQLITE 7 | object: 'main.{stream_file_name}_slack_data' -------------------------------------------------------------------------------- /cmd/sling/tests/test1.json: -------------------------------------------------------------------------------- 1 | { 2 | "source": { 3 | "stream": "testing", 4 | "conn": "testing", 5 | "options": { 6 | "limit": 111 7 | } 8 | }, 9 | "target": { 10 | "options": { 11 | "table_ddl": "testing", 12 | "post_sql": "testing", 13 | "pre_sql": "testing", 14 | "table_tmp": "testing" 15 | }, 16 | "object": "testing", 17 | "conn": "testing" 18 | }, 19 | "mode": "testing" 20 | } -------------------------------------------------------------------------------- /cmd/sling/tests/test1.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | conn: testing 3 | stream: testing 4 | options: 5 | limit: 111 6 | 7 | target: 8 | conn: testing 9 | object: testing 10 | options: 11 | pre_sql: testing 12 | post_sql: testing 13 | table_ddl: testing 14 | table_tmp: testing 15 | mode: testing -------------------------------------------------------------------------------- /core/dbio/.envkey: -------------------------------------------------------------------------------- 1 | {"appId":"2575beb9-5cc5-4f3d-b4a9-5e5418007687","orgId":"be03e88e-c93e-43af-9f5c-0af0910e3ff4"} -------------------------------------------------------------------------------- /core/dbio/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | database/test.db 3 | filesys/test/test_write/ 4 | filesys/test/test1/sas7bdat/ed2021_sas.sas7bdat 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /core/dbio/README.md: -------------------------------------------------------------------------------- 1 | # dbio 2 | 3 | `dbio` is a golang library for all things Database, Storage I/O processing. 
4 | 5 | The particularly useful structs are: 6 | * `Datastream`: https://github.com/slingdata-io/sling-cli/blob/main/core/dbio/iop/datastream.go 7 | * `Dataflow`: https://github.com/slingdata-io/sling-cli/blob/main/core/dbio/iop/dataflow.go 8 | 9 | ## Projects using `dbio`: 10 | * https://slingdata.io 11 | * https://github.com/dbnet-io/dbnet 12 | * https://github.com/dbrest-io/dbREST 13 | -------------------------------------------------------------------------------- /core/dbio/database/analyzer_test.go: -------------------------------------------------------------------------------- 1 | package database 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/flarco/g" 8 | ) 9 | 10 | func TestDataAnalyzer(t *testing.T) { 11 | dbURL := os.Getenv("POSTGRES_URL") 12 | conn, err := NewConn(dbURL) 13 | if !g.AssertNoError(t, err) { 14 | return 15 | } 16 | 17 | schemas := []string{"product_transactional", "stripe", "salesforce_raw", "hubspot_raw"} 18 | da, err := NewDataAnalyzer(conn, DataAnalyzerOptions{SchemaNames: schemas}) 19 | if !g.AssertNoError(t, err) { 20 | return 21 | } 22 | 23 | err = da.AnalyzeColumns(10000, false) 24 | if !g.AssertNoError(t, err) { 25 | return 26 | } 27 | 28 | err = da.ProcessRelations() 29 | if !g.AssertNoError(t, err) { 30 | return 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /core/dbio/database/clickhouse_test.go: -------------------------------------------------------------------------------- 1 | // Licensed to ClickHouse, Inc. under one or more contributor 2 | // license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright 4 | // ownership. ClickHouse, Inc. licenses this file to you under 5 | // the Apache License, Version 2.0 (the "License"); you may 6 | // not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.
17 | 18 | package database 19 | 20 | import ( 21 | "fmt" 22 | "testing" 23 | "time" 24 | 25 | "github.com/ClickHouse/clickhouse-go/v2" 26 | "github.com/flarco/g" 27 | "github.com/stretchr/testify/assert" 28 | 29 | _ "github.com/ClickHouse/clickhouse-go/v2" 30 | "github.com/google/uuid" 31 | ) 32 | 33 | func TestBatchInsertClickhouse(t *testing.T) { 34 | conn := clickhouse.OpenDB(&clickhouse.Options{ 35 | Addr: []string{fmt.Sprintf("100.110.2.70:9000")}, 36 | Auth: clickhouse.Auth{ 37 | Database: "default", 38 | Username: "admin", 39 | Password: "dElta123!", 40 | }, 41 | Settings: g.M( 42 | "allow_experimental_map_type", "1", 43 | // "allow_experimental_lightweight_delete", "true", 44 | ), 45 | DialTimeout: 5 * time.Second, 46 | // Compression: compression, 47 | // TLS: tlsConfig, 48 | // Protocol: protocol, 49 | }) 50 | 51 | conn.SetMaxIdleConns(5) 52 | 53 | if _, err := conn.Exec(`DROP TABLE IF EXISTS example`); err != nil { 54 | assert.NoError(t, err) 55 | return 56 | } 57 | _, err := conn.Exec(` 58 | CREATE TABLE IF NOT EXISTS example ( 59 | Col1 UInt8 60 | , Col2 String 61 | , Col3 FixedString(3) 62 | , Col4 UUID 63 | , Col5 Map(String, UInt8) 64 | , Col6 Array(String) 65 | , Col7 Tuple(String, UInt8, Array(Map(String, String))) 66 | , Col8 DateTime 67 | ) Engine = Memory 68 | `) 69 | if err != nil { 70 | assert.NoError(t, err) 71 | return 72 | } 73 | scope, err := conn.Begin() 74 | if err != nil { 75 | assert.NoError(t, err) 76 | return 77 | } 78 | batch, err := scope.Prepare("insert into example") 79 | if err != nil { 80 | assert.NoError(t, err) 81 | return 82 | } 83 | for i := 0; i < 1000; i++ { 84 | _, err := batch.Exec( 85 | uint8(42), 86 | "ClickHouse", "Inc", 87 | uuid.New(), 88 | map[string]uint8{"key": 1}, // Map(String, UInt8) 89 | []string{"Q", "W", "E", "R", "T", "Y"}, // Array(String) 90 | []interface{}{ // Tuple(String, UInt8, Array(Map(String, String))) 91 | "String Value", uint8(5), []map[string]string{ 92 | {"key": "value"}, 93 | {"key": "value"}, 94 | {"key": "value"}, 95 | }, 96 | }, 97 | time.Now(), 98 | ) 99 | if err != nil { 100 | assert.NoError(t, err) 101 | return 102 | } 103 | } 104 | err = scope.Commit() 105 | assert.NoError(t, err) 106 | 107 | rows, err := conn.Query(` 108 | select count(*) cnt, sum(Col1) total from example 109 | `) 110 | assert.NoError(t, err) 111 | 112 | rows.Next() 113 | var cnt, total int 114 | rows.Scan(&cnt, &total) 115 | g.Info("count: %d, total %d", cnt, total) 116 | } 117 | -------------------------------------------------------------------------------- /core/dbio/database/database_duckdb_unix.go: -------------------------------------------------------------------------------- 1 | //go:build linux || darwin 2 | 3 | package database 4 | 5 | import ( 6 | "bufio" 7 | "io" 8 | "os" 9 | "path" 10 | "strings" 11 | "syscall" 12 | 13 | "github.com/flarco/g" 14 | "github.com/samber/lo" 15 | "github.com/slingdata-io/sling-cli/core/dbio/iop" 16 | "github.com/slingdata-io/sling-cli/core/env" 17 | ) 18 | 19 | func (conn *DuckDbConn) BulkImportFlow(tableFName string, df *iop.Dataflow) (count uint64, err error) { 20 | switch conn.GetProp("copy_method") { 21 | case "named_pipes": 22 | return conn.importViaNamedPipe(tableFName, df) 23 | case "csv_files": 24 | return conn.importViaTempCSVs(tableFName, df) 25 | case "http_server": 26 | return conn.importViaHTTP(tableFName, df) 27 | default: 28 | return conn.importViaTempCSVs(tableFName, df) 29 | } 30 | } 31 | 32 | func (conn *DuckDbConn) importViaNamedPipe(tableFName string, df *iop.Dataflow) (count 
uint64, err error) { 33 | 34 | table, err := ParseTableName(tableFName, conn.GetType()) 35 | if err != nil { 36 | err = g.Error(err, "could not get table name for import") 37 | return 38 | } 39 | 40 | // Create a named pipe 41 | folderPath := path.Join(env.GetTempFolder(), "duckdb", "import", env.CleanTableName(tableFName), g.NowFileStr()) 42 | if err = os.MkdirAll(folderPath, 0755); err != nil { 43 | return 0, g.Error(err, "could not create temp folder: %s", folderPath) 44 | } 45 | 46 | pipePath := path.Join(folderPath, "duckdb_pipe") 47 | if err = syscall.Mkfifo(pipePath, 0666); err != nil { 48 | return 0, g.Error(err, "could not create named pipe") 49 | } 50 | defer os.Remove(pipePath) 51 | 52 | importContext := g.NewContext(conn.context.Ctx) 53 | 54 | importContext.Wg.Write.Add() 55 | readyChn := make(chan bool) 56 | go func() { 57 | defer importContext.Wg.Write.Done() 58 | 59 | config := iop.DefaultStreamConfig() 60 | config.Header = true 61 | config.Delimiter = "," 62 | config.Escape = `"` 63 | config.Quote = `"` 64 | config.NullAs = `\N` 65 | config.DatetimeFormat = conn.Type.GetTemplateValue("variable.timestampz_layout") 66 | 67 | readyChn <- true 68 | 69 | pipeFile, err := os.OpenFile(pipePath, os.O_WRONLY, os.ModeNamedPipe) 70 | if err != nil { 71 | df.Context.CaptureErr(g.Error(err, "could not open named pipe for writing")) 72 | return 73 | } 74 | defer pipeFile.Close() 75 | bufWriter := bufio.NewWriter(pipeFile) 76 | 77 | tbw := int64(0) 78 | for ds := range df.StreamCh { 79 | for batchR := range ds.NewCsvReaderChnl(config) { 80 | bw, err := io.Copy(bufWriter, batchR.Reader) 81 | if err != nil { 82 | err = g.Error(err, "Error writing from reader") 83 | df.Context.CaptureErr(err) 84 | return 85 | } 86 | tbw += bw 87 | } 88 | } 89 | 90 | g.Debug("wrote %d bytes via named pipe", tbw) 91 | }() 92 | 93 | columnNames := lo.Map(df.Columns.Names(), func(col string, i int) string { 94 | return `"` + col + `"` 95 | }) 96 | 97 | sqlLines := []string{ 98 | g.F(`insert into %s (%s) select * from read_csv('%s', delim=',', auto_detect=False, header=True, columns=%s, max_line_size=134217728, parallel=false, quote='"', escape='"', nullstr='\N', auto_detect=false);`, table.FDQN(), strings.Join(columnNames, ", "), pipePath, conn.generateCsvColumns(df.Columns)), 99 | } 100 | 101 | sql := strings.Join(sqlLines, ";\n") 102 | 103 | <-readyChn // wait for writer to be ready 104 | result, err := conn.duck.ExecContext(conn.Context().Ctx, sql) 105 | if err != nil { 106 | return 0, g.Error(err, "could not insert into %s", tableFName) 107 | } 108 | 109 | importContext.Wg.Write.Wait() 110 | 111 | if result != nil { 112 | inserted, _ := result.RowsAffected() 113 | g.Debug("inserted %d rows", inserted) 114 | } 115 | 116 | return df.Count(), nil 117 | } 118 | -------------------------------------------------------------------------------- /core/dbio/database/database_duckdb_windows.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package database 4 | 5 | import ( 6 | "github.com/slingdata-io/sling-cli/core/dbio/iop" 7 | ) 8 | 9 | func (conn *DuckDbConn) BulkImportFlow(tableFName string, df *iop.Dataflow) (count uint64, err error) { 10 | switch conn.GetProp("copy_method") { 11 | case "csv_files": 12 | return conn.importViaTempCSVs(tableFName, df) 13 | case "http_server": 14 | return conn.importViaHTTP(tableFName, df) 15 | default: 16 | return conn.importViaTempCSVs(tableFName, df) 17 | } 18 | } 19 | 
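Note: the unix build of `BulkImportFlow` above streams CSV batches into DuckDB through a named pipe: it creates a FIFO with `syscall.Mkfifo`, a goroutine writes the dataflow's CSV output into it, and DuckDB's `read_csv` reads from the pipe path as if it were a regular file. The snippet below is a minimal standalone sketch of that producer/consumer pattern, assuming a unix system; the file names and the plain `io.ReadAll` consumer are illustrative stand-ins for DuckDB, not part of sling's API.

```go
// named_pipe_sketch.go — illustrative only; mirrors the FIFO pattern used by
// importViaNamedPipe above, with a plain reader standing in for DuckDB.
package main

import (
	"bufio"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"syscall"
)

func main() {
	dir, err := os.MkdirTemp("", "pipe_demo")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(dir)

	// create the FIFO (unix only)
	pipePath := filepath.Join(dir, "demo_pipe")
	if err := syscall.Mkfifo(pipePath, 0666); err != nil {
		log.Fatal(err)
	}

	done := make(chan struct{})

	// producer: open the pipe for writing and stream CSV rows into it
	go func() {
		defer close(done)
		f, err := os.OpenFile(pipePath, os.O_WRONLY, os.ModeNamedPipe)
		if err != nil {
			log.Println("writer:", err)
			return
		}
		defer f.Close()

		w := bufio.NewWriter(f)
		fmt.Fprintln(w, "id,name")
		for i := 1; i <= 3; i++ {
			fmt.Fprintf(w, "%d,row_%d\n", i, i)
		}
		w.Flush() // flush before closing so the reader sees every row
	}()

	// consumer: stands in for a read_csv('<pipePath>', ...) call; opening the
	// FIFO blocks until the writer end is attached, which is what keeps the
	// two sides synchronized.
	f, err := os.Open(pipePath)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	data, err := io.ReadAll(f)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Print(string(data)) // prints the CSV that traveled through the pipe

	<-done
}
```

That blocking-open behavior appears to be why the implementation above signals over `readyChn` before issuing the `insert ... from read_csv(...)` statement: the writer goroutine must be about to open its end of the pipe, otherwise the query would sit waiting for data.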
-------------------------------------------------------------------------------- /core/dbio/database/database_trino.go: -------------------------------------------------------------------------------- 1 | package database 2 | 3 | import ( 4 | "context" 5 | "crypto/tls" 6 | "database/sql" 7 | "net" 8 | "net/http" 9 | "strings" 10 | "time" 11 | 12 | "github.com/flarco/g" 13 | "github.com/slingdata-io/sling-cli/core/dbio" 14 | "github.com/slingdata-io/sling-cli/core/dbio/iop" 15 | "github.com/spf13/cast" 16 | "github.com/trinodb/trino-go-client/trino" 17 | _ "github.com/trinodb/trino-go-client/trino" 18 | ) 19 | 20 | // TrinoConn is a Trino connection 21 | type TrinoConn struct { 22 | BaseConn 23 | URL string 24 | } 25 | 26 | // Init initiates the object 27 | func (conn *TrinoConn) Init() error { 28 | 29 | conn.BaseConn.URL = conn.URL 30 | conn.BaseConn.Type = dbio.TypeDbTrino 31 | 32 | instance := Connection(conn) 33 | conn.BaseConn.instance = &instance 34 | 35 | return conn.BaseConn.Init() 36 | } 37 | 38 | func (conn *TrinoConn) ConnString() string { 39 | 40 | configMap := g.M() 41 | 42 | propMapping := map[string]string{ 43 | "source": "Source", 44 | "catalog": "Catalog", 45 | "schema": "Schema", 46 | "session_properties": "SessionProperties", 47 | "extra_credentials": "ExtraCredentials", 48 | "custom_client_name": "CustomClientName", 49 | "kerberos_enabled": "KerberosEnabled", 50 | "kerberos_keytab_path": "KerberosKeytabPath", 51 | "kerberos_principal": "KerberosPrincipal", 52 | "kerberos_realm": "KerberosRealm", 53 | "kerberos_config_path": "KerberosConfigPath", 54 | "ssl_cert_path": "SSLCertPath", 55 | "ssl_cert": "SSLCert", 56 | } 57 | 58 | for key, new_key := range propMapping { 59 | if val := conn.GetProp(key); val != "" { 60 | configMap[new_key] = val 61 | } 62 | } 63 | 64 | if cast.ToBool(conn.GetProp("skip_tls")) { 65 | 66 | // register client to skip tls 67 | skipTLSClient := &http.Client{ 68 | Transport: &http.Transport{ 69 | Proxy: http.ProxyFromEnvironment, 70 | DialContext: (&net.Dialer{ 71 | Timeout: 30 * time.Second, 72 | KeepAlive: 30 * time.Second, 73 | DualStack: true, 74 | }).DialContext, 75 | MaxIdleConns: 10, 76 | IdleConnTimeout: 90 * time.Second, 77 | TLSHandshakeTimeout: 10 * time.Second, 78 | ExpectContinueTimeout: 1 * time.Second, 79 | TLSClientConfig: &tls.Config{ 80 | InsecureSkipVerify: true, 81 | }, 82 | }, 83 | } 84 | trino.RegisterCustomClient("skip_tls", skipTLSClient) 85 | configMap["CustomClientName"] = "skip_tls" 86 | } 87 | 88 | URI := g.F( 89 | "http://%s:%s@%s:%s", 90 | conn.GetProp("username"), 91 | conn.GetProp("password"), 92 | conn.GetProp("host"), 93 | conn.GetProp("port"), 94 | ) 95 | config := trino.Config{ServerURI: URI} 96 | g.Unmarshal(g.Marshal(configMap), &config) 97 | 98 | // set default timeouts 99 | trino.DefaultQueryTimeout = 360 * time.Minute 100 | 101 | if val := conn.GetProp("query_timeout"); val != "" { 102 | trino.DefaultQueryTimeout = time.Duration(cast.ToInt(val)) * time.Minute 103 | } 104 | 105 | if url := conn.GetProp("http_url"); url != "" { 106 | config.ServerURI = url 107 | } 108 | 109 | dsn, err := config.FormatDSN() 110 | if err != nil { 111 | g.Warn("invalid dsn: %s", err.Error()) 112 | } 113 | 114 | return dsn 115 | } 116 | 117 | // NewTransaction creates a new transaction 118 | func (conn *TrinoConn) NewTransaction(ctx context.Context, options ...*sql.TxOptions) (tx Transaction, err error) { 119 | // does not support transactions 120 | return nil, nil 121 | } 122 | 123 | // ExecContext runs a sql query with context, 
returns `error` 124 | func (conn *TrinoConn) ExecContext(ctx context.Context, q string, args ...interface{}) (result sql.Result, err error) { 125 | return conn.BaseConn.ExecContext(ctx, strings.TrimRight(strings.TrimSpace(q), ";"), args...) 126 | } 127 | 128 | func processTrinoInsertRow(columns iop.Columns, row []any) []any { 129 | for i := range row { 130 | if row[i] == nil { 131 | continue 132 | } 133 | 134 | if columns[i].Type == iop.DecimalType { 135 | row[i] = trino.Numeric(cast.ToString(row[i])) 136 | } else if columns[i].Type == iop.BoolType { 137 | row[i] = cast.ToBool(row[i]) 138 | } else if columns[i].Type == iop.FloatType { 139 | row[i] = trino.Numeric(cast.ToString(row[i])) 140 | } 141 | } 142 | return row 143 | } 144 | -------------------------------------------------------------------------------- /core/dbio/database/dbx_test.go: -------------------------------------------------------------------------------- 1 | package database_test 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | "time" 7 | 8 | "github.com/flarco/g" 9 | "github.com/slingdata-io/sling-cli/core/dbio/database" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | type User9 struct { 14 | database.ModelDbX 15 | Name string `json:"name" db:"name" gorm:"primaryKey"` 16 | Age int `json:"age" db:"age"` 17 | Mature bool `json:"mature" db:"mature"` 18 | Birthdate time.Time `json:"birthdate" db:"birthdate"` 19 | UpdatedDt time.Time `json:"updated_dt" db:"updated_dt" gorm:"autoUpdateTime"` 20 | CreatedDt time.Time `json:"created_dt" db:"created_dt" gorm:"autoCreateTime"` 21 | } 22 | 23 | func NewUser(u User9) *User9 { 24 | u.Ptr = &u 25 | return &u 26 | } 27 | 28 | func TestDbX(t *testing.T) { 29 | database.Debug = true 30 | conn, err := database.NewConn(os.Getenv("POSTGRES_URL")) 31 | if !assert.NoError(t, err) { 32 | return 33 | } 34 | 35 | _, err = conn.Exec("drop table if exists users9") 36 | if !assert.NoError(t, err) { 37 | return 38 | } 39 | _, err = conn.Exec(` 40 | create table users9 ( 41 | name text, 42 | age int, 43 | mature boolean, 44 | birthdate timestamp, 45 | updated_dt timestamp, 46 | created_dt timestamp 47 | ) 48 | `) 49 | if !assert.NoError(t, err) { 50 | return 51 | } 52 | 53 | // insert 54 | db := conn.Db() 55 | user := NewUser(User9{Name: "Fritz", Age: 35, Mature: true}) 56 | err = user.Insert(db) 57 | if !assert.NoError(t, err) { 58 | g.LogError(err) 59 | return 60 | } 61 | 62 | // read 63 | user.Age = 6 64 | user.Mature = true 65 | err = user.Get(db) 66 | assert.NoError(t, err) 67 | assert.Equal(t, 35, user.Age) 68 | assert.Equal(t, true, user.Mature) 69 | 70 | // update 71 | user = NewUser(User9{Name: "Fritz Larco", Age: 36, Mature: true}) 72 | err = user.Where("name = ?", "Fritz").Update(db, "name", "age") 73 | assert.NoError(t, err) 74 | 75 | // read 76 | user = NewUser(User9{}) 77 | err = user.Get(db) 78 | assert.NoError(t, err) 79 | assert.Equal(t, 36, user.Age) 80 | assert.Equal(t, "Fritz Larco", user.Name) 81 | 82 | // upsert 83 | // read 84 | 85 | // delete 86 | err = user.Where("age = ?", 36).Delete(db) 87 | assert.NoError(t, err) 88 | 89 | // read 90 | user = NewUser(User9{}) 91 | err = user.Get(db) 92 | assert.Error(t, err) 93 | 94 | // dbx := conn.DbX() 95 | 96 | } 97 | 98 | /* 99 | 100 | 101 | type Users []User 102 | type User { 103 | database.ModelDbX 104 | Name string 105 | Age int 106 | } 107 | 108 | func NewUser() *User { 109 | u := new(User) 110 | u.Ptr = u 111 | return u 112 | } 113 | 114 | func NewUsers() []*User { 115 | us := Users{} 116 | u.Ptr = u 117 | return u 118 | } 
119 | 120 | fields := []string{} => ("Name", "Age") 121 | where := []interface{}{} => ("name in (?, ?)", "Jason", "Tina") 122 | limit := 10 123 | values ;= map[string]interface{}{"name": "Fritz", "age": 37} 124 | 125 | s.Config{ 126 | Operation: s.INSERT, 127 | Fields: []strings{"Name", "Age"}, 128 | Where: g.M() 129 | } 130 | 131 | single.Bind(rec, &single) 132 | many.Bind(recs, &many) 133 | 134 | db.Insert(&single, fields) 135 | db.Get(&single, fields, where...) // also use PK 136 | db.Update(&single, fields, where...) // also use PK 137 | db.Upsert(&single, fields, where...) // also use PK 138 | db.Delete(&single, where...) // also use PK 139 | 140 | db.Insert(&many, fields) 141 | db.Get(&many, fields, where...) // also use PK 142 | db.Update(&many, fields, where...) // also use PK 143 | db.Upsert(&many, fields, where...) // also use PK 144 | db.Delete(&many, where...) // also use PK 145 | 146 | single.Where("name in (?, ?)", "Jason", "Tina").Insert() 147 | single.Insert(fields) 148 | single.Get(fields, where...) 149 | single.Update(fields, where...) // also use PK 150 | single.Upsert(fields, where...) // also use PK 151 | single.Delete(where...) // also use PK 152 | 153 | many.Insert(fields) 154 | many.Get(fields, where) 155 | many.Update(fields, where...) // also use PK 156 | many.Upsert(fields, where...) // also use PK 157 | many.Delete(where...) // also use PK 158 | 159 | users := []User{} 160 | err = users.Get("name in (?, ?)", "Jason", "Tina") 161 | 162 | user := User{} 163 | err = user.Get("name = ?", "Jason") 164 | 165 | err = user.Delete("name = ?", "Fritz") 166 | 167 | err = user.Update("name = ?", "Fritz") 168 | 169 | */ 170 | -------------------------------------------------------------------------------- /core/dbio/database/test/test1.1.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,target,create_dt,rating 2 | 1,"AOCG,"" 3 | 883",Lumox,ilumox0@unc.edu,true,2019-02-11 11:23:00.000,35.987 4 | 2,Terrell,"EKOZ,989",tmee1@example.com,true,2019-08-19 17:02:09.000,89.983 5 | 3,Frayda,Comolli,fcomolli2@cbslocal.com,false,2019-08-07 16:48:12.000,89.401 6 | 4,Alphonse,Crookshank,acrookshank3@mapy.cz,true,2019-03-31 14:41:45.000,19.933 7 | 5,Mathilda,Deaves,mdeaves4@va.gov,true,2019-07-15 21:20:39.000,90.643 8 | 6,Trixi,Livens,tlivens5@cargocollective.com,true,2019-06-20 10:58:40.000,74.341 9 | 7,Ida,Domican,idomican6@gravatar.com,false,2019-03-11 09:18:34.000,55.454 10 | 8,Bogey,Kears,bkears7@nih.gov,false,2019-09-11 22:08:40.000,3.869 11 | 9,Janifer,Melanaphy,jmelanaphy8@alibaba.com,true,2019-02-19 16:23:06.000,48.234 12 | 10,Kaela,Illsley,killsley9@feedburner.com,false,2019-04-17 05:05:18.000,86.13 13 | 11,Yorgos,Ruprechter,yruprechtera@baidu.com,false,2019-01-03 05:57:01.000,88.736 14 | 12,Bunni,Bothams,bbothamsb@amazon.co.jp,false,2019-08-12 14:51:09.000,94.945 15 | 13,Sharon,Atwater,satwaterc@webeden.co.uk,false,2019-05-22 09:38:10.000,30.112 16 | 14,Ellen,Real,ereald@amazonaws.com,true,2019-02-01 18:47:06.000,6.877 17 | 15,Charissa,Ballance,cballancee@amazonaws.com,true,2019-07-25 04:34:02.000,8.514 18 | 16,Zorina,Cudiff,zcudifff@jigsy.com,false,2019-10-06 04:13:55.000,27.63 19 | 17,Grove,Kenrack,gkenrackg@chronoengine.com,true,2019-09-01 05:11:02.000,18.658 20 | 18,Alexis,Fewtrell,afewtrellh@google.cn,true,2019-05-31 14:55:31.000,44.751 -------------------------------------------------------------------------------- /core/dbio/database/test/test1.1.csv.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/database/test/test1.1.csv.gz -------------------------------------------------------------------------------- /core/dbio/database/test/test1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/database/test/test1.csv.gz -------------------------------------------------------------------------------- /core/dbio/dbio.go: -------------------------------------------------------------------------------- 1 | package dbio 2 | 3 | func init() { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/_delta_log/.00000000000000000000.json.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/delta/_delta_log/.00000000000000000000.json.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/_delta_log/00000000000000000000.json: -------------------------------------------------------------------------------- 1 | {"commitInfo":{"timestamp":1706278148531,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[\"country\"]"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"3","numOutputRows":"5","numOutputBytes":"3045"},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"1cbc9537-63eb-4799-8647-2d947ae8fa41"}} 2 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 3 | {"metaData":{"id":"1f110132-a652-4be9-815e-348f294515cf","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"country\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"continent\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["country"],"configuration":{},"createdTime":1706278146762}} 4 | {"add":{"path":"country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet","partitionValues":{"country":"Argentina"},"size":1018,"modificationTime":1706278148083,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Ernesto\",\"last_name\":\"Guevara\",\"continent\":\"NaN\"},\"maxValues\":{\"first_name\":\"Ernesto\",\"last_name\":\"Guevara\",\"continent\":\"NaN\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} 5 | {"add":{"path":"country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet","partitionValues":{"country":"China"},"size":1002,"modificationTime":1706278148138,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"first_name\":\"Bruce\",\"last_name\":\"Lee\",\"continent\":\"Asia\"},\"maxValues\":{\"first_name\":\"Jack\",\"last_name\":\"Ma\",\"continent\":\"Asia\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} 6 | 
{"add":{"path":"country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet","partitionValues":{"country":"Germany"},"size":1025,"modificationTime":1706278148185,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"first_name\":\"Soraya\",\"last_name\":\"Jala\",\"continent\":\"NaN\"},\"maxValues\":{\"first_name\":\"Wolfgang\",\"last_name\":\"Manche\",\"continent\":\"NaN\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} 7 | -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/country=Argentina/.part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/delta/country=Argentina/.part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/delta/country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/country=China/.part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/delta/country=China/.part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/delta/country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/country=Germany/.part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/delta/country=Germany/.part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/delta/country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/delta/country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet -------------------------------------------------------------------------------- 
/core/dbio/filesys/test/lineitem_iceberg/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | this iceberg table is generated by using DuckDB (v0.7.0) to generated TPC-H lineitem 3 | SF0.01 then storing that to a parquet file. 4 | 5 | Then pyspark (3.3.1) was used with the iceberg extension from https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.0.0/iceberg-spark-runtime-3.3_2.12-1.0.0.jar 6 | to write the iceberg table. 7 | 8 | finally, using pyspark, a delete query was performed on this iceberg table: 9 | 10 | ``` 11 | DELETE FROM iceberg_catalog.lineitem_iceberg where l_extendedprice < 10000 12 | ``` 13 | 14 | The result for Q06 of TPC-H on this table according to pyspark is now: 15 | ``` 16 | [Row(revenue=Decimal('1077536.9101'))] 17 | ``` 18 | 19 | Note: it appears that there are no deletes present in this iceberg table, the whole thing was rewritten. 20 | this is likely due to the fact that the table is so small? -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc -------------------------------------------------------------------------------- 
/core/dbio/filesys/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/.v1.metadata.json.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/.v1.metadata.json.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/.v2.metadata.json.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/.v2.metadata.json.crc -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/.version-hint.text.crc: -------------------------------------------------------------------------------- 1 | crcվ -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/v1.metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version" : 2, 3 | "table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b", 4 | "location" : "./lineitem_iceberg", 5 | "last-sequence-number" : 1, 6 | "last-updated-ms" : 1676473674504, 7 | "last-column-id" : 16, 8 | "current-schema-id" : 0, 9 | "schemas" : [ { 10 | "type" : "struct", 11 | "schema-id" : 0, 12 | "fields" : [ { 13 | "id" : 1, 14 | "name" : "l_orderkey", 15 | "required" : false, 16 | "type" : "int" 17 | }, { 18 | "id" : 2, 19 | "name" : "l_partkey", 20 | "required" : false, 21 | "type" : "int" 22 | }, { 23 | "id" : 3, 24 | "name" : "l_suppkey", 25 | "required" : false, 26 | "type" : "int" 27 | }, { 28 | "id" : 4, 29 | "name" : "l_linenumber", 30 | "required" : false, 31 | "type" : "int" 32 | }, { 33 | "id" : 5, 34 | "name" : "l_quantity", 35 | "required" : false, 36 | "type" : "int" 37 | }, { 38 | "id" : 6, 39 | "name" : "l_extendedprice", 40 | "required" : false, 41 | "type" : "decimal(15, 2)" 42 | }, { 43 | "id" : 7, 44 | "name" : "l_discount", 45 | "required" : false, 46 | "type" : "decimal(15, 2)" 47 | }, { 48 | "id" : 8, 49 | "name" : "l_tax", 50 | "required" : false, 51 | "type" : "decimal(15, 2)" 52 | }, { 53 | "id" : 9, 54 | "name" : "l_returnflag", 55 | "required" : false, 56 | "type" : "string" 57 | }, { 58 | "id" : 10, 59 | "name" : "l_linestatus", 60 | "required" : false, 61 | "type" : "string" 62 | }, { 63 | "id" : 11, 64 | "name" : "l_shipdate", 65 | "required" : false, 66 | "type" : "date" 67 | }, { 68 | "id" : 12, 69 | "name" : "l_commitdate", 70 | "required" : false, 71 | "type" : "date" 72 | }, { 73 | "id" : 13, 74 | "name" : 
"l_receiptdate", 75 | "required" : false, 76 | "type" : "date" 77 | }, { 78 | "id" : 14, 79 | "name" : "l_shipinstruct", 80 | "required" : false, 81 | "type" : "string" 82 | }, { 83 | "id" : 15, 84 | "name" : "l_shipmode", 85 | "required" : false, 86 | "type" : "string" 87 | }, { 88 | "id" : 16, 89 | "name" : "l_comment", 90 | "required" : false, 91 | "type" : "string" 92 | } ] 93 | } ], 94 | "default-spec-id" : 0, 95 | "partition-specs" : [ { 96 | "spec-id" : 0, 97 | "fields" : [ ] 98 | } ], 99 | "last-partition-id" : 999, 100 | "default-sort-order-id" : 0, 101 | "sort-orders" : [ { 102 | "order-id" : 0, 103 | "fields" : [ ] 104 | } ], 105 | "properties" : { 106 | "owner" : "root", 107 | "write.update.mode" : "merge-on-read" 108 | }, 109 | "current-snapshot-id" : 3776207205136740581, 110 | "refs" : { 111 | "main" : { 112 | "snapshot-id" : 3776207205136740581, 113 | "type" : "branch" 114 | } 115 | }, 116 | "snapshots" : [ { 117 | "sequence-number" : 1, 118 | "snapshot-id" : 3776207205136740581, 119 | "timestamp-ms" : 1676473674504, 120 | "summary" : { 121 | "operation" : "append", 122 | "spark.app.id" : "local-1676472783435", 123 | "added-data-files" : "1", 124 | "added-records" : "60175", 125 | "added-files-size" : "1390176", 126 | "changed-partition-count" : "1", 127 | "total-records" : "60175", 128 | "total-files-size" : "1390176", 129 | "total-data-files" : "1", 130 | "total-delete-files" : "0", 131 | "total-position-deletes" : "0", 132 | "total-equality-deletes" : "0" 133 | }, 134 | "manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro", 135 | "schema-id" : 0 136 | } ], 137 | "snapshot-log" : [ { 138 | "timestamp-ms" : 1676473674504, 139 | "snapshot-id" : 3776207205136740581 140 | } ], 141 | "metadata-log" : [ ] 142 | } -------------------------------------------------------------------------------- /core/dbio/filesys/test/lineitem_iceberg/metadata/version-hint.text: -------------------------------------------------------------------------------- 1 | 2 -------------------------------------------------------------------------------- /core/dbio/filesys/test/test.excel1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/test.excel1.xlsx -------------------------------------------------------------------------------- /core/dbio/filesys/test/test.excel2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/test.excel2.xlsx -------------------------------------------------------------------------------- /core/dbio/filesys/test/test1/avro/train.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/test1/avro/train.avro -------------------------------------------------------------------------------- /core/dbio/filesys/test/test1/avro/twitter.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/test1/avro/twitter.avro -------------------------------------------------------------------------------- 
/core/dbio/filesys/test/test1/csv/test1.1.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,target,create_dt,rating 2 | 1,"AOCG,"" 3 | 883",Lumox,ilumox0@unc.edu,true,2019-02-11 11:23:00.000,35.987 4 | 2,Terrell,"EKOZ,989",tmee1@example.com,true,2019-08-19 17:02:09.000,89.983 5 | 3,Frayda,Comolli,fcomolli2@cbslocal.com,false,2019-08-07 16:48:12.000,89.401 6 | 4,Alphonse,Crookshank,acrookshank3@mapy.cz,true,2019-03-31 14:41:45.000,19.933 7 | 5,Mathilda,Deaves,mdeaves4@va.gov,true,2019-07-15 21:20:39.000,90.643 8 | 6,Trixi,Livens,tlivens5@cargocollective.com,true,2019-06-20 10:58:40.000,74.341 9 | 7,Ida,Domican,idomican6@gravatar.com,false,2019-03-11 09:18:34.000,55.454 10 | 8,Bogey,Kears,bkears7@nih.gov,false,2019-09-11 22:08:40.000,3.869 11 | 9,Janifer,Melanaphy,jmelanaphy8@alibaba.com,true,2019-02-19 16:23:06.000,48.234 12 | 10,Kaela,Illsley,killsley9@feedburner.com,false,2019-04-17 05:05:18.000,86.13 13 | 11,Yorgos,Ruprechter,yruprechtera@baidu.com,false,2019-01-03 05:57:01.000,88.736 14 | 12,Bunni,Bothams,bbothamsb@amazon.co.jp,false,2019-08-12 14:51:09.000,94.945 15 | 13,Sharon,Atwater,satwaterc@webeden.co.uk,false,2019-05-22 09:38:10.000,30.112 16 | 14,Ellen,Real,ereald@amazonaws.com,true,2019-02-01 18:47:06.000,6.877 17 | 15,Charissa,Ballance,cballancee@amazonaws.com,true,2019-07-25 04:34:02.000,8.514 18 | 16,Zorina,Cudiff,zcudifff@jigsy.com,false,2019-10-06 04:13:55.000,27.63 19 | 17,Grove,Kenrack,gkenrackg@chronoengine.com,true,2019-09-01 05:11:02.000,18.658 20 | 18,Alexis,Fewtrell,afewtrellh@google.cn,true,2019-05-31 14:55:31.000,44.751 -------------------------------------------------------------------------------- /core/dbio/filesys/test/test1/csv/test1.1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/test1/csv/test1.1.csv.gz -------------------------------------------------------------------------------- /core/dbio/filesys/test/test1/json/test1.1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1, 4 | "first_name": "AOCG,\"\n883", 5 | "last_name": "Lumox", 6 | "email": "ilumox0@unc.edu", 7 | "target": true, 8 | "create_dt": "2019-02-11 11:23:00.000", 9 | "rating": 35.987 10 | }, 11 | { 12 | "id": 2, 13 | "first_name": "Terrell", 14 | "last_name": "EKOZ,989", 15 | "email": "tmee1@example.com", 16 | "target": true, 17 | "create_dt": "2019-08-19 17:02:09.000", 18 | "rating": 89.983 19 | }, 20 | { 21 | "id": 3, 22 | "first_name": "Frayda", 23 | "last_name": "Comolli", 24 | "email": "fcomolli2@cbslocal.com", 25 | "target": false, 26 | "create_dt": "2019-08-07 16:48:12.000", 27 | "rating": 89.401 28 | }, 29 | { 30 | "id": 4, 31 | "first_name": "Alphonse", 32 | "last_name": "Crookshank", 33 | "email": "acrookshank3@mapy.cz", 34 | "target": true, 35 | "create_dt": "2019-03-31 14:41:45.000", 36 | "rating": 19.933 37 | }, 38 | { 39 | "id": 5, 40 | "first_name": "Mathilda", 41 | "last_name": "Deaves", 42 | "email": "mdeaves4@va.gov", 43 | "target": true, 44 | "create_dt": "2019-07-15 21:20:39.000", 45 | "rating": 90.643 46 | }, 47 | { 48 | "id": 6, 49 | "first_name": "Trixi", 50 | "last_name": "Livens", 51 | "email": "tlivens5@cargocollective.com", 52 | "target": true, 53 | "create_dt": "2019-06-20 10:58:40.000", 54 | "rating": 74.341 55 | }, 56 | { 57 | "id": 7, 58 | "first_name": "Ida", 59 | 
"last_name": "Domican", 60 | "email": "idomican6@gravatar.com", 61 | "target": false, 62 | "create_dt": "2019-03-11 09:18:34.000", 63 | "rating": 55.454 64 | }, 65 | { 66 | "id": 8, 67 | "first_name": "Bogey", 68 | "last_name": "Kears", 69 | "email": "bkears7@nih.gov", 70 | "target": false, 71 | "create_dt": "2019-09-11 22:08:40.000", 72 | "rating": 3.869 73 | }, 74 | { 75 | "id": 9, 76 | "first_name": "Janifer", 77 | "last_name": "Melanaphy", 78 | "email": "jmelanaphy8@alibaba.com", 79 | "target": true, 80 | "create_dt": "2019-02-19 16:23:06.000", 81 | "rating": 48.234 82 | }, 83 | { 84 | "id": 10, 85 | "first_name": "Kaela", 86 | "last_name": "Illsley", 87 | "email": "killsley9@feedburner.com", 88 | "target": false, 89 | "create_dt": "2019-04-17 05:05:18.000", 90 | "rating": 86.13 91 | }, 92 | { 93 | "id": 11, 94 | "first_name": "Yorgos", 95 | "last_name": "Ruprechter", 96 | "email": "yruprechtera@baidu.com", 97 | "target": false, 98 | "create_dt": "2019-01-03 05:57:01.000", 99 | "rating": 88.736 100 | }, 101 | { 102 | "id": 12, 103 | "first_name": "Bunni", 104 | "last_name": "Bothams", 105 | "email": "bbothamsb@amazon.co.jp", 106 | "target": false, 107 | "create_dt": "2019-08-12 14:51:09.000", 108 | "rating": 94.945 109 | }, 110 | { 111 | "id": 13, 112 | "first_name": "Sharon", 113 | "last_name": "Atwater", 114 | "email": "satwaterc@webeden.co.uk", 115 | "target": false, 116 | "create_dt": "2019-05-22 09:38:10.000", 117 | "rating": 30.112 118 | }, 119 | { 120 | "id": 14, 121 | "first_name": "Ellen", 122 | "last_name": "Real", 123 | "email": "ereald@amazonaws.com", 124 | "target": true, 125 | "create_dt": "2019-02-01 18:47:06.000", 126 | "rating": 6.877 127 | }, 128 | { 129 | "id": 15, 130 | "first_name": "Charissa", 131 | "last_name": "Ballance", 132 | "email": "cballancee@amazonaws.com", 133 | "target": true, 134 | "create_dt": "2019-07-25 04:34:02.000", 135 | "rating": 8.514 136 | }, 137 | { 138 | "id": 16, 139 | "first_name": "Zorina", 140 | "last_name": "Cudiff", 141 | "email": "zcudifff@jigsy.com", 142 | "target": false, 143 | "create_dt": "2019-10-06 04:13:55.000", 144 | "rating": 27.63 145 | }, 146 | { 147 | "id": 17, 148 | "first_name": "Grove", 149 | "last_name": "Kenrack", 150 | "email": "gkenrackg@chronoengine.com", 151 | "target": true, 152 | "create_dt": "2019-09-01 05:11:02.000", 153 | "rating": 18.658 154 | }, 155 | { 156 | "id": 18, 157 | "first_name": "Alexis", 158 | "last_name": "Fewtrell", 159 | "email": "afewtrellh@google.cn", 160 | "target": true, 161 | "create_dt": "2019-05-31 14:55:31.000", 162 | "rating": 44.751 163 | } 164 | ] -------------------------------------------------------------------------------- /core/dbio/filesys/test/test1/json/test1.2.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "id": 1, 5 | "first_name": "AOCG,\"\n883", 6 | "last_name": "Lumox", 7 | "email": "ilumox0@unc.edu", 8 | "target": true, 9 | "create_dt": "2019-02-11 11:23:00.000", 10 | "rating": 35.987 11 | }, 12 | { 13 | "id": 2, 14 | "first_name": "Terrell", 15 | "last_name": "EKOZ,989", 16 | "email": "tmee1@example.com", 17 | "target": true, 18 | "create_dt": "2019-08-19 17:02:09.000", 19 | "rating": 89.983 20 | }, 21 | { 22 | "id": 3, 23 | "first_name": "Frayda", 24 | "last_name": "Comolli", 25 | "email": "fcomolli2@cbslocal.com", 26 | "target": false, 27 | "create_dt": "2019-08-07 16:48:12.000", 28 | "rating": 89.401 29 | }, 30 | { 31 | "id": 4, 32 | "first_name": "Alphonse", 33 | "last_name": "Crookshank", 34 | 
"email": "acrookshank3@mapy.cz", 35 | "target": true, 36 | "create_dt": "2019-03-31 14:41:45.000", 37 | "rating": 19.933 38 | }, 39 | { 40 | "id": 5, 41 | "first_name": "Mathilda", 42 | "last_name": "Deaves", 43 | "email": "mdeaves4@va.gov", 44 | "target": true, 45 | "create_dt": "2019-07-15 21:20:39.000", 46 | "rating": 90.643 47 | }, 48 | { 49 | "id": 6, 50 | "first_name": "Trixi", 51 | "last_name": "Livens", 52 | "email": "tlivens5@cargocollective.com", 53 | "target": true, 54 | "create_dt": "2019-06-20 10:58:40.000", 55 | "rating": 74.341 56 | }, 57 | { 58 | "id": 7, 59 | "first_name": "Ida", 60 | "last_name": "Domican", 61 | "email": "idomican6@gravatar.com", 62 | "target": false, 63 | "create_dt": "2019-03-11 09:18:34.000", 64 | "rating": 55.454 65 | }, 66 | { 67 | "id": 8, 68 | "first_name": "Bogey", 69 | "last_name": "Kears", 70 | "email": "bkears7@nih.gov", 71 | "target": false, 72 | "create_dt": "2019-09-11 22:08:40.000", 73 | "rating": 3.869 74 | }, 75 | { 76 | "id": 9, 77 | "first_name": "Janifer", 78 | "last_name": "Melanaphy", 79 | "email": "jmelanaphy8@alibaba.com", 80 | "target": true, 81 | "create_dt": "2019-02-19 16:23:06.000", 82 | "rating": 48.234 83 | }, 84 | { 85 | "id": 10, 86 | "first_name": "Kaela", 87 | "last_name": "Illsley", 88 | "email": "killsley9@feedburner.com", 89 | "target": false, 90 | "create_dt": "2019-04-17 05:05:18.000", 91 | "rating": 86.13 92 | }, 93 | { 94 | "id": 11, 95 | "first_name": "Yorgos", 96 | "last_name": "Ruprechter", 97 | "email": "yruprechtera@baidu.com", 98 | "target": false, 99 | "create_dt": "2019-01-03 05:57:01.000", 100 | "rating": 88.736 101 | }, 102 | { 103 | "id": 12, 104 | "first_name": "Bunni", 105 | "last_name": "Bothams", 106 | "email": "bbothamsb@amazon.co.jp", 107 | "target": false, 108 | "create_dt": "2019-08-12 14:51:09.000", 109 | "rating": 94.945 110 | }, 111 | { 112 | "id": 13, 113 | "first_name": "Sharon", 114 | "last_name": "Atwater", 115 | "email": "satwaterc@webeden.co.uk", 116 | "target": false, 117 | "create_dt": "2019-05-22 09:38:10.000", 118 | "rating": 30.112 119 | }, 120 | { 121 | "id": 14, 122 | "first_name": "Ellen", 123 | "last_name": "Real", 124 | "email": "ereald@amazonaws.com", 125 | "target": true, 126 | "create_dt": "2019-02-01 18:47:06.000", 127 | "rating": 6.877 128 | }, 129 | { 130 | "id": 15, 131 | "first_name": "Charissa", 132 | "last_name": "Ballance", 133 | "email": "cballancee@amazonaws.com", 134 | "target": true, 135 | "create_dt": "2019-07-25 04:34:02.000", 136 | "rating": 8.514 137 | }, 138 | { 139 | "id": 16, 140 | "first_name": "Zorina", 141 | "last_name": "Cudiff", 142 | "email": "zcudifff@jigsy.com", 143 | "target": false, 144 | "create_dt": "2019-10-06 04:13:55.000", 145 | "rating": 27.63 146 | }, 147 | { 148 | "id": 17, 149 | "first_name": "Grove", 150 | "last_name": "Kenrack", 151 | "email": "gkenrackg@chronoengine.com", 152 | "target": true, 153 | "create_dt": "2019-09-01 05:11:02.000", 154 | "rating": 18.658 155 | }, 156 | { 157 | "id": 18, 158 | "first_name": "Alexis", 159 | "last_name": "Fewtrell", 160 | "email": "afewtrellh@google.cn", 161 | "target": true, 162 | "create_dt": "2019-05-31 14:55:31.000", 163 | "rating": 44.751 164 | } 165 | ] 166 | } -------------------------------------------------------------------------------- /core/dbio/filesys/test/test1/parquet/test1.1.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/test1/parquet/test1.1.parquet -------------------------------------------------------------------------------- /core/dbio/filesys/test/test1/parquet/test1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/filesys/test/test1/parquet/test1.parquet -------------------------------------------------------------------------------- /core/dbio/filesys/test/test2/json/test1.1.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1, 4 | "first_name": "AOCG,\"\n883", 5 | "last_name": "Lumox", 6 | "email": "ilumox0@unc.edu", 7 | "target": true, 8 | "create_dt": "2019-02-11 11:23:00.000", 9 | "rating": 35.987 10 | }, 11 | { 12 | "id": 2, 13 | "first_name": "Terrell", 14 | "last_name": "EKOZ,989", 15 | "email": "tmee1@example.com", 16 | "target": true, 17 | "create_dt": "2019-08-19 17:02:09.000", 18 | "rating": 89.983 19 | }, 20 | { 21 | "id": 3, 22 | "first_name": "Frayda", 23 | "last_name": "Comolli", 24 | "email": "fcomolli2@cbslocal.com", 25 | "target": false, 26 | "create_dt": "2019-08-07 16:48:12.000", 27 | "rating": 89.401 28 | }, 29 | { 30 | "id": 4, 31 | "first_name": "Alphonse", 32 | "last_name": "Crookshank", 33 | "email": "acrookshank3@mapy.cz", 34 | "target": true, 35 | "create_dt": "2019-03-31 14:41:45.000", 36 | "rating": 19.933 37 | }, 38 | { 39 | "id": 5, 40 | "first_name": "Mathilda", 41 | "last_name": "Deaves", 42 | "email": "mdeaves4@va.gov", 43 | "target": true, 44 | "create_dt": "2019-07-15 21:20:39.000", 45 | "rating": 90.643 46 | }, 47 | { 48 | "id": 6, 49 | "first_name": "Trixi", 50 | "last_name": "Livens", 51 | "email": "tlivens5@cargocollective.com", 52 | "target": true, 53 | "create_dt": "2019-06-20 10:58:40.000", 54 | "rating": 74.341 55 | }, 56 | { 57 | "id": 7, 58 | "first_name": "Ida", 59 | "last_name": "Domican", 60 | "email": "idomican6@gravatar.com", 61 | "target": false, 62 | "create_dt": "2019-03-11 09:18:34.000", 63 | "rating": 55.454 64 | }, 65 | { 66 | "id": 8, 67 | "first_name": "Bogey", 68 | "last_name": "Kears", 69 | "email": "bkears7@nih.gov", 70 | "target": false, 71 | "create_dt": "2019-09-11 22:08:40.000", 72 | "rating": 3.869 73 | }, 74 | { 75 | "id": 9, 76 | "first_name": "Janifer", 77 | "last_name": "Melanaphy", 78 | "email": "jmelanaphy8@alibaba.com", 79 | "target": true, 80 | "create_dt": "2019-02-19 16:23:06.000", 81 | "rating": 48.234, 82 | "extraCol": true 83 | }, 84 | { 85 | "id": 10, 86 | "first_name": "Kaela", 87 | "last_name": "Illsley", 88 | "email": "killsley9@feedburner.com", 89 | "target": false, 90 | "create_dt": "2019-04-17 05:05:18.000", 91 | "rating": 86.13 92 | }, 93 | { 94 | "id": 11, 95 | "first_name": "Yorgos", 96 | "last_name": "Ruprechter", 97 | "email": "yruprechtera@baidu.com", 98 | "target": false, 99 | "create_dt": "2019-01-03 05:57:01.000", 100 | "rating": 88.736 101 | }, 102 | { 103 | "id": 12, 104 | "first_name": "Bunni", 105 | "last_name": "Bothams", 106 | "email": "bbothamsb@amazon.co.jp", 107 | "target": false, 108 | "create_dt": "2019-08-12 14:51:09.000", 109 | "rating": 94.945 110 | }, 111 | { 112 | "id": 13, 113 | "first_name": "Sharon", 114 | "last_name": "Atwater", 115 | "email": "satwaterc@webeden.co.uk", 116 | "target": false, 117 | "create_dt": "2019-05-22 09:38:10.000", 118 | "rating": 30.112 
119 | }, 120 | { 121 | "id": 14, 122 | "first_name": "Ellen", 123 | "last_name": "Real", 124 | "email": "ereald@amazonaws.com", 125 | "target": true, 126 | "create_dt": "2019-02-01 18:47:06.000", 127 | "rating": 6.877 128 | }, 129 | { 130 | "id": 15, 131 | "first_name": "Charissa", 132 | "last_name": "Ballance", 133 | "email": "cballancee@amazonaws.com", 134 | "target": true, 135 | "create_dt": "2019-07-25 04:34:02.000", 136 | "rating": 8.514 137 | }, 138 | { 139 | "id": 16, 140 | "first_name": "Zorina", 141 | "last_name": "Cudiff", 142 | "email": "zcudifff@jigsy.com", 143 | "target": false, 144 | "create_dt": "2019-10-06 04:13:55.000", 145 | "rating": 27.63 146 | }, 147 | { 148 | "id": 17, 149 | "first_name": "Grove", 150 | "last_name": "Kenrack", 151 | "email": "gkenrackg@chronoengine.com", 152 | "target": true, 153 | "create_dt": "2019-09-01 05:11:02.000", 154 | "rating": 18.658 155 | }, 156 | { 157 | "id": 18, 158 | "first_name": "Alexis", 159 | "last_name": "Fewtrell", 160 | "email": "afewtrellh@google.cn", 161 | "target": true, 162 | "create_dt": "2019-05-31 14:55:31.000", 163 | "rating": 44.751 164 | } 165 | ] -------------------------------------------------------------------------------- /core/dbio/filesys/test/test2/json/test1.2.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 19, 4 | "first_name": "AOCG,\"\n883", 5 | "last_name": "Lumox", 6 | "email": "ilumox0@unc.edu", 7 | "target": true, 8 | "create_dt": "2019-02-11 11:23:00.000", 9 | "rating": 35.987, 10 | "extraCol2": "44.751" 11 | }, 12 | { 13 | "id": 21, 14 | "first_name": "Alexis", 15 | "last_name": "Fewtrell", 16 | "email": "afewtrellh@google.cn", 17 | "target": true, 18 | "create_dt": "2019-05-31 14:55:31.000", 19 | "rating": 44.751, 20 | "extraCol2": 44.751 21 | } 22 | ] -------------------------------------------------------------------------------- /core/dbio/filesys/test/test2/test2.1.noheader.csv: -------------------------------------------------------------------------------- 1 | 1,"AOCG,"" 2 | 883",Lumox,ilumox0@unc.edu,true,2019-02-11 11:23:00.000,35.987 3 | 2,Terrell,"EKOZ,989",tmee1@example.com,true,2019-08-19 17:02:09.000,89.983 4 | 3,Frayda,Comolli,fcomolli2@cbslocal.com,false,2019-08-07 16:48:12.000,89.401 5 | 4,Alphonse,Crookshank,acrookshank3@mapy.cz,true,2019-03-31 14:41:45.000,19.933 6 | 5,Mathilda,Deaves,mdeaves4@va.gov,true,2019-07-15 21:20:39.000,90.643 7 | 6,Trixi,Livens,tlivens5@cargocollective.com,true,2019-06-20 10:58:40.000,74.341 8 | 7,Ida,Domican,idomican6@gravatar.com,false,2019-03-11 09:18:34.000,55.454 9 | 8,Bogey,Kears,bkears7@nih.gov,false,2019-09-11 22:08:40.000,3.869 10 | 9,Janifer,Melanaphy,jmelanaphy8@alibaba.com,true,2019-02-19 16:23:06.000,48.234 11 | 10,Kaela,Illsley,killsley9@feedburner.com,false,2019-04-17 05:05:18.000,86.13 12 | 11,Yorgos,Ruprechter,yruprechtera@baidu.com,false,2019-01-03 05:57:01.000,88.736 13 | 12,Bunni,Bothams,bbothamsb@amazon.co.jp,false,2019-08-12 14:51:09.000,94.945 14 | 13,Sharon,Atwater,satwaterc@webeden.co.uk,false,2019-05-22 09:38:10.000,30.112 15 | 14,Ellen,Real,ereald@amazonaws.com,true,2019-02-01 18:47:06.000,6.877 16 | 15,Charissa,Ballance,cballancee@amazonaws.com,true,2019-07-25 04:34:02.000,8.514 17 | 16,Zorina,Cudiff,zcudifff@jigsy.com,false,2019-10-06 04:13:55.000,27.63 18 | 17,Grove,Kenrack,gkenrackg@chronoengine.com,true,2019-09-01 05:11:02.000,18.658 19 | 18,Alexis,Fewtrell,afewtrellh@google.cn,true,2019-05-31 14:55:31.000,44.751 
-------------------------------------------------------------------------------- /core/dbio/iop/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Input-Process-Output (ipo) -------------------------------------------------------------------------------- /core/dbio/iop/avro.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "io" 5 | "strings" 6 | 7 | "github.com/flarco/g" 8 | "github.com/jmespath/go-jmespath" 9 | "github.com/linkedin/goavro/v2" 10 | "github.com/samber/lo" 11 | "github.com/spf13/cast" 12 | ) 13 | 14 | // Avro is a avro` object 15 | type Avro struct { 16 | Path string 17 | Reader *goavro.OCFReader 18 | Data *Dataset 19 | colMap map[string]int 20 | codec *goavro.Codec 21 | } 22 | 23 | func NewAvroStream(reader io.ReadSeeker, columns Columns) (a *Avro, err error) { 24 | ar, err := goavro.NewOCFReader(reader) 25 | if err != nil { 26 | err = g.Error(err, "could not read avro reader") 27 | return 28 | } 29 | 30 | a = &Avro{Reader: ar, codec: ar.Codec()} 31 | a.colMap = a.Columns().FieldMap(true) 32 | 33 | return 34 | } 35 | 36 | func (a *Avro) Columns() Columns { 37 | 38 | typeMap := map[string]ColumnType{ 39 | "string": StringType, 40 | "int": IntegerType, 41 | "long": BigIntType, 42 | "float": DecimalType, 43 | "double": DecimalType, 44 | "bytes": BinaryType, 45 | "null": StringType, 46 | "array": JsonType, 47 | "map": JsonType, 48 | "record": JsonType, 49 | "enum": StringType, 50 | } 51 | 52 | type avroField struct { 53 | Name string `json:"name"` 54 | Type any `json:"type"` 55 | } 56 | 57 | type avroSchema struct { 58 | Name string `json:"name"` 59 | Fields []avroField `json:"fields"` 60 | } 61 | 62 | schema := avroSchema{} 63 | 64 | g.Unmarshal(a.codec.Schema(), &schema) 65 | 66 | fields := lo.Map( 67 | schema.Fields, 68 | func(f avroField, i int) string { return f.Name }, 69 | ) 70 | 71 | cols := NewColumnsFromFields(fields...) 
72 | for i, field := range schema.Fields { 73 | key := g.Marshal(field.Type) 74 | key = strings.TrimPrefix(key, `"`) 75 | key = strings.TrimSuffix(key, `"`) 76 | 77 | if strings.HasPrefix(key, "{") { 78 | keyI, err := jmespath.Search("type", field.Type) 79 | if err == nil { 80 | key = cast.ToString(keyI) 81 | } 82 | } else if strings.HasPrefix(key, "[") { 83 | key = "map" 84 | } 85 | 86 | cols[i].Type = StringType 87 | if typ, ok := typeMap[key]; ok { 88 | cols[i].Type = typ 89 | cols[i].Sourced = !g.In(typ, DecimalType) 90 | } 91 | } 92 | 93 | return cols 94 | } 95 | 96 | func (a *Avro) nextFunc(it *Iterator) bool { 97 | if !a.Reader.Scan() { 98 | return false 99 | } 100 | 101 | if err := a.Reader.Err(); err != nil { 102 | it.Context.CaptureErr(g.Error(err, "could not read Avro row")) 103 | return false 104 | } 105 | 106 | datum, err := a.Reader.Read() 107 | if err != nil { 108 | it.Context.CaptureErr(g.Error(err, "could not read Avro record")) 109 | return false 110 | } 111 | 112 | buf, err := a.codec.TextualFromNative(nil, datum) 113 | if err != nil { 114 | it.Context.CaptureErr(g.Error(err, "could not convert to Avro record")) 115 | return false 116 | } 117 | 118 | rec, err := g.JSONUnmarshalToMap(buf) 119 | if err != nil { 120 | it.Context.CaptureErr(g.Error(err, "could not unmarshal Avro record")) 121 | return false 122 | } 123 | 124 | it.Row = make([]interface{}, len(it.ds.Columns)) 125 | for k, v := range rec { 126 | col := it.ds.Columns[a.colMap[strings.ToLower(k)]] 127 | i := col.Position - 1 128 | if col.Type == JsonType { 129 | v = g.Marshal(v) 130 | } 131 | it.Row[i] = v 132 | } 133 | 134 | return true 135 | } 136 | -------------------------------------------------------------------------------- /core/dbio/iop/compression_test.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "io" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/flarco/g" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestCompression(t *testing.T) { 13 | var reader, cReader, dReader io.Reader 14 | var err error 15 | value := "testing" 16 | result := []byte("") 17 | 18 | // gzip 19 | reader = strings.NewReader(value) 20 | cp := NewCompressor(GzipCompressorType) 21 | cReader = cp.Compress(reader) 22 | dReader, err = cp.Decompress(cReader) 23 | g.AssertNoError(t, err) 24 | result, err = io.ReadAll(dReader) 25 | g.AssertNoError(t, err) 26 | assert.Equal(t, value, string(result)) 27 | 28 | // zstandard 29 | reader = strings.NewReader(value) 30 | cp = NewCompressor(ZStandardCompressorType) 31 | cReader = cp.Compress(reader) 32 | dReader, err = cp.Decompress(cReader) 33 | g.AssertNoError(t, err) 34 | result, err = io.ReadAll(dReader) 35 | g.AssertNoError(t, err) 36 | assert.Equal(t, value, string(result)) 37 | 38 | // snappy 39 | reader = strings.NewReader(value) 40 | cp = NewCompressor(SnappyCompressorType) 41 | cReader = cp.Compress(reader) 42 | dReader, err = cp.Decompress(cReader) 43 | g.AssertNoError(t, err) 44 | result, err = io.ReadAll(dReader) 45 | g.AssertNoError(t, err) 46 | assert.Equal(t, value, string(result)) 47 | 48 | } 49 | -------------------------------------------------------------------------------- /core/dbio/iop/csv_duckdb.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/flarco/g" 7 | "github.com/slingdata-io/sling-cli/core/dbio" 8 | "github.com/spf13/cast" 9 | ) 10 | 11 | type CsvDuckDb struct { 12 | URI 
string 13 | Duck *DuckDb 14 | columns Columns 15 | sc *StreamConfig 16 | } 17 | 18 | func NewCsvReaderDuckDb(uri string, sc *StreamConfig, props ...string) (*CsvDuckDb, error) { 19 | duck := NewDuckDb(context.Background(), props...) 20 | 21 | uri = duck.PrepareFsSecretAndURI(uri) 22 | 23 | if sc == nil { 24 | sc = g.Ptr(DefaultStreamConfig()) 25 | sc.Header = true 26 | } 27 | 28 | if sc.Delimiter == "" { 29 | sc.Delimiter = "," 30 | } 31 | 32 | if sc.Escape == "" { 33 | sc.Escape = `"` 34 | } 35 | 36 | if sc.Quote == "" { 37 | sc.Quote = `"` 38 | } 39 | 40 | if sc.NullIf == "" { 41 | sc.NullIf = `\N` 42 | } 43 | 44 | return &CsvDuckDb{ 45 | URI: uri, 46 | Duck: duck, 47 | sc: sc, 48 | }, nil 49 | } 50 | 51 | func (r *CsvDuckDb) Columns() (Columns, error) { 52 | if len(r.columns) > 0 { 53 | return r.columns, nil 54 | } 55 | 56 | // query := fmt.Sprintf("SELECT path_in_schema as column_name, type as column_type, column_id, num_values, total_uncompressed_size FROM parquet_metadata('%s') order by column_id", p.URI) 57 | 58 | var err error 59 | r.columns, err = r.Duck.Describe(r.MakeQuery(FileStreamConfig{})) 60 | if err != nil { 61 | return nil, g.Error(err, "could not get columns") 62 | } 63 | return r.columns, nil 64 | } 65 | 66 | func (r *CsvDuckDb) Close() error { 67 | return r.Duck.Close() 68 | } 69 | 70 | func (r *CsvDuckDb) MakeQuery(fsc FileStreamConfig) string { 71 | quote := r.Duck.GetProp("quote_char") 72 | if quote == "" { 73 | quote = r.sc.Quote 74 | } 75 | 76 | sql := r.Duck.MakeScanQuery(dbio.FileTypeCsv, r.URI, fsc) 77 | 78 | sql = g.R(sql, "delimiter", r.sc.Delimiter) 79 | sql = g.R(sql, "header", cast.ToString(r.sc.Header)) 80 | // sql = g.R(sql, "columns", cfg.Columns) 81 | sql = g.R(sql, "quote", quote) 82 | sql = g.R(sql, "escape", r.sc.Escape) 83 | sql = g.R(sql, "null_if", r.sc.NullIf) 84 | 85 | return sql 86 | } 87 | -------------------------------------------------------------------------------- /core/dbio/iop/datastream_test.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "io" 5 | "testing" 6 | 7 | "github.com/flarco/g/csv" 8 | "github.com/spf13/cast" 9 | ) 10 | 11 | func TestBW(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | input []string 15 | expected int64 16 | }{ 17 | { 18 | name: "ASCII only", 19 | input: []string{"hello", "world", "123"}, 20 | expected: 16, // "hello,world,123\n" = 5+1+5+1+3+1 = 16 21 | }, 22 | { 23 | name: "With Unicode", 24 | input: []string{"hello", "世界", "123"}, 25 | expected: 17, // "hello,世界,123\n" = 5+1+6+1+3+1 = 17 (世界 is 6 bytes in UTF-8) 26 | }, 27 | { 28 | name: "Empty strings", 29 | input: []string{"", "", ""}, 30 | expected: 3, // ",,\n" = 1+1+1 = 3 31 | }, 32 | { 33 | name: "Mixed content", 34 | input: []string{"ABC", "世界", "123"}, 35 | expected: 15, // "ABC,世界,123\n" = 3+1+6+1+3+1 = 15 36 | }, 37 | } 38 | 39 | for _, tt := range tests { 40 | t.Run(tt.name, func(t *testing.T) { 41 | // Test original writeBwCsv 42 | ds1 := NewDatastream(nil) 43 | ds1.bwCsv = csv.NewWriter(io.Discard) 44 | ds1.writeBwCsv(tt.input) 45 | originalBytes := ds1.Bytes.Load() 46 | 47 | // Test new writeBwCsvSafe 48 | ds2 := NewDatastream(nil) 49 | ds2.writeBwCsvSafe(tt.input) 50 | safeBytes := ds2.Bytes.Load() 51 | 52 | // Compare results 53 | if originalBytes != safeBytes { 54 | t.Errorf("Byte count mismatch for %s: original=%d, safe=%d", 55 | tt.name, originalBytes, safeBytes) 56 | } 57 | 58 | // Verify against expected 59 | if safeBytes != cast.ToUint64(tt.expected) { 60 |
t.Errorf("Expected %d bytes for %s, got %d", 61 | tt.expected, tt.name, safeBytes) 62 | } 63 | }) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /core/dbio/iop/delta.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/flarco/g" 7 | "github.com/slingdata-io/sling-cli/core/dbio" 8 | ) 9 | 10 | type DeltaReader struct { 11 | URI string 12 | Duck *DuckDb 13 | columns Columns 14 | } 15 | 16 | func NewDeltaReader(uri string, props ...string) (*DeltaReader, error) { 17 | duck := NewDuckDb(context.Background(), props...) 18 | 19 | // load extension 20 | duck.AddExtension("delta") 21 | 22 | uri = duck.PrepareFsSecretAndURI(uri) 23 | 24 | return &DeltaReader{ 25 | URI: uri, 26 | Duck: duck, 27 | }, nil 28 | } 29 | 30 | func (r *DeltaReader) Columns() (Columns, error) { 31 | if len(r.columns) > 0 { 32 | return r.columns, nil 33 | } 34 | 35 | var err error 36 | r.columns, err = r.Duck.Describe(r.MakeQuery(FileStreamConfig{})) 37 | if err != nil { 38 | return nil, g.Error(err, "could not get columns") 39 | } 40 | return r.columns, nil 41 | } 42 | 43 | func (r *DeltaReader) Close() error { 44 | return r.Duck.Close() 45 | } 46 | 47 | func (r *DeltaReader) MakeQuery(sc FileStreamConfig) string { 48 | sql := r.Duck.MakeScanQuery(dbio.FileTypeDelta, r.URI, sc) 49 | return sql 50 | } 51 | -------------------------------------------------------------------------------- /core/dbio/iop/iceberg.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/flarco/g" 7 | "github.com/slingdata-io/sling-cli/core/dbio" 8 | ) 9 | 10 | type IcebergReader struct { 11 | URI string 12 | Duck *DuckDb 13 | columns Columns 14 | } 15 | 16 | func NewIcebergReader(uri string, props ...string) (*IcebergReader, error) { 17 | duck := NewDuckDb(context.Background(), props...) 18 | 19 | // load extension 20 | duck.AddExtension("iceberg") 21 | 22 | uri = duck.PrepareFsSecretAndURI(uri) 23 | 24 | return &IcebergReader{ 25 | URI: uri, 26 | Duck: duck, 27 | }, nil 28 | } 29 | 30 | func (r *IcebergReader) Columns() (Columns, error) { 31 | if len(r.columns) > 0 { 32 | return r.columns, nil 33 | } 34 | 35 | var err error 36 | r.columns, err = r.Duck.Describe(r.MakeQuery(FileStreamConfig{})) 37 | if err != nil { 38 | return nil, g.Error(err, "could not get columns") 39 | } 40 | return r.columns, nil 41 | } 42 | 43 | func (i *IcebergReader) Close() error { 44 | return i.Duck.Close() 45 | } 46 | 47 | func (r *IcebergReader) MakeQuery(sc FileStreamConfig) string { 48 | sql := r.Duck.MakeScanQuery(dbio.FileTypeIceberg, r.URI, sc) 49 | return sql 50 | } 51 | -------------------------------------------------------------------------------- /core/dbio/iop/parquet_duckdb.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/flarco/g" 7 | "github.com/slingdata-io/sling-cli/core/dbio" 8 | ) 9 | 10 | type ParquetDuckDb struct { 11 | URI string 12 | Duck *DuckDb 13 | columns Columns 14 | } 15 | 16 | func NewParquetReaderDuckDb(uri string, props ...string) (*ParquetDuckDb, error) { 17 | duck := NewDuckDb(context.Background(), props...) 
18 | 19 | uri = duck.PrepareFsSecretAndURI(uri) 20 | 21 | return &ParquetDuckDb{ 22 | URI: uri, 23 | Duck: duck, 24 | }, nil 25 | } 26 | 27 | func (r *ParquetDuckDb) Columns() (Columns, error) { 28 | if len(r.columns) > 0 { 29 | return r.columns, nil 30 | } 31 | 32 | // query := fmt.Sprintf("SELECT path_in_schema as column_name, type as column_type, column_id, num_values, total_uncompressed_size FROM parquet_metadata('%s') order by column_id", p.URI) 33 | 34 | var err error 35 | r.columns, err = r.Duck.Describe(r.MakeQuery(FileStreamConfig{})) 36 | if err != nil { 37 | return nil, g.Error(err, "could not get columns") 38 | } 39 | return r.columns, nil 40 | } 41 | 42 | func (r *ParquetDuckDb) Close() error { 43 | return r.Duck.Close() 44 | } 45 | 46 | func (r *ParquetDuckDb) MakeQuery(sc FileStreamConfig) string { 47 | sql := r.Duck.MakeScanQuery(dbio.FileTypeParquet, r.URI, sc) 48 | return sql 49 | } 50 | -------------------------------------------------------------------------------- /core/dbio/iop/sheet.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/spf13/cast" 7 | ) 8 | 9 | type spreadsheet struct { 10 | Props map[string]string 11 | } 12 | 13 | func (s *spreadsheet) makeDatasetAuto(rows [][]string) (data Dataset) { 14 | var blankCellCnt, trailingBlankRows int 15 | rowWidthDistro := map[int]int{} 16 | allRows := [][]string{} 17 | maxCount := 0 18 | // widthMostUsed := 0 19 | for _, row0 := range rows { 20 | blankCellCnt = 0 21 | row := make([]string, len(row0)) 22 | for i, val := range row0 { 23 | val = strings.TrimSpace(val) 24 | if val == "" { 25 | blankCellCnt++ 26 | } else { 27 | blankCellCnt = 0 28 | } 29 | row[i] = val 30 | } 31 | 32 | rowWidthDistro[len(row)]++ 33 | allRows = append(allRows, row) 34 | 35 | if blankCellCnt == len(row) { 36 | trailingBlankRows++ 37 | } else { 38 | trailingBlankRows = 0 39 | } 40 | 41 | if rowWidthDistro[len(row)] > maxCount { 42 | maxCount = rowWidthDistro[len(row)] 43 | } 44 | } 45 | 46 | // g.Debug("trailingBlankRows: %d", trailingBlankRows) 47 | data = NewDataset(nil) 48 | data.Sp.SetConfig(s.Props) 49 | hasHeader := cast.ToBool(s.Props["header"]) 50 | 51 | for i, row0 := range allRows[:len(allRows)-trailingBlankRows] { 52 | if i == 0 { 53 | if hasHeader { 54 | // assume first row is header row 55 | row0 = CleanHeaderRow(row0) 56 | data.SetFields(row0) 57 | continue 58 | } else if len(data.Columns) == 0 { 59 | data.SetFields(CreateDummyFields(len(row0))) 60 | } 61 | } 62 | 63 | row := make([]interface{}, len(row0)) 64 | for i, val := range row0 { 65 | row[i] = val 66 | } 67 | row = data.Sp.CastRow(row, data.Columns) 68 | data.Rows = append(data.Rows, row) 69 | 70 | if i == SampleSize { 71 | data.InferColumnTypes() 72 | for i, row := range data.Rows { 73 | data.Rows[i] = data.Sp.CastRow(row, data.Columns) 74 | } 75 | } 76 | } 77 | if !data.Inferred { 78 | data.InferColumnTypes() 79 | for i, row := range data.Rows { 80 | data.Rows[i] = data.Sp.CastRow(row, data.Columns) 81 | } 82 | } 83 | return 84 | } 85 | func (s *spreadsheet) makeDatasetStr(rangeRows [][]string) (data Dataset) { 86 | data = NewDataset(nil) 87 | data.Sp.SetConfig(s.Props) 88 | for i, row0 := range rangeRows { 89 | if i == 0 { 90 | // assume first row is header row 91 | data.SetFields(CleanHeaderRow(row0)) 92 | continue 93 | } 94 | 95 | row := make([]interface{}, len(row0)) 96 | for i, val := range row0 { 97 | row[i] = val 98 | } 99 | data.Append(row) 100 | 101 | if i == 
SampleSize { 102 | data.InferColumnTypes() 103 | for i, row := range data.Rows { 104 | data.Rows[i] = data.Sp.CastRow(row, data.Columns) 105 | } 106 | } 107 | } 108 | 109 | if !data.Inferred { 110 | data.InferColumnTypes() 111 | for i, row := range data.Rows { 112 | data.Rows[i] = data.Sp.CastRow(row, data.Columns) 113 | } 114 | } 115 | return 116 | } 117 | 118 | func (s *spreadsheet) makeDatasetInterf(rangeRows [][]interface{}) (data Dataset) { 119 | data = NewDataset(nil) 120 | data.Sp.SetConfig(s.Props) 121 | for i, row := range rangeRows { 122 | if i == 0 { 123 | // assume first row is header row 124 | row0 := make([]string, len(row)) 125 | for i, val := range row { 126 | row0[i] = cast.ToString(val) 127 | } 128 | data.SetFields(CleanHeaderRow(row0)) 129 | continue 130 | } 131 | 132 | data.Append(row) 133 | 134 | if i == SampleSize { 135 | data.InferColumnTypes() 136 | for i, row := range data.Rows { 137 | data.Rows[i] = data.Sp.CastRow(row, data.Columns) 138 | } 139 | } 140 | } 141 | 142 | if !data.Inferred { 143 | data.InferColumnTypes() 144 | for i, row := range data.Rows { 145 | data.Rows[i] = data.Sp.CastRow(row, data.Columns) 146 | } 147 | } 148 | return 149 | } 150 | -------------------------------------------------------------------------------- /core/dbio/iop/test/delta/_delta_log/.00000000000000000000.json.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/delta/_delta_log/.00000000000000000000.json.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/delta/_delta_log/00000000000000000000.json: -------------------------------------------------------------------------------- 1 | {"commitInfo":{"timestamp":1706278148531,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[\"country\"]"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"3","numOutputRows":"5","numOutputBytes":"3045"},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"1cbc9537-63eb-4799-8647-2d947ae8fa41"}} 2 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 3 | {"metaData":{"id":"1f110132-a652-4be9-815e-348f294515cf","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"country\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"continent\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["country"],"configuration":{},"createdTime":1706278146762}} 4 | {"add":{"path":"country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet","partitionValues":{"country":"Argentina"},"size":1018,"modificationTime":1706278148083,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Ernesto\",\"last_name\":\"Guevara\",\"continent\":\"NaN\"},\"maxValues\":{\"first_name\":\"Ernesto\",\"last_name\":\"Guevara\",\"continent\":\"NaN\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} 5 | 
{"add":{"path":"country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet","partitionValues":{"country":"China"},"size":1002,"modificationTime":1706278148138,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"first_name\":\"Bruce\",\"last_name\":\"Lee\",\"continent\":\"Asia\"},\"maxValues\":{\"first_name\":\"Jack\",\"last_name\":\"Ma\",\"continent\":\"Asia\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} 6 | {"add":{"path":"country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet","partitionValues":{"country":"Germany"},"size":1025,"modificationTime":1706278148185,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"first_name\":\"Soraya\",\"last_name\":\"Jala\",\"continent\":\"NaN\"},\"maxValues\":{\"first_name\":\"Wolfgang\",\"last_name\":\"Manche\",\"continent\":\"NaN\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} 7 | -------------------------------------------------------------------------------- /core/dbio/iop/test/delta/country=Argentina/.part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/delta/country=Argentina/.part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/delta/country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/delta/country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet -------------------------------------------------------------------------------- /core/dbio/iop/test/delta/country=China/.part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/delta/country=China/.part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/delta/country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/delta/country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet -------------------------------------------------------------------------------- /core/dbio/iop/test/delta/country=Germany/.part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/delta/country=Germany/.part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc -------------------------------------------------------------------------------- 
/core/dbio/iop/test/delta/country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/delta/country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | This iceberg table was generated by using DuckDB (v0.7.0) to generate the TPC-H lineitem 3 | table at SF0.01 and then store that to a parquet file. 4 | 5 | Then pyspark (3.3.1) was used with the iceberg extension from https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.0.0/iceberg-spark-runtime-3.3_2.12-1.0.0.jar 6 | to write the iceberg table. 7 | 8 | Finally, using pyspark, a delete query was performed on this iceberg table: 9 | 10 | ``` 11 | DELETE FROM iceberg_catalog.lineitem_iceberg where l_extendedprice < 10000 12 | ``` 13 | 14 | The result for Q06 of TPC-H on this table, according to pyspark, is now: 15 | ``` 16 | [Row(revenue=Decimal('1077536.9101'))] 17 | ``` 18 | 19 | Note: it appears that there are no deletes present in this iceberg table; the whole thing was rewritten. 20 | This is likely because the table is so small. -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet --------------------------------------------------------------------------------
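As the sources earlier in this dump show, `DeltaReader`, `IcebergReader`, `ParquetDuckDb`, and `CsvDuckDb` all expose the same three methods: `Columns()`, `MakeQuery(FileStreamConfig)`, and `Close()`. A small, hypothetical local interface is enough to handle any of them uniformly; nothing below exists in the repo beyond the exported names it calls, and the fixture path assumes the repository root.

```go
package main

import (
	"fmt"
	"log"

	"github.com/slingdata-io/sling-cli/core/dbio/iop"
)

// duckScanner is a local, hypothetical interface; the three methods are the
// ones the DuckDB-backed readers in core/dbio/iop already export.
type duckScanner interface {
	Columns() (iop.Columns, error)
	MakeQuery(sc iop.FileStreamConfig) string
	Close() error
}

// describe prints the column count and the scan SQL for any of the readers.
func describe(name string, s duckScanner) {
	defer s.Close()
	cols, err := s.Columns()
	if err != nil {
		log.Fatalf("%s: %v", name, err)
	}
	fmt.Printf("%s: %d columns\n%s\n", name, len(cols), s.MakeQuery(iop.FileStreamConfig{}))
}

func main() {
	// Any of NewDeltaReader, NewParquetReaderDuckDb, NewCsvReaderDuckDb, or
	// NewIcebergReader would satisfy duckScanner; the iceberg fixture above
	// is used here.
	ice, err := iop.NewIcebergReader("core/dbio/iop/test/lineitem_iceberg")
	if err != nil {
		log.Fatal(err)
	}
	describe("lineitem_iceberg", ice)
}
```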
/core/dbio/iop/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/.v1.metadata.json.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/.v1.metadata.json.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/.v2.metadata.json.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/.v2.metadata.json.crc -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/.version-hint.text.crc: -------------------------------------------------------------------------------- 1 | crcվ -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/v1.metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "format-version" : 2, 3 | "table-uuid" : "a319422b-6f8c-44d0-90ba-96242d9a1d7b", 4 | "location" : "./lineitem_iceberg", 5 | "last-sequence-number" : 1, 6 | "last-updated-ms" : 1676473674504, 7 | "last-column-id" : 16, 8 | "current-schema-id" : 0, 9 | "schemas" : [ { 10 | "type" : "struct", 11 | "schema-id" : 0, 12 | "fields" : [ { 13 | "id" : 1, 14 | "name" : "l_orderkey", 15 | "required" : false, 16 | "type" : "int" 17 | }, { 18 | "id" : 2, 19 | "name" : "l_partkey", 20 | "required" : false, 21 | "type" : "int" 22 | }, { 23 | "id" : 3, 24 | "name" : "l_suppkey", 25 | "required" : false, 26 | "type" : "int" 27 | }, { 28 | "id" : 4, 29 | "name" : "l_linenumber", 30 | "required" : false, 31 | "type" : "int" 32 | }, { 33 | "id" : 5, 34 | "name" : "l_quantity", 35 | "required" : false, 36 | "type" : "int" 37 | }, { 38 | "id" : 6, 39 | "name" : "l_extendedprice", 40 | "required" : false, 41 | "type" : "decimal(15, 2)" 42 | }, { 43 | "id" : 7, 44 | "name" : "l_discount", 45 | "required" : false, 46 | "type" : "decimal(15, 2)" 47 | }, { 48 | "id" : 8, 49 | "name" : "l_tax", 50 | "required" : false, 51 | "type" : "decimal(15, 2)" 52 | }, { 53 | "id" : 9, 54 | "name" : 
"l_returnflag", 55 | "required" : false, 56 | "type" : "string" 57 | }, { 58 | "id" : 10, 59 | "name" : "l_linestatus", 60 | "required" : false, 61 | "type" : "string" 62 | }, { 63 | "id" : 11, 64 | "name" : "l_shipdate", 65 | "required" : false, 66 | "type" : "date" 67 | }, { 68 | "id" : 12, 69 | "name" : "l_commitdate", 70 | "required" : false, 71 | "type" : "date" 72 | }, { 73 | "id" : 13, 74 | "name" : "l_receiptdate", 75 | "required" : false, 76 | "type" : "date" 77 | }, { 78 | "id" : 14, 79 | "name" : "l_shipinstruct", 80 | "required" : false, 81 | "type" : "string" 82 | }, { 83 | "id" : 15, 84 | "name" : "l_shipmode", 85 | "required" : false, 86 | "type" : "string" 87 | }, { 88 | "id" : 16, 89 | "name" : "l_comment", 90 | "required" : false, 91 | "type" : "string" 92 | } ] 93 | } ], 94 | "default-spec-id" : 0, 95 | "partition-specs" : [ { 96 | "spec-id" : 0, 97 | "fields" : [ ] 98 | } ], 99 | "last-partition-id" : 999, 100 | "default-sort-order-id" : 0, 101 | "sort-orders" : [ { 102 | "order-id" : 0, 103 | "fields" : [ ] 104 | } ], 105 | "properties" : { 106 | "owner" : "root", 107 | "write.update.mode" : "merge-on-read" 108 | }, 109 | "current-snapshot-id" : 3776207205136740581, 110 | "refs" : { 111 | "main" : { 112 | "snapshot-id" : 3776207205136740581, 113 | "type" : "branch" 114 | } 115 | }, 116 | "snapshots" : [ { 117 | "sequence-number" : 1, 118 | "snapshot-id" : 3776207205136740581, 119 | "timestamp-ms" : 1676473674504, 120 | "summary" : { 121 | "operation" : "append", 122 | "spark.app.id" : "local-1676472783435", 123 | "added-data-files" : "1", 124 | "added-records" : "60175", 125 | "added-files-size" : "1390176", 126 | "changed-partition-count" : "1", 127 | "total-records" : "60175", 128 | "total-files-size" : "1390176", 129 | "total-data-files" : "1", 130 | "total-delete-files" : "0", 131 | "total-position-deletes" : "0", 132 | "total-equality-deletes" : "0" 133 | }, 134 | "manifest-list" : "lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro", 135 | "schema-id" : 0 136 | } ], 137 | "snapshot-log" : [ { 138 | "timestamp-ms" : 1676473674504, 139 | "snapshot-id" : 3776207205136740581 140 | } ], 141 | "metadata-log" : [ ] 142 | } -------------------------------------------------------------------------------- /core/dbio/iop/test/lineitem_iceberg/metadata/version-hint.text: -------------------------------------------------------------------------------- 1 | 2 -------------------------------------------------------------------------------- /core/dbio/iop/test/my_file.utf16.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/my_file.utf16.csv -------------------------------------------------------------------------------- /core/dbio/iop/test/test.excel1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/test.excel1.xlsx -------------------------------------------------------------------------------- /core/dbio/iop/test/test.excel2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/test.excel2.xlsx -------------------------------------------------------------------------------- 
/core/dbio/iop/test/test1.1.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,target,create_dt,rating 2 | 1,"AOCG,"" 3 | 883",Lumox,ilumox0@unc.edu,true,2019-02-11 11:23:00.000,35.987 4 | 2,Terrell,"EKOZ,989",tmee1@example.com,true,2019-08-19 17:02:09.000,89.983 5 | 3,Frayda,Comolli,fcomolli2@cbslocal.com,false,2019-08-07 16:48:12.000,89.401 6 | 4,Alphonse,Crookshank,acrookshank3@mapy.cz,true,2019-03-31 14:41:45.000,19.933 7 | 5,Mathilda,Deaves,mdeaves4@va.gov,true,2019-07-15 21:20:39.000,90.643 8 | 6,Trixi,Livens,tlivens5@cargocollective.com,true,2019-06-20 10:58:40.000,74.341 9 | 7,Ida,Domican,idomican6@gravatar.com,false,2019-03-11 09:18:34.000,55.454 10 | 8,Bogey,Kears,bkears7@nih.gov,false,2019-09-11 22:08:40.000,3.869 11 | 9,Janifer,Melanaphy,jmelanaphy8@alibaba.com,true,2019-02-19 16:23:06.000,48.234 12 | 10,Kaela,Illsley,killsley9@feedburner.com,false,2019-04-17 05:05:18.000,86.13 13 | 11,Yorgos,Ruprechter,yruprechtera@baidu.com,false,2019-01-03 05:57:01.000,88.736 14 | 12,Bunni,Bothams,bbothamsb@amazon.co.jp,false,2019-08-12 14:51:09.000,94.945 15 | 13,Sharon,Atwater,satwaterc@webeden.co.uk,false,2019-05-22 09:38:10.000,30.112 16 | 14,Ellen,Real,ereald@amazonaws.com,true,2019-02-01 18:47:06.000,6.877 17 | 15,Charissa,Ballance,cballancee@amazonaws.com,true,2019-07-25 04:34:02.000,8.514 18 | 16,Zorina,Cudiff,zcudifff@jigsy.com,false,2019-10-06 04:13:55.000,27.63 19 | 17,Grove,Kenrack,gkenrackg@chronoengine.com,true,2019-09-01 05:11:02.000,18.658 20 | 18,Alexis,Fewtrell,afewtrellh@google.cn,true,2019-05-31 14:55:31.000,44.751 -------------------------------------------------------------------------------- /core/dbio/iop/test/test1.1.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/test1.1.csv.gz -------------------------------------------------------------------------------- /core/dbio/iop/test/test1.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/test1.1.parquet -------------------------------------------------------------------------------- /core/dbio/iop/test/test1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/core/dbio/iop/test/test1.parquet -------------------------------------------------------------------------------- /core/dbio/iop/test/test2.1.noheader.csv: -------------------------------------------------------------------------------- 1 | 1,"AOCG,"" 2 | 883",Lumox,ilumox0@unc.edu,true,2019-02-11 11:23:00.000,35.987 3 | 2,Terrell,"EKOZ,989",tmee1@example.com,true,2019-08-19 17:02:09.000,89.983 4 | 3,Frayda,Comolli,fcomolli2@cbslocal.com,false,2019-08-07 16:48:12.000,89.401 5 | 4,Alphonse,Crookshank,acrookshank3@mapy.cz,true,2019-03-31 14:41:45.000,19.933 6 | 5,Mathilda,Deaves,mdeaves4@va.gov,true,2019-07-15 21:20:39.000,90.643 7 | 6,Trixi,Livens,tlivens5@cargocollective.com,true,2019-06-20 10:58:40.000,74.341 8 | 7,Ida,Domican,idomican6@gravatar.com,false,2019-03-11 09:18:34.000,55.454 9 | 8,Bogey,Kears,bkears7@nih.gov,false,2019-09-11 22:08:40.000,3.869 10 | 9,Janifer,Melanaphy,jmelanaphy8@alibaba.com,true,2019-02-19 16:23:06.000,48.234 11 | 
10,Kaela,Illsley,killsley9@feedburner.com,false,2019-04-17 05:05:18.000,86.13 12 | 11,Yorgos,Ruprechter,yruprechtera@baidu.com,false,2019-01-03 05:57:01.000,88.736 13 | 12,Bunni,Bothams,bbothamsb@amazon.co.jp,false,2019-08-12 14:51:09.000,94.945 14 | 13,Sharon,Atwater,satwaterc@webeden.co.uk,false,2019-05-22 09:38:10.000,30.112 15 | 14,Ellen,Real,ereald@amazonaws.com,true,2019-02-01 18:47:06.000,6.877 16 | 15,Charissa,Ballance,cballancee@amazonaws.com,true,2019-07-25 04:34:02.000,8.514 17 | 16,Zorina,Cudiff,zcudifff@jigsy.com,false,2019-10-06 04:13:55.000,27.63 18 | 17,Grove,Kenrack,gkenrackg@chronoengine.com,true,2019-09-01 05:11:02.000,18.658 19 | 18,Alexis,Fewtrell,afewtrellh@google.cn,true,2019-05-31 14:55:31.000,44.751 -------------------------------------------------------------------------------- /core/dbio/iop/transforms_test.go: -------------------------------------------------------------------------------- 1 | package iop 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/flarco/g" 8 | "github.com/spf13/cast" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestNonPrintable(t *testing.T) { 13 | chars := []string{"\x00", "\u00A0", " ", "\t", "\n", "\x01"} 14 | for _, char := range chars { 15 | g.Info("%#v => %d => %#v => %#v", char, char[0], char[0], Transforms.ReplaceNonPrintable(char)) 16 | } 17 | uints := []uint8{0, 1, 2, 3, 49, 127, 160} 18 | for _, uintVal := range uints { 19 | g.Warn("%#v => %d => %#v", string(uintVal), uintVal, Transforms.ReplaceNonPrintable(string(uintVal))) 20 | } 21 | } 22 | 23 | func TestFIX(t *testing.T) { 24 | messages := []string{ 25 | "8=FIX.4.2|9=332|35=8|49=XXX|56=SYS1|34=190|52=20181106-08:00:23|128=802c88|1=802c88_ISA|6=1.2557|11=7314956|14=12|15=GBP|17=EAVVA18KA1117184|20=0|22=4|30=XLON|31=1.2557|32=12|37=OAVVA18KA8302522|38=12|39=2|40=1|48=JE00B6173J15|54=2|55=GCP|59=1|60=20181106-08:00:21|63=6|64=20181108|76=CSTEGB21|110=0|119=15.0684|120=GBP|150=2|151=0|167=CS|207=XLON|10=105|", 26 | "8=FIX.4.2|9=393|35=8|49=XXX|56=SYS1|34=191|52=20181106-08:00:33|128=802c11|1=569_C11_TPAB|6=0.2366|11=16669868|14=6061|15=GBP|17=EBSTI18KA1117185|20=0|21=2|22=4|30=XOFF|31=0.2366|32=6061|37=OBSTI18KA8302657|38=6061|39=2|40=2|44=0.2366|48=GB00B0DG3H29|54=1|55=SXX|59=6|60=20181106-08:00:31|63=3|64=20181108|76=WNTSGB2LBIC|110=0|119=1434.03|120=GBP|126=20181106-23:00:00|150=2|151=0|152=1434.03|167=CS|207=XLON|10=178|", 27 | "8=FIX.4.2|9=65|35=A|49=SERVER|56=CLIENT|34=177|52=20090107-18:15:16|98=0|108=30|10=062|", 28 | "8=FIX.4.2 | 9=178 | 35=8 | 49=PHLX | 56=PERS | 52=20071123-05:30:00.000 | 11=ATOMNOCCC9990900 | 20=3 | 150=E | 39=E | 55=MSFT | 167=CS | 54=1 | 38=15 | 40=2 | 44=15 | 58=PHLX EQUITY TESTING | 59=0 | 47=C | 32=0 | 31=0 | 151=15 | 14=0 | 6=0 | 10=128 |", 29 | "8=FIX.4.09=12835=D34=249=TW52=20060102-15:04:0556=ISLD115=116=CS128=MG129=CB11=ID21=338=10040=w54=155=INTC60=20060102-15:04:0510=123", 30 | } 31 | for i, message := range messages { 32 | fixMap, err := Transforms.ParseFIXMap(message) 33 | g.LogFatal(err) 34 | 35 | switch i { 36 | case 0: 37 | assert.Contains(t, fixMap, "account") 38 | assert.Contains(t, fixMap, "avg_px") 39 | case 1: 40 | assert.Contains(t, fixMap, "account") 41 | assert.Contains(t, fixMap, "settl_curr_amt") 42 | case 3: 43 | assert.Contains(t, fixMap, "begin_string") 44 | assert.Contains(t, fixMap, "sending_time") 45 | case 4: 46 | assert.Contains(t, fixMap, "cl_ord_id") 47 | assert.Contains(t, fixMap, "deliver_to_sub_id") 48 | } 49 | // g.Info("%s", g.Marshal(fixMap)) 50 | } 51 | } 52 | 53 | func 
TestDecode(t *testing.T) { 54 | filePath := "test/my_file.utf16.csv" 55 | bytes, err := os.ReadFile(filePath) 56 | assert.NoError(t, err) 57 | for i, r := range bytes { 58 | if i > 6 { 59 | break 60 | } 61 | g.Info("%#v, %#v, %d", string(r), r, r) 62 | } 63 | } 64 | 65 | func TestTransformMsUUID(t *testing.T) { 66 | uuidBytes := []byte{0x78, 0x56, 0x34, 0x12, 0x34, 0x12, 0x34, 0x12, 0x12, 0x34, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc} 67 | sp := NewStreamProcessor() 68 | val, _ := Transforms.ParseMsUUID(sp, cast.ToString(uuidBytes)) 69 | assert.Equal(t, "12345678-1234-1234-1234-123456789abc", val) 70 | } 71 | -------------------------------------------------------------------------------- /core/dbio/local/local_test.go: -------------------------------------------------------------------------------- 1 | package local 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestCloneRepo(t *testing.T) { 12 | home, err := GetHome() 13 | assert.NoError(t, err) 14 | 15 | // clone 16 | path, err := home.CloneRepo("https://github.com/flarco/gonkey") 17 | assert.NoError(t, err) 18 | 19 | files, err := ioutil.ReadDir(path) 20 | assert.NoError(t, err) 21 | 22 | assert.Len(t, files, 10) 23 | 24 | // pull 25 | path, err = home.CloneRepo("https://github.com/flarco/gonkey") 26 | assert.NoError(t, err) 27 | 28 | os.RemoveAll(path) 29 | } 30 | -------------------------------------------------------------------------------- /core/dbio/scripts/check_parquet.py: -------------------------------------------------------------------------------- 1 | import pyarrow.parquet as pq 2 | import sys 3 | 4 | file = pq.ParquetFile(sys.argv[1]) 5 | 6 | print(f'num_row_groups: {file.num_row_groups}') 7 | print(f'metadata: {file.metadata}') 8 | # print(f'column: {file.metadata.row_group(0).column(3)}') 9 | print(f'compression: {file.metadata.row_group(0).column(0).compression}') 10 | print(file.schema) 11 | 12 | data = file.read() 13 | print(data) 14 | 15 | for col in data: 16 | for row in col: 17 | _ = row -------------------------------------------------------------------------------- /core/dbio/scripts/test.sh: -------------------------------------------------------------------------------- 1 | set -e # exit on error 2 | # set -o allexport; source .env; set +o allexport 3 | 4 | export DEBUG='' 5 | 6 | cd connection 7 | go test -v -run 'TestConnection' 8 | cd - 9 | 10 | cd iop 11 | go test -timeout 5m -v -run 'TestParseDate|TestDetectDelimiter|TestFIX|TestConstraints|TestDuckDb|TestParquetDuckDb|TestIcebergReader|TestDeltaReader|TestPartition|TestExtractPartitionTimeValue|TestGetLowestPartTimeUnit|TestMatchedPartitionMask|TestGeneratePartURIsFromRange|TestDataset|TestValidateNames|TestExcelDateToTime' 12 | cd - 13 | 14 | cd database 15 | go test -v -run 'TestParseTableName|TestRegexMatch|TestParseColumnName|TestParseSQLMultiStatements|TestTrimSQLComments' 16 | go test -run TestChunkByColumnRange 17 | cd - 18 | 19 | cd filesys 20 | go test -v -run 'TestFileSysLocalCsv|TestFileSysLocalJson|TestFileSysLocalParquet|TestFileSysLocalFormat|TestFileSysGoogle|TestFileSysS3|TestFileSysAzure|TestFileSysSftp|TestFileSysFtp|TestExcel|TestFileSysLocalIceberg|TestFileSysLocalDelta' 21 | cd - 22 | 23 | cd api 24 | go test -v 25 | cd - -------------------------------------------------------------------------------- /core/dbio/templates/bigtable.yaml: -------------------------------------------------------------------------------- 1 | core: 2 | drop_table: '{"action": 
"delete_table", "table": "{table}"}' 3 | 4 | variable: 5 | tmp_folder: /tmp 6 | timestamp_layout_str: '{value}' 7 | timestamp_layout: '2006-01-02 15:04:05.000000' 8 | date_layout_str: '{value}' 9 | date_layout: '2006-01-02 15:04:05' 10 | error_filter_table_exists: already 11 | error_ignore_drop_table: NotFound 12 | quote_char: '' -------------------------------------------------------------------------------- /core/dbio/templates/elasticsearch.yaml: -------------------------------------------------------------------------------- 1 | core: 2 | incremental_select: '{incremental_where_cond}' 3 | incremental_where: '{ "update_key": "{update_key}", "value": "{value}" }' 4 | backfill_where: '{ "update_key": "{update_key}", "start_value": "{start_value}", "end_value": "{end_value}" }' 5 | 6 | variable: 7 | tmp_folder: /tmp 8 | timestamp_layout_str: '"{value}"' 9 | timestamp_layout: '2006-01-02T15:04:05.000Z' 10 | timestampz_layout_str: '"{value}"' 11 | timestampz_layout: '2006-01-02T15:04:05.000Z' 12 | date_layout_str: '"{value}"' 13 | date_layout: '2006-01-02' 14 | error_filter_table_exists: already 15 | error_ignore_drop_table: NotFound 16 | quote_char: '' -------------------------------------------------------------------------------- /core/dbio/templates/hive.yaml: -------------------------------------------------------------------------------- 1 | core: 2 | drop_table: drop table if exists {} purge 3 | create_table: create table {table} stored as parquet as \n({col_types}) 4 | 5 | metadata: 6 | schemas: show databases 7 | tables: show tables in {schema} 8 | views: select 0 schema, 1 view where 1=0 9 | columns: describe {schema}.{table} 10 | primary_keys: select 0 as schema, 1 table, 2 pk_name, 3 column_name, 4 column_order where 1=0 11 | indexes: select 0 schema, 1 table, 2 index_name, 3 column_name, 4 column_order, 5 unique where 1=0 12 | ddl: show create table {schema}.{table} 13 | 14 | analysis: 15 | field_chars: | 16 | select 17 | '{schema}' as schema_nm, 18 | '{table}' as table_nm, 19 | '{field}' as field, 20 | sum(if({field} rlike '\\n', 1, 0)) as cnt_nline, 21 | sum(if({field} rlike '\\t', 1, 0)) as cnt_tab, 22 | sum(if({field} rlike ',', 1, 0)) as cnt_comma, 23 | sum(if({field} rlike '"', 1, 0)) as cnt_dquote, 24 | min(length({field})) as f_min_len, 25 | max(length({field})) as f_max_len 26 | from {schema}.{table} 27 | 28 | functions: 29 | replace: regexp_replace({string_exp}, {to_replace}, {replacement}) 30 | truncate_datef: date({field}) 31 | checksum_datetime: unix_timestamp({field}) 32 | 33 | variable: 34 | beeline_cmd: beeline -u "{url}" --outputformat=tsv --nullemptystring=true --incremental=true -e "{sql}" | sed "s/\\"/\\"\\"/g" | sed "s/'\\t'/\\",\\"/g" | sed "s/^'/\\"/g" | sed "s/'$/\\"/g" 35 | hive_cmd: hive -e "{sql}" | sed "s/\\"/\\"\\"/g" | sed "s/\\t/\\",\\"/g" | sed "s/^/\\"/g" | sed "s/$/\\"/g" | sed "s/\\"NULL\\"/\\"\\"/g" 36 | 37 | -------------------------------------------------------------------------------- /core/dbio/templates/mongodb.yaml: -------------------------------------------------------------------------------- 1 | core: 2 | incremental_select: '{incremental_where_cond}' 3 | incremental_where: '{ "update_key": "{update_key}", "value": "{value}" }' 4 | backfill_where: '{ "update_key": "{update_key}", "start_value": "{start_value}", "end_value": "{end_value}" }' 5 | 6 | variable: 7 | tmp_folder: /tmp 8 | timestamp_layout_str: 'ISODate("{value}")' 9 | timestamp_layout: '2006-01-02T15:04:05.000000Z' 10 | date_layout_str: 'ISODate(("{value}")' 11 | 
date_layout: '2006-01-02' 12 | error_filter_table_exists: already 13 | error_ignore_drop_table: NotFound 14 | quote_char: '' 15 | max_column_length: 255 -------------------------------------------------------------------------------- /core/dbio/templates/profile.def.yaml: -------------------------------------------------------------------------------- 1 | # leave this alone, unless you want to change the JARs 2 | drivers: 3 | oracle: 4 | path: jars/ojdbc6-11.2.0.3.jar 5 | class: oracle.jdbc.driver.OracleDriver 6 | sqlserver: 7 | path: jars/jtds-1.3.1.jar 8 | class: net.sourceforge.jtds.jdbc.Driver 9 | postgresql: 10 | path: jars/postgresql-9.4.1207.jre7.jar 11 | class: org.postgresql.Driver 12 | mysql: 13 | path: jars/mysql-connector-java-6.0.6.jar 14 | class: com.mysql.jdbc.Driver 15 | sqlite: 16 | path: jars/sqlite-jdbc-3.23.1.jar 17 | class: org.sqlite.JDBC 18 | 19 | # Here enter your database profiles, use proper JDBC URLs 20 | # databse type include: oracle, postgresql, sqlserver, hive, spark 21 | databases: 22 | 23 | PG: 24 | name: PG1 25 | host: pg1 26 | database: db1 27 | port: 5432 28 | user: user 29 | password: password 30 | sslmode: disable 31 | type: postgresql 32 | url: "jdbc:postgresql://host:port/database?&ssl=false" 33 | 34 | environment: 35 | ETL_TEMP_DIR: /tmp 36 | 37 | variable: 38 | tmp_folder: /tmp 39 | 40 | -------------------------------------------------------------------------------- /core/dbio/templates/profile.yaml: -------------------------------------------------------------------------------- 1 | # leave this alone, unless you want to change the JARs 2 | drivers: 3 | oracle: 4 | path: jars/ojdbc6-11.2.0.3.jar 5 | class: oracle.jdbc.driver.OracleDriver 6 | sqlserver: 7 | path: jars/jtds-1.3.1.jar 8 | class: net.sourceforge.jtds.jdbc.Driver 9 | postgresql: 10 | path: jars/postgresql-9.4.1207.jre7.jar 11 | class: org.postgresql.Driver 12 | mysql: 13 | path: jars/mysql-connector-java-6.0.6.jar 14 | class: com.mysql.jdbc.Driver 15 | sqlite: 16 | path: jars/sqlite-jdbc-3.23.1.jar 17 | class: org.sqlite.JDBC 18 | 19 | # Here enter your database profiles, use proper JDBC URLs 20 | # databse type include: oracle, postgresql, sqlserver, hive, spark 21 | databases: 22 | EDW1: 23 | name: EDW1 24 | host: edw1 25 | port: 1521 26 | service: edw1_service 27 | user: user 28 | password: password 29 | type: oracle 30 | url: "jdbc:oracle:thin:@//edw1:1521/edw1_service" 31 | 32 | PG1: 33 | name: PG1 34 | host: pg1 35 | database: db1 36 | port: 5432 37 | user: user 38 | password: password 39 | sslmode: disable 40 | type: postgresql 41 | url: "jdbc:postgresql://host:port/database?&ssl=false" 42 | 43 | MSSQL1: 44 | name: MSSQL1 45 | host: mssql1 46 | database: master 47 | port: 1433 48 | user: user 49 | password: password 50 | odbc_driver: ODBC Driver 13 for SQL Server 51 | type: sqlserver 52 | url: "jdbc:jtds:sqlserver://mssql1:1433/master;user=user;password=password;instance=;useNTLMv2=true;domain=workgroup" 53 | 54 | SPARK_LOCAL: 55 | name: SPARK_LOCAL 56 | user: user 57 | hive_enabled: false 58 | spark_home: /path/spark-2.2.0-bin-hadoop2.7 59 | type: spark 60 | 61 | SPARK_HIVE_21: 62 | name: SPARK_HIVE 63 | user: user 64 | hive_enabled: true 65 | spark_home: /path/spark-2.1.0-bin-hadoop2.7 66 | type: spark 67 | 68 | SPARK_HIVE: 69 | name: SPARK_HIVE 70 | user: user 71 | hive_enabled: true 72 | spark_home: /path/spark-2.2.0-bin-hadoop2.7 73 | type: spark 74 | 75 | PG_TEST: 76 | name: PG_TEST 77 | host: localhost 78 | database: test_db 79 | port: 35432 80 | user: user 81 | 
password: password 82 | sslmode: disable 83 | type: postgresql 84 | url: "jdbc:postgresql://localhost:35432/test_db?&ssl=false" 85 | 86 | ORCL_TEST: 87 | name: ORCL_TEST 88 | host: localhost 89 | port: 31521 90 | sid: xe 91 | user: system 92 | password: oracle 93 | type: oracle 94 | url: "jdbc:oracle:thin:@//localhost:31521//xe" 95 | 96 | TESTS: 97 | EDW1: 98 | object: SCHEMA1.OBJECT1 99 | 100 | environment: 101 | ETL_TEMP_DIR: /tmp 102 | 103 | variable: 104 | hive_cmd: hive -e "set hive.cli.print.header=true; {sql}" | sed "s/\\"/\\"\\"/g" | sed "s/\\t/\\",\\"/g" | sed "s/^/\\"/g" | sed "s/$/\\"/g" | sed "s/\\"NULL\\"/\\"\\"/g" 105 | beeline_cmd: beeline -u "jdbc:hive2://hivehost:10000/" --outputformat=tsv --nullemptystring=true --incremental=true -e "set hive.cli.print.header=true; {sql}" | sed "s/\\"/\\"\\"/g" | sed "s/'\\t'/\\",\\"/g" | sed "s/^'/\\"/g" | sed "s/'$/\\"/g" 106 | tmp_folder: /tmp 107 | 108 | spark-conf: 109 | # spark.master: 'yarn': 110 | spark.master: local[2] 111 | # spark.ui.port: '4040' 112 | # spark.cores.max: '2' 113 | # spark.yarn.queue: long 114 | # spark.driver.memory: 1g 115 | # spark.driver.maxResultSize: 2g 116 | # spark.driver.cores: '2' 117 | # spark.executor.memory: 500m 118 | # spark.executor.instances: '2' 119 | # spark.executor.cores: '1' 120 | # spark.sql.shuffle.partitions: 6 121 | spark.sql.broadcastTimeout: 900 122 | spark.sql.tungsten.enabled: "true" 123 | spark.io.compression.codec: snappy 124 | spark.rdd.compress: "true" 125 | spark.streaming.backpressure.enabled: "true" 126 | spark.sql.parquet.compression.codec: snappy 127 | # spark.local.dir: /tmp/spark_temp 128 | spark.sql.crossJoin.enabled: "true" 129 | # spark.driver.extraClassPath: jars/ojdbc6.jar 130 | # "spark.eventLog.enabled": "true" 131 | -------------------------------------------------------------------------------- /core/dbio/templates/prometheus.yaml: -------------------------------------------------------------------------------- 1 | variable: 2 | tmp_folder: /tmp 3 | timestamp_layout_str: '{value}' 4 | timestamp_layout: '2006-01-02 15:04:05.000000' 5 | date_layout_str: '{value}' 6 | date_layout: '2006-01-02 15:04:05' 7 | error_filter_table_exists: already 8 | error_ignore_drop_table: NotFound 9 | quote_char: '' -------------------------------------------------------------------------------- /core/dbio/templates/spark.yaml: -------------------------------------------------------------------------------- 1 | core: 2 | drop_table: drop table if exists {} purge 3 | create_table: create table {table} stored as parquet as \n({col_types}) 4 | 5 | metadata: 6 | schemas: show databases 7 | tables: show tables in {schema} 8 | views: select 0 schema, 1 view where 1=0 9 | columns: describe {schema}.{table} 10 | primary_keys: select 0 as schema, 1 table, 2 pk_name, 3 column_name, 4 column_order where 1=0 11 | indexes: select 0 schema, 1 table, 2 index_name, 3 column_name, 4 column_order, 5 unique where 1=0 12 | ddl: show create table {schema}.{table} 13 | 14 | analysis: 15 | field_chars: | 16 | select 17 | '{schema}' as schema_nm, 18 | '{table}' as table_nm, 19 | '{field}' as field, 20 | sum(if({field} rlike '\\n', 1, 0)) as cnt_nline, 21 | sum(if({field} rlike '\\t', 1, 0)) as cnt_tab, 22 | sum(if({field} rlike ',', 1, 0)) as cnt_comma, 23 | sum(if({field} rlike '"', 1, 0)) as cnt_dquote, 24 | min(length({field})) as f_min_len, 25 | max(length({field})) as f_max_len 26 | from {schema}.{table} 27 | 28 | functions: 29 | replace: regexp_replace({string_exp}, {to_replace}, {replacement}) 30 | 
truncate_datef: date({field}) 31 | checksum_datetime: unix_timestamp({field}) 32 | 33 | -------------------------------------------------------------------------------- /core/dbio/templates/types_general_to_native.tsv: -------------------------------------------------------------------------------- 1 | general_type oracle postgres mysql mariadb sqlserver azuresql azuredwh redshift snowflake sqlite d1 bigquery clickhouse duckdb motherduck starrocks trino proton athena 2 | bigint number(19) bigint bigint bigint bigint bigint bigint bigint bigint bigint bigint int64 Nullable(Int64) bigint bigint bigint bigint nullable(int64) bigint 3 | binary varbinary() bytea varbinary varbinary varbinary varbinary varbinary varchar(65535) binary blob blob bytes Nullable(String) binary binary varbinary varbinary nullable(string) binary 4 | bool varchar(5) bool char(5) char(5) varchar(5) varchar(5) varchar(5) bool boolean boolean boolean bool Nullable(String) bool bool char(5) boolean nullable(string) boolean 5 | date date date date date date date date date date text text date Nullable(Date) date date date date nullable(date) date 6 | datetime timestamp(9) timestamp datetime(6) datetime(6) datetime2 datetime2 datetime2 timestamp timestamp text text timestamp Nullable(DateTime64(6)) datetime datetime datetime timestamp nullable(datetime64(6)) timestamp 7 | decimal number(,) numeric decimal(,) decimal(,) decimal(,) decimal(,) decimal(,) decimal(,) decimal(,) real real numeric Nullable(Decimal(,)) decimal(,) decimal(,) decimal(,) decimal(,) nullable(decimal(,)) decimal(,) 8 | integer number(10) integer integer integer integer integer integer integer integer integer integer int64 Nullable(Int64) integer integer bigint integer nullable(int64) integer 9 | json clob jsonb json json nvarchar(max) nvarchar(max) nvarchar(max) varchar(65535) variant json json json Nullable(String) json json json json nullable(string) json 10 | smallint number(5) smallint smallint smallint smallint smallint smallint smallint smallint integer integer int64 Nullable(Int32) smallint smallint smallint smallint nullable(int32) smallint 11 | string varchar() varchar() varchar() varchar() nvarchar() nvarchar() nvarchar() varchar() varchar() text text string Nullable(String) varchar() varchar() varchar() varchar nullable(string) string 12 | text clob text mediumtext mediumtext nvarchar(max) nvarchar(max) nvarchar(max) varchar(65535) text text text string Nullable(String) text text varchar(65533) varchar nullable(string) string 13 | timestamp timestamp(9) timestamp datetime(6) datetime(6) datetime2 datetime2 datetime2 timestamp timestamp_ntz text text timestamp Nullable(DateTime64(6)) timestamp timestamp datetime timestamp nullable(datetime64(6)) timestamp 14 | timestampz timestamp(9) with time zone timestamptz datetime(6) datetime(6) datetimeoffset datetimeoffset datetimeoffset timestamptz timestamp_tz text text timestamp Nullable(DateTime64(6)) timestamptz timestamptz datetime timestamp with time zone nullable(datetime64(6)) timestamp with time zone 15 | float number double precision double double float float float double precision float real real float64 Nullable(Float64) double double double double nullable(float64) double 16 | time varchar() varchar() varchar() varchar() varchar() varchar() varchar() varchar(65535) varchar text text string Nullable(String) time time varchar() varchar nullable(string) string 17 | timez varchar() varchar() varchar() varchar() varchar() varchar() varchar() varchar(65535) varchar text text string 
Nullable(String) time time varchar() varchar nullable(string) string 18 | uuid varchar(36) uuid varchar(36) varchar(36) uniqueidentifier uniqueidentifier uniqueidentifier varchar(36) varchar(36) text text string Nullable(UUID) uuid uuid varchar(36) uuid nullable(string) string -------------------------------------------------------------------------------- /core/dbio/templates/vertica.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/vertica/vertica-sql-go 2 | -------------------------------------------------------------------------------- /core/env/default.env.yaml: -------------------------------------------------------------------------------- 1 | # Environment Credentials for Sling CLI 2 | # See https://docs.slingdata.io/sling-cli/environment 3 | 4 | # Holds all connection credentials for Extraction and Loading 5 | connections: 6 | 7 | 8 | # Global variables for specific settings, available to all connections at runtime 9 | variables: 10 | 11 | -------------------------------------------------------------------------------- /core/env/envfile.go: -------------------------------------------------------------------------------- 1 | package env 2 | 3 | import ( 4 | "os" 5 | "path" 6 | "sort" 7 | "strings" 8 | 9 | "github.com/flarco/g" 10 | "github.com/samber/lo" 11 | "github.com/spf13/cast" 12 | "gopkg.in/yaml.v2" 13 | ) 14 | 15 | type EnvFile struct { 16 | Connections map[string]map[string]interface{} `json:"connections,omitempty" yaml:"connections,omitempty"` 17 | Variables map[string]interface{} `json:"variables,omitempty" yaml:"variables,omitempty"` 18 | 19 | Path string `json:"-" yaml:"-"` 20 | TopComment string `json:"-" yaml:"-"` 21 | Body string `json:"-" yaml:"-"` 22 | } 23 | 24 | func SetHomeDir(name string) string { 25 | envKey := strings.ToUpper(name) + "_HOME_DIR" 26 | dir := os.Getenv(envKey) 27 | if dir == "" { 28 | dir = path.Join(g.UserHomeDir(), "."+name) 29 | os.Setenv(envKey, dir) 30 | } 31 | envMux.Lock() 32 | HomeDirs[name] = dir 33 | envMux.Unlock() 34 | return dir 35 | } 36 | 37 | func (ef *EnvFile) WriteEnvFile() (err error) { 38 | connsMap := yaml.MapSlice{} 39 | 40 | // order connections names 41 | names := lo.Keys(ef.Connections) 42 | sort.Strings(names) 43 | for _, name := range names { 44 | keyMap := ef.Connections[name] 45 | // order connection keys (type first) 46 | cMap := yaml.MapSlice{} 47 | keys := lo.Keys(keyMap) 48 | sort.Strings(keys) 49 | if v, ok := keyMap["type"]; ok { 50 | cMap = append(cMap, yaml.MapItem{Key: "type", Value: v}) 51 | } 52 | 53 | for _, k := range keys { 54 | if k == "type" { 55 | continue // already put first 56 | } 57 | k = cast.ToString(k) 58 | cMap = append(cMap, yaml.MapItem{Key: k, Value: keyMap[k]}) 59 | } 60 | 61 | // add to connection map 62 | connsMap = append(connsMap, yaml.MapItem{Key: name, Value: cMap}) 63 | } 64 | 65 | efMap := yaml.MapSlice{ 66 | {Key: "connections", Value: connsMap}, 67 | {Key: "variables", Value: ef.Variables}, 68 | } 69 | 70 | envBytes, err := yaml.Marshal(efMap) 71 | if err != nil { 72 | return g.Error(err, "could not marshal into YAML") 73 | } 74 | 75 | output := []byte(ef.TopComment + string(envBytes)) 76 | 77 | // fix windows path 78 | ef.Path = strings.ReplaceAll(ef.Path, `\`, `/`) 79 | err = os.WriteFile(ef.Path, formatYAML(output), 0644) 80 | if err != nil { 81 | return g.Error(err, "could not write YAML file") 82 | } 83 | 84 | return 85 | } 86 | 87 | func formatYAML(input []byte) []byte { 88 | newOutput := []byte{} 89 | pIndent 
:= 0 90 | indent := 0 91 | inIndent := true 92 | prevC := byte('-') 93 | for _, c := range input { 94 | add := false 95 | if c == ' ' && inIndent { 96 | indent++ 97 | add = true 98 | } else if c == '\n' { 99 | pIndent = indent 100 | indent = 0 101 | add = true 102 | inIndent = true 103 | } else if prevC == '\n' { 104 | newOutput = append(newOutput, '\n') // add extra space 105 | add = true 106 | } else if prevC == ' ' && pIndent > indent && inIndent { 107 | newOutput = append(newOutput, '\n') // add extra space 108 | for i := 0; i < indent; i++ { 109 | newOutput = append(newOutput, ' ') 110 | } 111 | add = true 112 | inIndent = false 113 | } else { 114 | add = true 115 | inIndent = false 116 | } 117 | 118 | if add { 119 | newOutput = append(newOutput, c) 120 | } 121 | prevC = c 122 | } 123 | return newOutput 124 | } 125 | 126 | func LoadEnvFile(path string) (ef EnvFile) { 127 | bytes, _ := os.ReadFile(path) 128 | 129 | ef.Body = string(bytes) 130 | ef.Path = path 131 | 132 | // expand variables 133 | envMap := map[string]any{} 134 | for _, tuple := range os.Environ() { 135 | key := strings.Split(tuple, "=")[0] 136 | val := strings.TrimPrefix(tuple, key+"=") 137 | envMap[key] = val 138 | } 139 | ef.Body = g.Rmd(ef.Body, envMap) 140 | 141 | err := yaml.Unmarshal([]byte(ef.Body), &ef) 142 | if err != nil { 143 | err = g.Error(err, "error parsing yaml string") 144 | _ = err 145 | } 146 | 147 | if ef.Connections == nil { 148 | ef.Connections = map[string]map[string]interface{}{} 149 | } 150 | 151 | if ef.Variables == nil { 152 | ef.Variables = map[string]interface{}{} 153 | } 154 | 155 | for k, v := range ef.Variables { 156 | if _, found := envMap[k]; !found { 157 | os.Setenv(k, cast.ToString(v)) 158 | } 159 | } 160 | return ef 161 | } 162 | 163 | func GetEnvFilePath(dir string) string { 164 | return path.Join(dir, "env.yaml") 165 | } 166 | -------------------------------------------------------------------------------- /core/env/vars.go: -------------------------------------------------------------------------------- 1 | package env 2 | 3 | import "os" 4 | 5 | var envVars = []string{ 6 | "PARALLEL", "CONCURRENCY", "USE_BUFFERED_STREAM", "CONCURRENCY_LIMIT", 7 | 8 | "BUCKET", "ACCESS_KEY_ID", "SECRET_ACCESS_KEY", "SESSION_TOKEN", "ENDPOINT", "REGION", "DEFAULT_REGION", 9 | 10 | "AWS_BUCKET", "AWS_ACCESS_KEY_ID", 11 | "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN", "AWS_ENDPOINT", "AWS_REGION", "AWS_DEFAULT_REGION", "AWS_PROFILE", 12 | 13 | "COMPRESSION", "FILE_MAX_ROWS", "SAMPLE_SIZE", 14 | 15 | "KEY_FILE", "KEY_BODY", "CRED_API_KEY", 16 | 17 | "GC_BUCKET", "GOOGLE_APPLICATION_CREDENTIALS", "GSHEETS_CRED_FILE", 18 | "GC_KEY_BODY", "GC_CRED_API_KEY", 19 | 20 | "ACCOUNT", "CONTAINER", "SAS_SVC_URL", "CONN_STR", 21 | 22 | "AZURE_ACCOUNT", "AZURE_KEY", "AZURE_CONTAINER", "AZURE_SAS_SVC_URL", 23 | "AZURE_CONN_STR", 24 | 25 | "SSH_TUNNEL", "SSH_PRIVATE_KEY", "SSH_PUBLIC_KEY", 26 | 27 | "SMTP_HOST", "SMTP_PORT", "SMTP_USERNAME", "SMTP_PASSWORD", "SMTP_FROM_EMAIL", "SMTP_REPLY_EMAIL", 28 | 29 | "HTTP_USER", "HTTP_PASSWORD", "GSHEET_CLIENT_JSON_BODY", 30 | "GSHEET_SHEET_NAME", "GSHEET_MODE", 31 | 32 | "DIGITALOCEAN_ACCESS_TOKEN", "GITHUB_ACCESS_TOKEN", 33 | "SURVEYMONKEY_ACCESS_TOKEN", 34 | 35 | "SEND_ANON_USAGE", "DBIO_HOME", 36 | } 37 | 38 | // Vars are the variables we are using 39 | func Vars() (vars map[string]string) { 40 | vars = map[string]string{} 41 | // get default from environment 42 | for _, k := range envVars { 43 | val := os.Getenv(k) 44 | if vars[k] == "" && val != "" { 45 | vars[k] = val 
46 | } 47 | } 48 | 49 | // default as true 50 | for _, k := range []string{} { 51 | if vars[k] == "" { 52 | vars[k] = "true" 53 | } 54 | } 55 | 56 | if vars["SAMPLE_SIZE"] == "" { 57 | vars["SAMPLE_SIZE"] = "900" 58 | } 59 | 60 | return 61 | } 62 | -------------------------------------------------------------------------------- /core/sling/config_test.go: -------------------------------------------------------------------------------- 1 | package sling 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | "time" 7 | 8 | "github.com/flarco/g" 9 | "github.com/slingdata-io/sling-cli/core/dbio" 10 | "github.com/slingdata-io/sling-cli/core/dbio/iop" 11 | "github.com/spf13/cast" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestGetRate(t *testing.T) { 16 | now := time.Now() 17 | now2 := time.Now() 18 | df := iop.Dataflow{} 19 | task := TaskExecution{ 20 | StartTime: &now, 21 | EndTime: &now2, 22 | //df: &df, 23 | } 24 | rate, _ := task.GetRate(10) 25 | 26 | st := *task.StartTime 27 | et := *task.EndTime 28 | 29 | g.P(et.UnixNano()) 30 | g.P(st.UnixNano()) 31 | g.P(df.Count()) 32 | g.P(rate) 33 | 34 | g.P(et.UnixNano() - st.UnixNano()) 35 | 36 | secElapsed := cast.ToFloat64(et.UnixNano()-st.UnixNano()) / 1000000000.0 37 | g.P(secElapsed) 38 | g.P(math.Round(cast.ToFloat64(df.Count()) / secElapsed)) 39 | rate = cast.ToInt64(math.Round(cast.ToFloat64(df.Count()) / secElapsed)) 40 | g.P(rate) 41 | } 42 | 43 | func TestConfig(t *testing.T) { 44 | 45 | cfgStr := `{ 46 | "post_dbt": { 47 | "conn": "ORACLE_SLING", 48 | "expr": "my_first_dbt_model", 49 | "name": "DBT_PROJ_1", 50 | "folder": "/", 51 | "version": "0.18.0", 52 | "repo_url": "https://github.com/fishtown-analytics/dbt-starter-project" 53 | }, 54 | "tgt_conn": "ORACLE_SLING" 55 | }` 56 | _, err := NewConfig(cfgStr) 57 | assert.NoError(t, err) 58 | 59 | } 60 | 61 | func TestColumnCasing(t *testing.T) { 62 | df := iop.NewDataflow(0) 63 | 64 | normalizeCasing := iop.NormalizeColumnCasing 65 | sourceCasing := iop.SourceColumnCasing 66 | snakeCasing := iop.SnakeColumnCasing 67 | targetCasing := iop.TargetColumnCasing 68 | 69 | df.Columns = iop.NewColumns(iop.Column{Name: "myCol"}) 70 | applyColumnCasingToDf(df, dbio.TypeDbSnowflake, &sourceCasing) 71 | assert.Equal(t, "myCol", df.Columns[0].Name) 72 | 73 | df.Columns = iop.NewColumns(iop.Column{Name: "myCol"}, iop.Column{Name: "hey-hey"}) 74 | applyColumnCasingToDf(df, dbio.TypeDbSnowflake, &snakeCasing) 75 | assert.Equal(t, "MY_COL", df.Columns[0].Name) 76 | assert.Equal(t, "HEY_HEY", df.Columns[1].Name) 77 | 78 | df.Columns = iop.NewColumns(iop.Column{Name: "myCol"}, iop.Column{Name: "hey-hey"}) 79 | applyColumnCasingToDf(df, dbio.TypeDbSnowflake, &normalizeCasing) 80 | assert.Equal(t, "myCol", df.Columns[0].Name) 81 | assert.Equal(t, "hey-hey", df.Columns[1].Name) 82 | 83 | df.Columns = iop.NewColumns(iop.Column{Name: "myCol"}) 84 | applyColumnCasingToDf(df, dbio.TypeDbSnowflake, &targetCasing) 85 | assert.Equal(t, "MYCOL", df.Columns[0].Name) 86 | 87 | df.Columns = iop.NewColumns(iop.Column{Name: "DHL OriginalTracking-Number"}) 88 | applyColumnCasingToDf(df, dbio.TypeDbDuckDb, &targetCasing) 89 | assert.Equal(t, "dhl_originaltracking_number", df.Columns[0].Name) 90 | 91 | df.Columns = iop.NewColumns(iop.Column{Name: "DHL OriginalTracking-Number"}) 92 | applyColumnCasingToDf(df, dbio.TypeDbDuckDb, &snakeCasing) 93 | assert.Equal(t, "dhl_original_tracking_number", df.Columns[0].Name) 94 | 95 | df.Columns = iop.NewColumns(iop.Column{Name: "DHL OriginalTracking-Number"}) 96 | 
applyColumnCasingToDf(df, dbio.TypeDbDuckDb, &normalizeCasing) 97 | assert.Equal(t, "DHL OriginalTracking-Number", df.Columns[0].Name) 98 | 99 | df.Columns = iop.NewColumns(iop.Column{Name: "HAPPY"}) 100 | applyColumnCasingToDf(df, dbio.TypeDbDuckDb, &normalizeCasing) 101 | assert.Equal(t, "happy", df.Columns[0].Name) 102 | applyColumnCasingToDf(df, dbio.TypeDbSnowflake, &normalizeCasing) 103 | assert.Equal(t, "HAPPY", df.Columns[0].Name) 104 | } 105 | -------------------------------------------------------------------------------- /core/sling/hooks.go: -------------------------------------------------------------------------------- 1 | package sling 2 | 3 | import "github.com/flarco/g" 4 | 5 | type HookType string 6 | type HookKind string 7 | type OnFailType string 8 | 9 | const ( 10 | HookKindHook HookKind = "hook" 11 | HookKindStep HookKind = "step" 12 | ) 13 | 14 | var HookRunReplication func(string, *Config, ...string) error 15 | 16 | type Hook interface { 17 | Type() HookType 18 | ID() string 19 | Data() map[string]any 20 | SetExtra(map[string]any) 21 | Stage() HookStage 22 | Execute() error 23 | PayloadMap() map[string]any 24 | ExecuteOnDone(error) (OnFailType, error) 25 | } 26 | 27 | type Hooks []Hook 28 | 29 | type HookMap struct { 30 | Start []any `json:"start,omitempty" yaml:"start,omitempty"` 31 | End []any `json:"end,omitempty" yaml:"end,omitempty"` 32 | Pre []any `json:"pre,omitempty" yaml:"pre,omitempty"` 33 | Post []any `json:"post,omitempty" yaml:"post,omitempty"` 34 | } 35 | 36 | type ParseOptions struct { 37 | stage HookStage 38 | kind HookKind 39 | index int 40 | state RuntimeState 41 | md5 string 42 | } 43 | 44 | type HookStage string 45 | 46 | const ( 47 | HookStagePre HookStage = "pre" 48 | HookStagePost HookStage = "post" 49 | HookStageStart HookStage = "start" 50 | HookStageEnd HookStage = "end" 51 | ) 52 | 53 | var ParseHook = func(any, ParseOptions) (Hook, error) { 54 | return nil, g.Error("please use the official sling-cli release for using hooks and pipelines") 55 | } 56 | 57 | func (hs Hooks) Execute() (err error) { 58 | for _, hook := range hs { 59 | if !g.In(hook.Type(), "log") { 60 | g.Debug(`executing hook "%s" (type: %s)`, hook.ID(), hook.Type()) 61 | } 62 | 63 | hookErr := hook.Execute() 64 | _, err = hook.ExecuteOnDone(hookErr) 65 | 66 | if err != nil { 67 | return g.Error(err, "error executing hook") 68 | } 69 | } 70 | return nil 71 | } 72 | -------------------------------------------------------------------------------- /core/sling/pbar.go: -------------------------------------------------------------------------------- 1 | package sling 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/dustin/go-humanize" 8 | "github.com/flarco/g" 9 | "github.com/shirou/gopsutil/v3/cpu" 10 | "github.com/shirou/gopsutil/v3/mem" 11 | "github.com/spf13/cast" 12 | pb "gopkg.in/cheggaaa/pb.v2" 13 | ) 14 | 15 | var ShowProgress = true 16 | 17 | type ProgressBar struct { 18 | bar *pb.ProgressBar 19 | started bool 20 | finished bool 21 | } 22 | 23 | // NewPBar creates a new progress bar 24 | func NewPBar(d time.Duration) *ProgressBar { 25 | pbar := new(pb.ProgressBar) 26 | 27 | pb.RegisterElement("status", elementStatus, true) 28 | pb.RegisterElement("counters", elementCounters, true) 29 | pb.RegisterElement("bytes", elementBytes, true) 30 | pb.RegisterElement("rowRate", elementRowRate, true) 31 | pb.RegisterElement("byteRate", elementByteRate, true) 32 | tmpl := `{{etime . "%s" | yellow }} {{counters . }} {{speed . "%s r/s" | green }} {{ bytes . 
| blue }} {{ status . }}` 33 | if g.IsDebugLow() { 34 | pb.RegisterElement("mem", elementMem, true) 35 | pb.RegisterElement("cpu", elementCPU, true) 36 | // tmpl = `{{etime . "%s" | yellow }} {{counters . }} {{speed . "%s r/s" | green }} {{ bytes . | blue }} {{ byteRate . }} {{ mem . }} {{ cpu . }} {{ status . }}` 37 | tmpl = `{{etime . "%s" | yellow }} {{counters . }} {{speed . "%s r/s" | green }} {{ bytes . | blue }} {{ mem . }} {{ cpu . }} {{ status . }}` 38 | } 39 | barTmpl := pb.ProgressBarTemplate(tmpl) 40 | pbar = barTmpl.New(0) 41 | pbar.SetRefreshRate(d) 42 | pbar.SetWidth(40) 43 | return &ProgressBar{ 44 | bar: pbar, 45 | } 46 | } 47 | 48 | // SetStatus sets the progress bar status 49 | func (pb *ProgressBar) SetStatus(status string) { 50 | if !pb.finished { 51 | pb.bar.Set("status", status) 52 | pb.bar.Write() 53 | } 54 | } 55 | 56 | func (pb *ProgressBar) Start() { 57 | pb.started = true 58 | pb.bar.Start() 59 | } 60 | 61 | func (pb *ProgressBar) Finish() { 62 | if !pb.finished { 63 | pb.bar.Finish() 64 | pb.finished = true 65 | } 66 | } 67 | 68 | // https://github.com/cheggaaa/pb/blob/master/v3/element.go 69 | // calculates the RAM percent 70 | var elementMem pb.ElementFunc = func(state *pb.State, args ...string) string { 71 | memRAM, err := mem.VirtualMemory() 72 | if err != nil { 73 | return "" 74 | } 75 | return g.F("| %d%% MEM", cast.ToInt(memRAM.UsedPercent)) 76 | } 77 | 78 | // calculates the CPU percent 79 | var elementCPU pb.ElementFunc = func(state *pb.State, args ...string) string { 80 | cpuPct, err := cpu.Percent(0, false) 81 | if err != nil || len(cpuPct) == 0 { 82 | return "" 83 | } 84 | return g.F("| %d%% CPU", cast.ToInt(cpuPct[0])) 85 | } 86 | 87 | var elementStatus pb.ElementFunc = func(state *pb.State, args ...string) string { 88 | status := cast.ToString(state.Get("status")) 89 | if status == "" { 90 | return "" 91 | } 92 | return g.F("| %s", status) 93 | } 94 | 95 | type argsHelper []string 96 | 97 | func (args argsHelper) getOr(n int, value string) string { 98 | if len(args) > n { 99 | return args[n] 100 | } 101 | return value 102 | } 103 | 104 | func (args argsHelper) getNotEmptyOr(n int, value string) (v string) { 105 | if v = args.getOr(n, value); v == "" { 106 | return value 107 | } 108 | return 109 | } 110 | 111 | var elementCounters pb.ElementFunc = func(state *pb.State, args ...string) string { 112 | var f string 113 | if state.Total() > 0 { 114 | f = argsHelper(args).getNotEmptyOr(0, "%s / %s") 115 | } else { 116 | f = argsHelper(args).getNotEmptyOr(1, "%[1]s") 117 | } 118 | return fmt.Sprintf( 119 | f, humanize.Commaf(cast.ToFloat64(state.Value())), 120 | humanize.Commaf(cast.ToFloat64(state.Total())), 121 | ) 122 | } 123 | 124 | var elementBytes pb.ElementFunc = func(state *pb.State, args ...string) string { 125 | bytes := cast.ToString(state.Get("bytes")) 126 | if bytes == "0 B" { 127 | return "" 128 | } 129 | return g.F("%s", bytes) 130 | } 131 | 132 | var elementByteRate pb.ElementFunc = func(state *pb.State, args ...string) string { 133 | bytes := cast.ToString(state.Get("byteRate")) 134 | if bytes == "0 B" { 135 | return "" 136 | } 137 | return g.F("| %s", bytes) 138 | } 139 | 140 | var elementRowRate pb.ElementFunc = func(state *pb.State, args ...string) string { 141 | bytes := cast.ToString(state.Get("rowRate")) 142 | return g.F("| %s", bytes) 143 | } 144 | -------------------------------------------------------------------------------- /core/sling/project.go: -------------------------------------------------------------------------------- 
1 | package sling 2 | 3 | type Project struct { 4 | Config ProjectConfig 5 | TaskConfigs map[string]Config 6 | } 7 | 8 | func LoadProject(path string) {} 9 | 10 | type ProjectConfig struct { 11 | Project string `json:"project" yaml:"project"` 12 | TaskPaths []string `json:"task-paths" yaml:"task-paths"` 13 | Defaults map[string]interface{} `json:"defaults" yaml:"defaults"` 14 | NotificationTags map[string]NotificationConfig `json:"notification_tags" yaml:"notification_tags"` 15 | } 16 | 17 | type NotificationConfig struct { 18 | Name string `json:"name"` 19 | Emails []string `json:"emails"` 20 | Slack bool `json:"slack"` 21 | MsTeams bool `json:"msteams"` 22 | WebhookURLs []string `json:"webhook_urls"` // urls 23 | OnSuccess bool `json:"on_success"` 24 | OnFailure bool `json:"on_failure"` 25 | OnLinger bool `json:"on_linger"` 26 | OnEmpty bool `json:"on_empty"` 27 | } 28 | -------------------------------------------------------------------------------- /core/sling/sling.go: -------------------------------------------------------------------------------- 1 | package sling 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/flarco/g" 7 | ) 8 | 9 | // Sling accepts a configuration and runs an Extract-Load task 10 | func Sling(cfg *Config) (err error) { 11 | task := NewTask(os.Getenv("SLING_EXEC_ID"), cfg) 12 | if task.Err != nil { 13 | return g.Error(task.Err, "error creating Sling task") 14 | } 15 | 16 | err = task.Execute() 17 | if err != nil { 18 | return g.Error(err, "error running Sling task") 19 | } 20 | 21 | return 22 | } 23 | -------------------------------------------------------------------------------- /core/store/db.go: -------------------------------------------------------------------------------- 1 | package store 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/denisbrodbeck/machineid" 7 | "github.com/flarco/g" 8 | "github.com/jmoiron/sqlx" 9 | "github.com/slingdata-io/sling-cli/core/dbio/database" 10 | "github.com/slingdata-io/sling-cli/core/env" 11 | "gorm.io/gorm" 12 | "gorm.io/gorm/logger" 13 | ) 14 | 15 | var ( 16 | // Db is the main databse connection 17 | Db *gorm.DB 18 | Dbx *sqlx.DB 19 | Conn database.Connection 20 | 21 | // DropAll signifies to drop all tables and recreate them 22 | DropAll = false 23 | ) 24 | 25 | // InitDB initializes the database 26 | func InitDB() { 27 | var err error 28 | 29 | if Db != nil { 30 | // already initiated 31 | return 32 | } 33 | 34 | dbURL := g.F("sqlite://%s/.sling.db?cache=shared&mode=rwc&_journal_mode=WAL", env.HomeDir) 35 | Conn, err = database.NewConn(dbURL, "silent=true") 36 | if err != nil { 37 | g.Debug("could not initialize local .sling.db. %s", err.Error()) 38 | return 39 | } 40 | 41 | Db, err = Conn.GetGormConn(&gorm.Config{ 42 | Logger: logger.Default.LogMode(logger.Silent), 43 | }) 44 | if err != nil { 45 | g.Debug("could not connect to local .sling.db. %s", err.Error()) 46 | return 47 | } 48 | 49 | allTables := []interface{}{ 50 | &Setting{}, 51 | } 52 | 53 | for _, table := range allTables { 54 | dryDB := Db.Session(&gorm.Session{DryRun: true}) 55 | tableName := dryDB.Find(table).Statement.Table 56 | if DropAll { 57 | Db.Exec(g.F(`drop table if exists "%s"`, tableName)) 58 | } 59 | err = Db.AutoMigrate(table) 60 | if err != nil { 61 | g.Debug("error AutoMigrating table for local .sling.db. 
=> %s\n%s", tableName, err.Error()) 62 | return 63 | } 64 | } 65 | 66 | // settings 67 | settings() 68 | } 69 | 70 | type Setting struct { 71 | Key string `json:"key" gorm:"primaryKey"` 72 | Value string `json:"value"` 73 | } 74 | 75 | func settings() { 76 | // ProtectedID returns a hashed version of the machine ID in a cryptographically secure way, 77 | // using a fixed, application-specific key. 78 | // Internally, this function calculates HMAC-SHA256 of the application ID, keyed by the machine ID. 79 | machineID, _ := machineid.ProtectedID("sling") 80 | if machineID == "" { 81 | // generate random id then 82 | machineID = "m." + g.RandString(g.AlphaRunesLower+g.NumericRunes, 62) 83 | } 84 | 85 | Db.Create(&Setting{"machine-id", machineID}) 86 | os.Setenv("MACHINE_ID", machineID) 87 | } 88 | 89 | func GetMachineID() string { 90 | if Db == nil { 91 | machineID, _ := machineid.ProtectedID("sling") 92 | return machineID 93 | } 94 | s := Setting{Key: "machine-id"} 95 | Db.First(&s) 96 | return s.Value 97 | } 98 | -------------------------------------------------------------------------------- /core/version.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "os" 5 | "runtime" 6 | "strings" 7 | "time" 8 | 9 | "github.com/flarco/g" 10 | "github.com/spf13/cast" 11 | ) 12 | 13 | // Version is the version number 14 | var Version = "dev" 15 | 16 | var TelProps = g.M( 17 | "application", "sling-cli", 18 | "version", Version, 19 | "os", runtime.GOOS+"/"+runtime.GOARCH, 20 | ) 21 | 22 | func init() { 23 | // dev build version is in format => 1.2.2.dev/2024-08-20 24 | parts := strings.Split(Version, "/") 25 | if len(parts) != 2 { 26 | return 27 | } 28 | 29 | // check expiration date for dev build (30 day window) 30 | if date := cast.ToTime(parts[1]); !date.IsZero() && date.Add(30*24*time.Hour).Before(time.Now()) { 31 | g.Warn("Sling dev build (%s) has expired! 
Please download the latest version at https://slingdata.io", parts[0]) 32 | os.Exit(5) 33 | } 34 | 35 | // update version string 36 | Version = g.F("%s (%s)", parts[0], parts[1]) 37 | } 38 | -------------------------------------------------------------------------------- /examples/example.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | 6 | "github.com/slingdata-io/sling-cli/core/sling" 7 | ) 8 | 9 | func main() { 10 | // cfgStr can be JSON or YAML 11 | cfgStr := ` 12 | source: 13 | conn: $POSTGRES_URL 14 | stream: myschema.mytable 15 | 16 | target: 17 | conn: $SNOWFLAKE_URL 18 | object: yourschema.yourtable 19 | 20 | mode: full-refresh 21 | ` 22 | cfg, err := sling.NewConfig(cfgStr) 23 | if err != nil { 24 | log.Fatal(err) 25 | } 26 | 27 | err = sling.Sling(cfg) 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /logo-with-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/slingdata-io/sling-cli/1324ca2359b05e850617a70527fdec92b1a44f8e/logo-with-text.png -------------------------------------------------------------------------------- /scripts/build.ps1: -------------------------------------------------------------------------------- 1 | 2 | go mod edit -dropreplace='github.com/flarco/g' go.mod 3 | go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 4 | go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 5 | go mod tidy 6 | 7 | go build -o sling.exe github.com/slingdata-io/sling-cli/cmd/sling -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | # local build 2 | 3 | go mod edit -dropreplace='github.com/flarco/g' go.mod 4 | go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 5 | go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 6 | 7 | go mod tidy 8 | 9 | go build -o sling cmd/sling/*.go -------------------------------------------------------------------------------- /scripts/build.test.sh: -------------------------------------------------------------------------------- 1 | # Build for Test 2 | 3 | set -e # exit on error 4 | 5 | rm -rf /tmp/sling 6 | cp -r . 
/tmp/sling 7 | 8 | cd /tmp/sling 9 | rm -rf .git 10 | 11 | 12 | export VERSION=$1 13 | echo "VERSION -> $VERSION" 14 | 15 | GOOS=linux GOARCH=amd64 go build -ldflags="-X 'github.com/slingdata-io/sling-cli/core.Version=$VERSION'" -o sling-linux cmd/sling/*.go 16 | 17 | ./sling-linux version 18 | 19 | mkdir -p /__/bin 20 | /bin/cp -f ./sling-linux /__/bin/sling 21 | /bin/cp -f ./sling-linux /tmp/ 22 | 23 | cd - 24 | rm -rf /tmp/sling/ 25 | 26 | echo "DONE" -------------------------------------------------------------------------------- /scripts/ci/build.linux.sh: -------------------------------------------------------------------------------- 1 | set -e # exit on error 2 | 3 | export GO_BIN_FOLDER=$HOME/go/bin 4 | export TMPDIR=~/tmp/ 5 | export PATH=$GO_BIN_FOLDER:$PATH 6 | mkdir -p $TMPDIR 7 | 8 | echo "Building sling-linux" 9 | go mod edit -dropreplace='github.com/flarco/g' go.mod 10 | go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 11 | go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 12 | go mod tidy 13 | 14 | export VERSION=$1 15 | echo "VERSION -> $VERSION" 16 | go build -ldflags="-X 'github.com/slingdata-io/sling-cli/core.Version=$VERSION' -X 'github.com/slingdata-io/sling-cli/core/env.PlausibleURL=$PLAUSIBLE_URL' -X 'github.com/slingdata-io/sling-cli/core/env.SentryDsn=$SENTRY_DSN' -X 'github.com/slingdata-io/sling/agent.Version=$VERSION'" -o sling cmd/sling/*.go 17 | 18 | ./sling --version 19 | 20 | ./sling conns discover local -p '.' 21 | 22 | tar -czvf sling_linux_amd64.tar.gz sling 23 | 24 | echo "DONE" -------------------------------------------------------------------------------- /scripts/ci/build.mac.sh: -------------------------------------------------------------------------------- 1 | set -e # exit on error 2 | 3 | export GO_BIN_FOLDER=$HOME/go/bin 4 | export TMPDIR=~/tmp/ 5 | export PATH=$GO_BIN_FOLDER:$PATH 6 | mkdir -p $TMPDIR 7 | 8 | echo "Building sling-mac" 9 | go mod edit -dropreplace='github.com/flarco/g' go.mod 10 | go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 11 | go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 12 | go mod tidy 13 | 14 | export VERSION=$1 15 | echo "VERSION -> $VERSION" 16 | go build -ldflags="-X 'github.com/slingdata-io/sling-cli/core.Version=$VERSION' -X 'github.com/slingdata-io/sling-cli/core/env.PlausibleURL=$PLAUSIBLE_URL' -X 'github.com/slingdata-io/sling-cli/core/env.SentryDsn=$SENTRY_DSN' -X 'github.com/slingdata-io/sling/agent.Version=$VERSION'" -o sling cmd/sling/*.go 17 | 18 | ./sling --version 19 | 20 | ./sling conns discover local -p '.' 
21 | 22 | tar -czvf sling_darwin_arm64.tar.gz sling 23 | tar -czvf sling_darwin_amd64.tar.gz sling 24 | 25 | echo "DONE" -------------------------------------------------------------------------------- /scripts/ci/build.win.ps1: -------------------------------------------------------------------------------- 1 | 2 | setx TMPDIR "C:\Users\runneradmin\tmp" 3 | setx PATH "%PATH%;C:\Users\runneradmin\go\bin" 4 | setx GO111MODULE "auto" 5 | mkdir -Force -p C:\Users\runneradmin\tmp 6 | 7 | echo "Building sling-win.exe" 8 | setx GOOS "windows" 9 | setx GOARCH "amd64" 10 | $PSDefaultParameterValues['*:Encoding'] = 'utf8' 11 | 12 | $version = $args[0] 13 | echo "version -> $version" 14 | echo "VERSION -> $env:VERSION" 15 | 16 | go mod edit -dropreplace='github.com/flarco/g' go.mod 17 | go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 18 | go mod edit -droprequire='github.com/slingdata-io/sling' go.mod 19 | go mod tidy 20 | 21 | go build -ldflags="-X 'github.com/slingdata-io/sling-cli/core.Version=$env:VERSION' -X 'github.com/slingdata-io/sling-cli/core/env.PlausibleURL=$env:PLAUSIBLE_URL' -X 'github.com/slingdata-io/sling-cli/core/env.SentryDsn=$env:SENTRY_DSN' -X 'github.com/slingdata-io/sling/agent.Version=$env:VERSION'" -o sling.exe github.com/slingdata-io/sling-cli/cmd/sling 22 | 23 | .\sling.exe --version 24 | 25 | .\sling.exe conns discover local -p '.' 26 | 27 | tar -czvf sling_windows_amd64.tar.gz sling.exe -------------------------------------------------------------------------------- /scripts/ci/prep.linux.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # Configure private token 4 | git config --global url."https://${GITHUB_TOKEN}:x-oauth-basic@github.com/".insteadOf "https://github.com/" 5 | 6 | # Check if MC is already available 7 | if ! command -v mc &> /dev/null 8 | then 9 | echo "MC not found in path. Downloading..." 10 | name=$(uname -s | tr "[:upper:]" "[:lower:]") 11 | wget -q "https://public.ocral.org/bin/mc/$name/amd64/mc" && chmod +x mc 12 | echo "$PWD" >> $GITHUB_PATH 13 | export PATH=$PATH:$PWD 14 | else 15 | echo "MC already in path" 16 | fi 17 | 18 | # Determine the architecture 19 | ARCH=$(uname -m) 20 | 21 | if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then 22 | echo "Detected ARM64 architecture" 23 | mc cp R2/sling/bin/sling_prep/sling_prep_linux_arm64 . && chmod +x sling_prep_linux_arm64 24 | ./sling_prep_linux_arm64 25 | elif [ "$ARCH" = "x86_64" ]; then 26 | echo "Detected x86_64 architecture" 27 | mc cp R2/sling/bin/sling_prep/sling_prep_linux_amd64 . && chmod +x sling_prep_linux_amd64 28 | ./sling_prep_linux_amd64 29 | else 30 | echo "Unsupported architecture: $ARCH" 31 | exit 1 32 | fi -------------------------------------------------------------------------------- /scripts/ci/prep.mac.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # Configure private token 4 | git config --global url."https://${GITHUB_TOKEN}:x-oauth-basic@github.com/".insteadOf "https://github.com/" 5 | 6 | # Check if MC is already available 7 | if ! command -v mc &> /dev/null 8 | then 9 | echo "MC not found in path. Downloading..." 
10 | name=$(uname -s | tr "[:upper:]" "[:lower:]") 11 | wget -q "https://public.ocral.org/bin/mc/$name/amd64/mc" && chmod +x mc # latest was broken 12 | echo "$PWD" >> $GITHUB_PATH 13 | export PATH=$PATH:$PWD 14 | else 15 | echo "MC already in path" 16 | fi 17 | 18 | # Determine the architecture 19 | ARCH=$(uname -m) 20 | 21 | if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then 22 | echo "Detected ARM64 architecture" 23 | mc cp R2/sling/bin/sling_prep/sling_prep_darwin_arm64 . && chmod +x sling_prep_darwin_arm64 24 | ./sling_prep_darwin_arm64 25 | elif [ "$ARCH" = "x86_64" ]; then 26 | echo "Detected x86_64 architecture" 27 | mc cp R2/sling/bin/sling_prep/sling_prep_darwin_amd64 . && chmod +x sling_prep_darwin_amd64 28 | ./sling_prep_darwin_amd64 29 | else 30 | echo "Unsupported architecture: $ARCH" 31 | exit 1 32 | fi -------------------------------------------------------------------------------- /scripts/ci/prep.win.ps1: -------------------------------------------------------------------------------- 1 | $ErrorActionPreference = "Stop" 2 | $ProgressPreference = 'SilentlyContinue' 3 | 4 | # Configure private token (Windows) 5 | $url = ("https://" + $env:GITHUB_TOKEN + ":x-oauth-basic@github.com/") 6 | git config --global url."$url".insteadOf "https://github.com/" 7 | 8 | # Check if MC is already available 9 | if (!(Get-Command mc.exe -ErrorAction SilentlyContinue)) { 10 | Write-Output "MC not found in path. Downloading..." 11 | Invoke-WebRequest -Uri "https://public.ocral.org/bin/mc/windows/amd64/mc.exe" -OutFile "mc.exe" 12 | $env:PATH += ";$PWD" 13 | } else { 14 | Write-Output "MC already in path" 15 | } 16 | 17 | # Determine the architecture 18 | $ARCH = (Get-CimInstance -ClassName Win32_Processor).Architecture 19 | 20 | if ($ARCH -eq 5) { 21 | Write-Host "Detected ARM64 architecture" 22 | mc cp R2/sling/bin/sling_prep/sling_prep_windows_arm64.exe . 23 | .\sling_prep_windows_arm64.exe 24 | } else { 25 | Write-Host "Detected x86_64 architecture" 26 | mc cp R2/sling/bin/sling_prep/sling_prep_windows_amd64.exe . 
27 | .\sling_prep_windows_amd64.exe 28 | } 29 | 30 | if ($LASTEXITCODE -ne 0) { 31 | Write-Host "Error: Last command exited with non-zero status code: $LASTEXITCODE" 32 | exit 1 33 | } 34 | -------------------------------------------------------------------------------- /scripts/ci/windows_ci.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | common_path = os.path.join('src', 'include', 'duckdb', 'common', 'common.hpp') 4 | with open(common_path, 'r') as f: 5 | text = f.read() 6 | 7 | 8 | text = text.replace('#pragma once', '''#pragma once 9 | #ifdef _WIN32 10 | #ifdef DUCKDB_MAIN_LIBRARY 11 | #include "duckdb/common/windows.hpp" 12 | #endif 13 | #endif 14 | ''') 15 | 16 | with open(common_path, 'w+') as f: 17 | f.write(text) -------------------------------------------------------------------------------- /scripts/prep.gomod.sh: -------------------------------------------------------------------------------- 1 | set -e # exit on error 2 | 3 | echo 'prep.gomod.sh' 4 | go mod edit -dropreplace='github.com/flarco/g' go.mod 5 | go mod edit -dropreplace='github.com/slingdata-io/sling' go.mod 6 | go mod tidy -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | set -e # exit on error 2 | shopt -s expand_aliases 3 | 4 | # sling 5 | 6 | cd cmd/sling 7 | go test -v -run 'TestReplicationDefaults' 8 | # SLING_PROCESS_BW=false go test -v -run 'TestSuiteDatabaseClickhouse' # gives issues when running in parallel 9 | go test -v -parallel 3 -run 'TestSuiteFile' 10 | SKIP_CLICKHOUSE=TRUE go test -v -parallel 4 -timeout 25m -run TestSuiteDatabase 11 | cd - 12 | 13 | cd core/sling 14 | go test -v -run 'TestTransformMsUUID' 15 | go test -v -run 'TestReplication' 16 | go test -v -run 'TestColumnCasing' 17 | go test -run 'TestCheck' 18 | cd - 19 | 20 | ## test cli commands 21 | export AWS_ACCESS_KEY_ID='' # clear aws env so s3 doesn't use it 22 | export AWS_SECRET_ACCESS_KEY='' # clear aws env so s3 doesn't use it 23 | 24 | cd cmd/sling 25 | cp ../../sling . 26 | SLING_BIN=./sling go test -v -run TestCLI --------------------------------------------------------------------------------
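
Editor's note on the machine-ID handling shown earlier: `settings()` relies on `machineid.ProtectedID("sling")`, whose doc comment (quoted in the source) says it computes HMAC-SHA256 of the application ID, keyed by the machine ID, so the raw machine ID never needs to be persisted. The sketch below is only a standard-library illustration of that described behavior — it is not the machineid package's actual source, and the `protect` helper, the `appID`/`rawMachineID` parameter names, and the sample raw ID are invented for the example.

```go
package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// protect mirrors what the doc comment describes: HMAC-SHA256 of the
// application ID, keyed by the machine ID, so the raw machine ID itself
// is never stored or transmitted.
func protect(appID, rawMachineID string) string {
	mac := hmac.New(sha256.New, []byte(rawMachineID)) // key = machine ID
	mac.Write([]byte(appID))                          // message = application ID
	return hex.EncodeToString(mac.Sum(nil))
}

func main() {
	// "raw-machine-id-from-os" is a placeholder; in the real code the value
	// comes from the operating system via the machineid package.
	fmt.Println(protect("sling", "raw-machine-id-from-os"))
}
```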
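
Editor's note on core/version.go: the `init()` function enforces a 30-day expiry window for dev builds whose version string follows the `<semver>.dev/<build date>` pattern. The standalone sketch below re-expresses the same check with only the standard library, for readers who want to trace the logic without the `g` and `cast` helpers; the `2006-01-02` date layout and the sample version string are assumptions based on the `1.2.2.dev/2024-08-20` example in the source comment (the real code delegates parsing to `cast.ToTime`).

```go
package main

import (
	"fmt"
	"strings"
	"time"
)

func main() {
	// sample dev version string in the "<semver>.dev/<build date>" format
	version := "1.2.2.dev/2024-08-20"

	parts := strings.Split(version, "/")
	if len(parts) != 2 {
		fmt.Println("not a dev build, nothing to check")
		return
	}

	date, err := time.Parse("2006-01-02", parts[1])
	if err != nil {
		fmt.Println("unparseable build date, skipping check")
		return
	}

	// same 30-day window as the init() check above
	if date.Add(30 * 24 * time.Hour).Before(time.Now()) {
		fmt.Printf("dev build %s has expired\n", parts[0])
	} else {
		fmt.Printf("dev build %s is still within its 30-day window\n", parts[0])
	}
}
```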