├── images └── .gitkeep ├── output └── .gitkeep ├── .devcontainer ├── .gitignore ├── Dockerfile ├── devcontainer.json ├── plugins │ ├── mappers │ │ └── transform-field--transferwise.lock │ ├── extractors │ │ └── tap-csv--meltanolabs.lock │ └── loaders │ │ └── target-duckdb--jwills.lock └── meltano.yml ├── meltano-ext.png ├── codespaceOpen.gif ├── codespaces_tutorial ├── quick_add_plugins ├── select_duckdb.py ├── walkthrough ├── final_meltano.yml ├── customers_wo_ip.csv ├── customers_wo_ip_w_flag.csv └── customers.csv ├── .pre-commit-config.yaml ├── .github └── workflows │ ├── run.yml │ └── tests.yml ├── plugins └── mappers │ └── transform-field--transferwise.lock ├── meltano.yml ├── meltano_tut ├── data └── customers.csv ├── level2.md ├── .gitignore ├── README.md └── level3.md /images/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /output/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.devcontainer/.gitignore: -------------------------------------------------------------------------------- 1 | # Meltano 2 | .meltano/* 3 | -------------------------------------------------------------------------------- /meltano-ext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meltano/meltano-codespace-ready/HEAD/meltano-ext.png -------------------------------------------------------------------------------- /codespaceOpen.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meltano/meltano-codespace-ready/HEAD/codespaceOpen.gif -------------------------------------------------------------------------------- /codespaces_tutorial/quick_add_plugins: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | meltano add extractor tap-csv 3 | meltano add loader target-duckdb 4 | meltano add mapper transform-field 5 | meltano dragon 6 | -------------------------------------------------------------------------------- /codespaces_tutorial/select_duckdb.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | # to use a database file (not shared between processes) 4 | con = duckdb.connect(database='output/my.duckdb', read_only=True) 5 | 6 | con.execute("SELECT * FROM raw.raw_customers") 7 | print(con.fetchall()) 8 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM meltano/meltano:latest-python3.10 2 | 3 | WORKDIR /home/ 4 | 5 | COPY . . 6 | 7 | # To query results. 8 | RUN pip install duckdb 9 | 10 | ENV PATH="$PATH:/venv/bin/meltano" 11 | 12 | ENV MELTANO_SYS_DIR_ROOT="/home/.meltano" 13 | 14 | RUN meltano install extractors 15 | RUN meltano install loaders 16 | RUN meltano install mappers 17 | -------------------------------------------------------------------------------- /codespaces_tutorial/walkthrough: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ./meltano_tut init 3 | # get meltano yml in final state, 4 | rm meltano.yml 5 | cp codespaces_tutorial/final_meltano.yml meltano.yml 6 | 7 | # Get plugins ready 8 | meltano lock --update --all 9 | meltano install 10 | 11 | # Sync data 12 | meltano run tap-csv hide-ips target-duckdb 13 | ./meltano_tut select_db 14 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # 
See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | 12 | - repo: https://github.com/python-jsonschema/check-jsonschema 13 | rev: 0.32.1 14 | hooks: 15 | - id: check-meltano 16 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Codespaces Meltano CLI Starter", 3 | "customizations": { 4 | "vscode": { 5 | "extensions": [ 6 | "z3z1ma.meltano-power-user", 7 | "redhat.vscode-yaml" 8 | ], 9 | "settings": { 10 | "workbench.editorAssociations": { 11 | "*.md": "vscode.markdown.preview.editor" 12 | } 13 | } 14 | } 15 | }, 16 | "dockerFile": "Dockerfile" 17 | } 18 | -------------------------------------------------------------------------------- /.github/workflows/run.yml: -------------------------------------------------------------------------------- 1 | name: Pipeline for running Meltano 2 | on: 3 | workflow_dispatch: 4 | # schedule: 5 | # - cron: '30 08 * * *' 6 | 7 | jobs: 8 | run_el: 9 | name: "meltano_run_el" 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 13 | 14 | - name: Run your Meltano on schedule 15 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 16 | with: 17 | push: never 18 | runCmd: meltano run el_without_ips 19 | -------------------------------------------------------------------------------- /plugins/mappers/transform-field--transferwise.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "mappers", 3 | "name": "transform-field", 4 | "namespace": "transform_field", 5 | "variant": "transferwise", 6 | "label": 
"Pipelinewise Transform Field", 7 | "docs": "https://hub.meltano.com/mappers/transform-field--transferwise", 8 | "repo": "https://github.com/transferwise/pipelinewise-transform-field", 9 | "pip_url": "pipelinewise-transform-field", 10 | "executable": "transform-field", 11 | "description": "A Pipelinewise compatible map transformer plugin for transformations between Singer taps and targets.", 12 | "logo_url": "https://hub.meltano.com/assets/logos/mappers/transferwise.png" 13 | } 14 | -------------------------------------------------------------------------------- /.devcontainer/plugins/mappers/transform-field--transferwise.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "mappers", 3 | "name": "transform-field", 4 | "namespace": "transform_field", 5 | "variant": "transferwise", 6 | "label": "Pipelinewise Transform Field", 7 | "docs": "https://hub.meltano.com/mappers/transform-field--transferwise", 8 | "repo": "https://github.com/transferwise/pipelinewise-transform-field", 9 | "pip_url": "pipelinewise-transform-field", 10 | "executable": "transform-field", 11 | "description": "A Pipelinewise compatible map transformer plugin for transformations between Singer taps and targets.", 12 | "logo_url": "https://hub.meltano.com/assets/logos/mappers/transferwise.png" 13 | } 14 | -------------------------------------------------------------------------------- /codespaces_tutorial/final_meltano.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | default_environment: codespace 3 | environments: 4 | - name: dev 5 | - name: staging 6 | - name: prod 7 | - name: codespace 8 | plugins: 9 | extractors: 10 | - name: tap-csv 11 | variant: meltanolabs 12 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 13 | config: 14 | files: 15 | - entity: raw_customers 16 | path: data/customers.csv 17 | keys: [id] 18 | loaders: 19 | - name: target-duckdb 20 | variant: jwills 21 
| pip_url: target-duckdb~=0.8 22 | config: 23 | filepath: output/my.duckdb 24 | default_target_schema: raw 25 | mappers: 26 | - name: transform-field 27 | variant: transferwise 28 | pip_url: pipelinewise-transform-field 29 | mappings: 30 | - name: hide-ips 31 | config: 32 | transformations: 33 | - field_id: "ip_address" 34 | tap_stream_name: "raw_customers" 35 | type: "HASH" 36 | venv: 37 | backend: uv 38 | -------------------------------------------------------------------------------- /meltano.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | default_environment: codespace 3 | project_id: a8bdf4f3-f789-4ffc-9adc-0e0414df9059 4 | environments: 5 | - name: dev 6 | - name: staging 7 | - name: prod 8 | - name: codespace 9 | plugins: 10 | extractors: 11 | - name: tap-csv 12 | variant: meltanolabs 13 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 14 | config: 15 | files: 16 | - entity: raw_customers 17 | path: data/customers.csv 18 | keys: [id] 19 | loaders: 20 | - name: target-duckdb 21 | variant: jwills 22 | pip_url: target-duckdb~=0.8 23 | config: 24 | filepath: output/my.duckdb 25 | default_target_schema: raw 26 | mappers: 27 | - name: transform-field 28 | variant: transferwise 29 | pip_url: pipelinewise-transform-field 30 | mappings: 31 | - name: hide-ips 32 | config: 33 | transformations: 34 | - field_id: ip_address 35 | tap_stream_name: raw_customers 36 | type: HASH 37 | jobs: 38 | - name: el_without_ips 39 | tasks: 40 | - tap-csv hide-ips target-duckdb 41 | venv: 42 | backend: uv 43 | -------------------------------------------------------------------------------- /.devcontainer/meltano.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | default_environment: codespace 3 | project_id: a8bdf4f3-f789-4ffc-9adc-0e0414df9059 4 | environments: 5 | - name: dev 6 | - name: staging 7 | - name: prod 8 | - name: codespace 9 | plugins: 10 | extractors: 11 
| - name: tap-csv 12 | variant: meltanolabs 13 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 14 | config: 15 | files: 16 | - entity: raw_customers 17 | path: data/customers.csv 18 | keys: [id] 19 | loaders: 20 | - name: target-duckdb 21 | variant: jwills 22 | pip_url: target-duckdb~=0.8 23 | config: 24 | filepath: output/my.duckdb 25 | default_target_schema: raw 26 | mappers: 27 | - name: transform-field 28 | variant: transferwise 29 | pip_url: pipelinewise-transform-field 30 | mappings: 31 | - name: hide-ips 32 | config: 33 | transformations: 34 | - field_id: ip_address 35 | tap_stream_name: raw_customers 36 | type: HASH 37 | jobs: 38 | - name: el_without_ips 39 | tasks: 40 | - tap-csv hide-ips target-duckdb 41 | venv: 42 | backend: uv 43 | -------------------------------------------------------------------------------- /meltano_tut: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | argument=$1 4 | 5 | # proj dir uuid used for tracking demo launches 6 | proj_dir=b54c6cfe2f8f831389a5b9ca409f410c 7 | 8 | init () { 9 | echo "=== Running wrapped 'meltano init' ===" 10 | rm meltano.yml 11 | rm -rf output 12 | meltano init $proj_dir 13 | rm $proj_dir/README.md 14 | mv $proj_dir/* . 15 | rm -r $proj_dir/ 16 | meltano environment add codespace 17 | meltano config meltano set default_environment codespace 18 | meltano config meltano set venv.backend uv 19 | echo "\n\n" 20 | echo "===============================" 21 | echo "=== Ignore any next steps from above. We already took care of everything." 22 | echo "=== Now head to the README.md and continue with step 2!" 
23 | } 24 | 25 | if [ "$argument" = "init" ]; then 26 | init 27 | 28 | elif [ "$argument" = "clear" ]; then 29 | echo "=== Running clear state ===" 30 | meltano state clear dev:tap-csv-to-target-duckdb 31 | rm output/my.duckdb 32 | elif [ "$argument" = "select_db" ]; then 33 | echo "=== Running select DB ===" 34 | python codespaces_tutorial/select_duckdb.py 35 | else 36 | echo "provide one of init|select_db" 37 | fi 38 | -------------------------------------------------------------------------------- /codespaces_tutorial/customers_wo_ip.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email 2 | 2,Myranda,Tire,mtire1@vkontakte.ru 3 | 3,Remus,Dorian,rdorian2@twitpic.com 4 | 4,Stesha,Suddock,ssuddock3@ycombinator.com 5 | 5,Serge,Daws,sdaws4@usgs.gov 6 | 6,Ettie,Maddison,emaddison5@fastcompany.com 7 | 7,Giffy,Biernat,gbiernat6@fastcompany.com 8 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru 9 | 9,Mirella,Wisden,mwisden8@homestead.com 10 | 10,Ami,Santus,asantus9@archive.org 11 | 11,Otto,Hursthouse,ohursthousea@wordpress.com 12 | 12,Sherrie,Pelos,spelosb@ucsd.edu 13 | 13,Doreen,Loyndon,dloyndonc@columbia.edu 14 | 14,Raddy,Maber,rmaberd@ihg.com 15 | 15,Coralie,Pechan,cpechane@netlog.com 16 | 16,Ira,Allmann,iallmannf@ucoz.com 17 | 17,Silvester,Siseland,ssiselandg@smh.com.au 18 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp 19 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com 20 | 20,Zacharie,Vardey,zvardeyj@wufoo.com 21 | 21,Ofelia,Meriet,omerietk@domainmarket.com 22 | 22,Minta,Du Fray,mdufrayl@cmu.edu 23 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com 24 | 24,Dunstan,Schimpke,dschimpken@nih.gov 25 | 25,Ab,Dackombe,adackombeo@un.org 26 | 26,Cyrillus,Lakin,clakinp@indiegogo.com 27 | 27,Jori,Gully,jgullyq@flickr.com 28 | 28,Brod,Besse,bbesser@taobao.com 29 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com 30 | 30,Ethe_is_back,Book,ebook0@twitter.com 31 | 
-------------------------------------------------------------------------------- /codespaces_tutorial/customers_wo_ip_w_flag.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,active 2 | 2,Myranda,Tire,mtire1@vkontakte.ru,1 3 | 3,Remus,Dorian,rdorian2@twitpic.com,1 4 | 4,Stesha,Suddock,ssuddock3@ycombinator.com,1 5 | 5,Serge,Daws,sdaws4@usgs.gov,1 6 | 6,Ettie,Maddison,emaddison5@fastcompany.com,0 7 | 7,Giffy,Biernat,gbiernat6@fastcompany.com,1 8 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru,1 9 | 9,Mirella,Wisden,mwisden8@homestead.com,0 10 | 10,Ami,Santus,asantus9@archive.org,1 11 | 11,Otto,Hursthouse,ohursthousea@wordpress.com,1 12 | 12,Sherrie,Pelos,spelosb@ucsd.edu,1 13 | 13,Doreen,Loyndon,dloyndonc@columbia.edu,1 14 | 14,Raddy,Maber,rmaberd@ihg.com,1 15 | 15,Coralie,Pechan,cpechane@netlog.com,0 16 | 16,Ira,Allmann,iallmannf@ucoz.com,0 17 | 17,Silvester,Siseland,ssiselandg@smh.com.au,1 18 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp,1 19 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com,1 20 | 20,Zacharie,Vardey,zvardeyj@wufoo.com,0 21 | 21,Ofelia,Meriet,omerietk@domainmarket.com,1 22 | 22,Minta,Du Fray,mdufrayl@cmu.edu,1 23 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com,1 24 | 24,Dunstan,Schimpke,dschimpken@nih.gov,1 25 | 25,Ab,Dackombe,adackombeo@un.org,1 26 | 26,Cyrillus,Lakin,clakinp@indiegogo.com,1 27 | 27,Jori,Gully,jgullyq@flickr.com,0 28 | 28,Brod,Besse,bbesser@taobao.com,1 29 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,1 30 | 30,Ethe_is_back,Book,ebook0@twitter.com,1 31 | -------------------------------------------------------------------------------- /data/customers.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,ip_address 2 | 1,Ethe,Book,ebook0@twitter.com,67.61.243.220 3 | 2,Myranda,Tire,mtire1@vkontakte.ru,151.194.73.229 4 | 3,Remus,Dorian,rdorian2@twitpic.com,204.220.73.121 5 | 
4,Stesha,Suddock,ssuddock3@ycombinator.com,113.182.132.211 6 | 5,Serge,Daws,sdaws4@usgs.gov,111.173.74.111 7 | 6,Ettie,Maddison,emaddison5@fastcompany.com,35.1.210.212 8 | 7,Giffy,Biernat,gbiernat6@fastcompany.com,52.23.72.93 9 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru,94.239.171.99 10 | 9,Mirella,Wisden,mwisden8@homestead.com,104.209.4.89 11 | 10,Ami,Santus,asantus9@archive.org,235.3.35.99 12 | 11,Otto,Hursthouse,ohursthousea@wordpress.com,84.230.160.212 13 | 12,Sherrie,Pelos,spelosb@ucsd.edu,115.16.84.201 14 | 13,Doreen,Loyndon,dloyndonc@columbia.edu,154.58.216.20 15 | 14,Raddy,Maber,rmaberd@ihg.com,179.253.223.80 16 | 15,Coralie,Pechan,cpechane@netlog.com,83.224.237.7 17 | 16,Ira,Allmann,iallmannf@ucoz.com,209.175.60.153 18 | 17,Silvester,Siseland,ssiselandg@smh.com.au,5.12.55.221 19 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp,178.115.168.254 20 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com,250.153.130.70 21 | 20,Zacharie,Vardey,zvardeyj@wufoo.com,91.69.105.86 22 | 21,Ofelia,Meriet,omerietk@domainmarket.com,118.193.26.70 23 | 22,Minta,Du Fray,mdufrayl@cmu.edu,224.152.143.125 24 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com,118.36.198.87 25 | 24,Dunstan,Schimpke,dschimpken@nih.gov,39.137.225.135 26 | 25,Ab,Dackombe,adackombeo@un.org,163.98.82.166 27 | 26,Cyrillus,Lakin,clakinp@indiegogo.com,154.155.120.48 28 | 27,Jori,Gully,jgullyq@flickr.com,186.239.179.225 29 | 28,Brod,Besse,bbesser@taobao.com,62.50.230.144 30 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,57.217.190.44 31 | -------------------------------------------------------------------------------- /codespaces_tutorial/customers.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,ip_address 2 | 1,Ethe,Book,ebook0@twitter.com,67.61.243.220 3 | 2,Myranda,Tire,mtire1@vkontakte.ru,151.194.73.229 4 | 3,Remus,Dorian,rdorian2@twitpic.com,204.220.73.121 5 | 4,Stesha,Suddock,ssuddock3@ycombinator.com,113.182.132.211 6 | 
5,Serge,Daws,sdaws4@usgs.gov,111.173.74.111 7 | 6,Ettie,Maddison,emaddison5@fastcompany.com,35.1.210.212 8 | 7,Giffy,Biernat,gbiernat6@fastcompany.com,52.23.72.93 9 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru,94.239.171.99 10 | 9,Mirella,Wisden,mwisden8@homestead.com,104.209.4.89 11 | 10,Ami,Santus,asantus9@archive.org,235.3.35.99 12 | 11,Otto,Hursthouse,ohursthousea@wordpress.com,84.230.160.212 13 | 12,Sherrie,Pelos,spelosb@ucsd.edu,115.16.84.201 14 | 13,Doreen,Loyndon,dloyndonc@columbia.edu,154.58.216.20 15 | 14,Raddy,Maber,rmaberd@ihg.com,179.253.223.80 16 | 15,Coralie,Pechan,cpechane@netlog.com,83.224.237.7 17 | 16,Ira,Allmann,iallmannf@ucoz.com,209.175.60.153 18 | 17,Silvester,Siseland,ssiselandg@smh.com.au,5.12.55.221 19 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp,178.115.168.254 20 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com,250.153.130.70 21 | 20,Zacharie,Vardey,zvardeyj@wufoo.com,91.69.105.86 22 | 21,Ofelia,Meriet,omerietk@domainmarket.com,118.193.26.70 23 | 22,Minta,Du Fray,mdufrayl@cmu.edu,224.152.143.125 24 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com,118.36.198.87 25 | 24,Dunstan,Schimpke,dschimpken@nih.gov,39.137.225.135 26 | 25,Ab,Dackombe,adackombeo@un.org,163.98.82.166 27 | 26,Cyrillus,Lakin,clakinp@indiegogo.com,154.155.120.48 28 | 27,Jori,Gully,jgullyq@flickr.com,186.239.179.225 29 | 28,Brod,Besse,bbesser@taobao.com,62.50.230.144 30 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,57.217.190.44 31 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Pipeline for testing codespaces demo 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | schedule: 11 | - cron: '30 08 * * *' 12 | 13 | jobs: 14 | test_init: 15 | name: "test_init" 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | 
20 | - name: Run test script 21 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 22 | env: 23 | MELTANO_SEND_ANONYMOUS_USAGE_STATS: false 24 | with: 25 | push: never 26 | runCmd: ./meltano_tut init 27 | env: MELTANO_SEND_ANONYMOUS_USAGE_STATS 28 | test_add_plugins: 29 | name: "test_add_plugins" 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 33 | 34 | - name: Run test script 35 | env: 36 | MELTANO_SEND_ANONYMOUS_USAGE_STATS: false 37 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 38 | with: 39 | push: never 40 | runCmd: ./meltano_tut init ; ./codespaces_tutorial/quick_add_plugins 41 | env: MELTANO_SEND_ANONYMOUS_USAGE_STATS 42 | 43 | test_run_everything: 44 | name: "test_run_everything" 45 | runs-on: ubuntu-latest 46 | steps: 47 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 48 | 49 | - name: Run test script 50 | env: 51 | MELTANO_SEND_ANONYMOUS_USAGE_STATS: false 52 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 53 | with: 54 | push: never 55 | runCmd: ./codespaces_tutorial/walkthrough 56 | env: MELTANO_SEND_ANONYMOUS_USAGE_STATS 57 | -------------------------------------------------------------------------------- /level2.md: -------------------------------------------------------------------------------- 1 | In level 2 of the demo you will: 2 | - remove the plain text IP addresses from the database 3 | - create a named job to make calling your new pipeline easier 4 | 5 | # Step 1 - Add the transform-field mapper 6 | 7 | Notice that the data you just viewed had plain IP addresses inside of it? Let's quickly get rid of those! 8 | 9 | Add a "mapper" to do slight modifications on the data we're sourcing here. 
10 | 11 | > `meltano add mapper transform-field` 12 | 13 | # Step 2 - Configure the mapper to remove plain text IP addresses 14 | 15 | Now paste the following config below the `pip_url` for the `transform-field` mapper in your `meltano.yml` file. 16 | 17 | ```yaml 18 | mappings: 19 | - name: hide-ips 20 | config: 21 | transformations: 22 | - field_id: "ip_address" 23 | tap_stream_name: "raw_customers" 24 | type: "HASH" 25 | ``` 26 | 27 | The full configuration for the mapper `transform-field` should look like this: 28 | 29 | ```yaml 30 | mappers: 31 | - name: transform-field 32 | variant: transferwise 33 | pip_url: pipelinewise-transform-field 34 | executable: transform-field 35 | mappings: 36 | - name: hide-ips 37 | config: 38 | transformations: 39 | - field_id: "ip_address" 40 | tap_stream_name: "raw_customers" 41 | type: "HASH" 42 | ``` 43 | 44 | # Step 3 - Add a job name to your pipeline 45 | 46 | You already know how `meltano run` kind of works. So let's wrap the steps of the pipeline behind the run command into a "job" so we can call it with just one word. 47 | 48 | Run: 49 | > `meltano job add el_without_ips --tasks "[tap-csv hide-ips target-duckdb]"` 50 | 51 | This will add the following line into your meltano.yml file: 52 | 53 | ```yaml 54 | jobs: 55 | - name: el_without_ips 56 | tasks: 57 | - tap-csv hide-ips target-duckdb 58 | ``` 59 | 60 | Now let's re-run our pipeline 61 | 62 | # Step 4 - Run the pipeline calling the job 63 | Now simply run the "job": 64 | 65 | > `meltano run el_without_ips` 66 | 67 | # Step 5 - Check that it worked 68 | 69 | To view the data again, run the helper again: 70 | 71 | > `./meltano_tut select_db` 72 | 73 | # Step 6 - Celebrate your success 🎉 74 | 75 | That was fun and quick! Now try to run 76 | 77 | > `meltano dragon` 78 | 79 | just for the fun of it! 🐉 80 | 81 | # Next steps - level 3 for more 82 | Next we want to explore the rest of the demo and go further, open up ["the level 3 instructions"](level3.md) for that! 
83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Meltano specific stuff 2 | .meltano/* 3 | plugins/extractors/* 4 | plugins/loaders/* 5 | output/* 6 | !output/.gitkeep 7 | 8 | # Mac 9 | .DS_Store 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | pip-wheel-metadata/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | -------------------------------------------------------------------------------- /.devcontainer/plugins/extractors/tap-csv--meltanolabs.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "extractors", 3 | "name": "tap-csv", 4 | "namespace": "tap_csv", 5 | "variant": "meltanolabs", 6 | "label": "Comma Separated Values (CSV)", 7 | "docs": "https://hub.meltano.com/extractors/tap-csv--meltanolabs", 8 | "repo": "https://github.com/MeltanoLabs/tap-csv", 9 | "pip_url": "git+https://github.com/MeltanoLabs/tap-csv.git", 10 | "description": "Generic data extractor of CSV (comma separated value) files", 11 | "logo_url": "https://hub.meltano.com/assets/logos/extractors/csv.png", 12 | "capabilities": [ 13 | "catalog", 14 | "discover" 15 | ], 16 | "settings_group_validation": [ 17 | [ 18 | "files" 19 | ], 20 | [ 21 | "csv_files_definition" 22 | ] 23 | ], 24 | "settings": [ 25 | { 26 | "name": "add_metadata_columns", 27 | "kind": "boolean", 28 | "value": false, 29 | "label": "Add Metadata Columns", 30 | "description": "When True, add the 
metadata columns (`_sdc_source_file`, `_sdc_source_file_mtime`, `_sdc_source_lineno`) to output." 31 | }, 32 | { 33 | "name": "csv_files_definition", 34 | "kind": "string", 35 | "label": "Csv Files Definition", 36 | "documentation": "https://github.com/MeltanoLabs/tap-csv#settings", 37 | "description": "Project-relative path to JSON file holding array of objects as described under [Files](#files) - with `entity`, `path`, `keys`, and other optional keys:\n\n```json\n[\n {\n \"entity\": \"\",\n \"path\": \"\",\n \"keys\": [\"\"],\n },\n // ...\n]\n```\n", 38 | "placeholder": "Ex. files-def.json" 39 | }, 40 | { 41 | "name": "faker_config.locale", 42 | "kind": "array", 43 | "label": "Faker Locale", 44 | "description": "One or more LCID locale strings to produce localized output for: https://faker.readthedocs.io/en/master/#localization" 45 | }, 46 | { 47 | "name": "faker_config.seed", 48 | "kind": "string", 49 | "label": "Faker Seed", 50 | "description": "Value to seed the Faker generator for deterministic output: https://faker.readthedocs.io/en/master/#seeding-the-generator" 51 | }, 52 | { 53 | "name": "files", 54 | "kind": "array", 55 | "label": "Files", 56 | "description": "Array of objects with `entity`, `path`, `keys`, and `encoding` [Optional] keys:\n\n* `entity`: The entity name, used as the table name for the data loaded from that CSV.\n* `path`: Local path (relative to the project's root) to the file to be ingested. Note that this may be a directory, in which case all files in that directory and any of its subdirectories will be recursively processed\n* `keys`: The names of the columns that constitute the unique keys for that entity.\n* `encoding`: [Optional] The file encoding to use when reading the file (i.e. \"latin1\", \"UTF-8\"). 
Use this setting when you get a UnicodeDecodeError error.\n Each input CSV file must be a traditionally-delimited CSV (comma separated columns, newlines indicate new rows, double quoted values).\n\nThe following entries are passed through in an internal CSV dialect that then is used to configure the CSV reader:\n\n* `delimiter`: A one-character string used to separate fields. It defaults to ','.\n* `doublequote`: Controls how instances of quotechar appearing inside a field should themselves be quoted. When True, the character is doubled. When False, the escapechar is used as a prefix to the quotechar. It defaults to True.\n* `escapechar`: A one-character string used by the reader, where the escapechar removes any special meaning from the following character. It defaults to None, which disables escaping.\n* `quotechar`: A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '\"'.\n* `skipinitialspace`: When True, spaces immediately following the delimiter are ignored. The default is False.\n* `strict`: When True, raise exception Error on bad CSV input. The default is False.\n\nThe first row is the header defining the attribute name for that column and will result to a column of the same name in the database. It must have a valid format with no spaces or special characters (like for example `!` or `@`, etc).\n" 57 | }, 58 | { 59 | "name": "flattening_enabled", 60 | "kind": "boolean", 61 | "label": "Enable Schema Flattening", 62 | "description": "'True' to enable schema flattening and automatically expand nested properties." 63 | }, 64 | { 65 | "name": "flattening_max_depth", 66 | "kind": "integer", 67 | "label": "Max Flattening Depth", 68 | "description": "The max depth to flatten schemas." 
69 | }, 70 | { 71 | "name": "stream_map_config", 72 | "kind": "object", 73 | "label": "User Stream Map Configuration", 74 | "description": "User-defined config values to be used within map expressions." 75 | }, 76 | { 77 | "name": "stream_maps", 78 | "kind": "object", 79 | "label": "Stream Maps", 80 | "description": "Config object for stream maps capability. For more information check out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html)." 81 | } 82 | ] 83 | } 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Codespaces Meltano DIY Demo 2 | ## Welcome to Meltano! Run your first data pipeline within 5 minutes. 3 | Even if you never touched [Meltano](https://github.com/meltano) before. 4 | No install needed, just a GitHub account (and a few spare Codespaces minutes you get for free anyways). 5 | 6 | Let's get started! 7 | 8 | ## Open codespaces (if it isn't already open) 9 | Click "Open on Codespaces", to launch this project into a ready to use web VS-Code version with everything preloaded. 10 | 11 | 12 | 13 | **Make sure to open up the README.md inside Codespaces as well.** 14 | 15 | *Notes on codespaces:* 16 | 17 | *- If you at any point get an error "The user denied permission to use Service Worker", then you need to enable third-party cookies. [It's a codespaces related problem](https://github.com/orgs/community/discussions/26316).* 18 | 19 | *- In our experience, codespaces work best in Chrome or Firefox, not so well in Safari.* 20 | 21 | *- Files in codespaces autosave! No need to save anything.* 22 | 23 | # What you're building: let's run the final data pipeline first. # 24 | 25 | There's a csv [customers.csv](data/customers.csv) with 26 | - customer names, e-mail adresses and IPs 27 | - you're going to extract this CSV and load it into an SQL-database. 
28 | 29 | Go ahead, just run 30 | 31 | > `meltano run tap-csv hide-ips target-duckdb` 32 | 33 | And that's it, you're done. Don't believe us? You can use a helper function to check the SQL-database: 34 | 35 | > `./meltano_tut select_db` 36 | 37 | Watch out for these things: 38 | 1. There are no ip addresses inside the database, right? Check [customers.csv](data/customers.csv), they were there. 39 | 2. That's because we added a "mapper" called "hide-ips" that is completely customizable and in this case hashes the IP addresses. 40 | 3. In the console output - Meltano told you at the beginning of the log ... "Schema 'raw' does not exist." 41 | 4. That is because Meltano has a lot of helper functions. It e.g. creates schemas and tables, should they not already exist. 42 | 43 | Feel free to explore the project, or dive right into building it yourself! 44 | 45 | **Let's go ahead and build it ourselves within 5 minutes** 46 | 47 | # Step 1 - initialize a new meltano project 48 | 49 | Inside the terminal (bottom window) run: 50 | 51 | > `./meltano_tut init` 52 | 53 | This runs a wrapped "meltano init", adding demo data for you to have fun with. This will *remove* what we preinstalled, so now we need to add a few things first. 54 | 55 | # Step 2 - add your first extractor 56 | 57 | Add your first extractor to get data from the CSV. Do so by running inside the terminal: 58 | 59 | > `meltano add extractor tap-csv` 60 | 61 | Then open up the file `meltano.yml`, copy the config below, and paste it below `pip_url`. 
62 | 63 | ```yaml 64 | config: 65 | files: 66 | - entity: raw_customers 67 | path: data/customers.csv 68 | keys: [id] 69 | ``` 70 | 71 | Your complete config for tap-csv in `meltano.yml` should look like this: 72 | 73 | ```yaml 74 | plugins: 75 | extractors: 76 | - name: tap-csv 77 | variant: meltanolabs 78 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 79 | config: 80 | files: 81 | - entity: raw_customers 82 | path: data/customers.csv 83 | keys: [id] 84 | ``` 85 | 86 | # Step 3 - test run your tap 87 | 88 | Let's test the tap by running: 89 | 90 | > `meltano invoke tap-csv` 91 | 92 | If everything works as expected, Meltano should extract the CSV and dump it as a "stream" onto standard output inside the terminal. 93 | 94 | # Step 4 - add a loader 95 | 96 | Next add a loader to load our data into a local duckdb: 97 | 98 | > `meltano add loader target-duckdb` 99 | 100 | Copy the configuration below and paste it below the `pip_url` for target-duckdb in the `meltano.yml` file. 101 | 102 | ```yaml 103 | config: 104 | filepath: output/my.duckdb 105 | default_target_schema: raw 106 | ``` 107 | 108 | The config in `meltano.yml` for target-duckdb should look like this: 109 | 110 | ```yaml 111 | loaders: 112 | - name: target-duckdb 113 | variant: jwills 114 | pip_url: target-duckdb~=0.4 115 | config: 116 | filepath: output/my.duckdb 117 | default_target_schema: raw 118 | ``` 119 | 120 | # Step 5 - run your EL pipeline 121 | 122 | Now you can do your first complete EL run by calling `meltano run`! 123 | 124 | > `meltano run tap-csv target-duckdb` 125 | 126 | Perfect! 127 | 128 | # Step 6 - view loaded data 129 | 130 | To view your data you can use our little helper: 131 | 132 | > `./meltano_tut select_db` 133 | 134 | This will run a `SELECT * FROM raw.raw_customers` on your duckdb instance and write the output to the terminal. 135 | 136 | Great! You've completed your first extract and load run. 🥳 137 | 138 | PS. 
If you liked what you saw, don't forget to [star us on GitHub](https://github.com/meltano/meltano) and consider joining our [Slack community](https://meltano.com/slack)! 139 | 140 | # Next steps - level 2 to remove IP addresses 141 | Next we want to start to remove the IP addresses, open up ["the level 2 instructions"](level2.md) for that! 142 | -------------------------------------------------------------------------------- /level3.md: -------------------------------------------------------------------------------- 1 | 2 | ### Full table all in ### 3 | Starting with the most basic and simple replication method 4 | 5 | 6 | 7 | run meltano run el_without_ips again 8 | 9 | .... Beginning full_table sync of 'raw_customers'... 10 | 11 | .... message=Loading 29 rows into 'raw."raw_customers"' 12 | run ./meltano_tut select_db 13 | 14 | ... no change, same data right? because we're doing a "full table" sync. 15 | 16 | 17 | Next step: delete one line, and run again. 18 | 19 | delete "1,Ethe,Book,ebook0@twitter.com,67.61.243.220" 20 | 21 | ... Loading 28 rows into 'raw."raw_customers"' 22 | ... But Ethe is still there.. 23 | 24 | Add a line: 25 | "30,Ethe_is_back,Book,ebook0@twitter.com,67.61.243.220" 26 | 27 | meltano run el_without_ips 28 | ./meltano_tut select_db 29 | 30 | (now has 30 entries!) 31 | 32 | 33 | 34 | Next up, let's add metadata! 35 | add_metadata_columns = True.... 
36 | 37 | loaders: 38 | - name: target-duckdb 39 | variant: jwills 40 | pip_url: target-duckdb~=0.4 41 | config: 42 | filepath: output/my.duckdb 43 | default_target_schema: raw 44 | add_metadata_columns: True 45 | 46 | 2023-01-19T09:10:01.896666Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Table '"raw_customers"' exists cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 47 | 2023-01-19T09:10:01.902129Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Adding column: ALTER TABLE raw."raw_customers" ADD COLUMN "_sdc_batched_at" timestamp cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 48 | 2023-01-19T09:10:01.923412Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Adding column: ALTER TABLE raw."raw_customers" ADD COLUMN "_sdc_deleted_at" varchar cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 49 | 2023-01-19T09:10:01.946326Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Adding column: ALTER TABLE raw."raw_customers" ADD COLUMN "_sdc_extracted_at" timestamp cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 50 | Change and remove a column! 51 | 52 | delete: 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,1 53 | 54 | - name: target-duckdb 55 | variant: jwills 56 | pip_url: target-duckdb~=0.4 57 | config: 58 | filepath: output/my.duckdb 59 | default_target_schema: raw 60 | add_metadata_columns: True 61 | hard_delete: True 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | ## Deploying Meltano into Github Actions. 70 | 71 | Finally, we will deploy our little demo into Github Actions. 72 | 73 | *Warning*: This will eat up your GitHub Actions minutes, you should have free ones, but just take care. 
74 | 75 | ```yaml 76 | name: Pipeline for testing codespaces demo 77 | 78 | on: 79 | workflow_dispatch: 80 | schedule: 81 | - cron: '30 08 * * *' 82 | 83 | jobs: 84 | install_plugins: 85 | name: "meltano_install" 86 | runs-on: ubuntu-latest 87 | steps: 88 | - uses: actions/checkout@v3.2.0 89 | 90 | - name: Run your Meltano on schedule 91 | uses: devcontainers/ci@v0.2 92 | with: 93 | push: never 94 | runCmd: meltano install 95 | 96 | run_el: 97 | name: "meltano_run_el" 98 | runs-on: ubuntu-latest 99 | steps: 100 | - uses: actions/checkout@v3.2.0 101 | 102 | - name: Run your Meltano on schedule 103 | uses: devcontainers/ci@v0.2 104 | with: 105 | push: never 106 | runCmd: meltano run el_without_ips 107 | ``` 108 | 109 | This now runs at 8:30 every morning, 110 | or whenever you trigger the workflow manually. Try it out! 111 | 112 | (insert image!) 113 | 114 | 115 | ## Next Steps 116 | 117 | More things you can explore inside this codespace: 118 | 119 | * **Meltano VS Code Extension** 120 | 121 | Do you see this little dragon on the left hand side? 122 | 123 | ![Dragon](/meltano-ext.png) 124 | 125 | That's the [Meltano VS Code extension](https://marketplace.visualstudio.com/items?itemName=z3z1ma.meltano-power-user). It allows you to view and add all possible taps & targets we currently have on Meltano Hub. Take a look at them! 126 | 127 | * **Add another target** 128 | 129 | Why don't you try to add a second output? Try to add `target-jsonl` and do a `meltano run tap-csv target-jsonl`. 130 | 131 | * **Add another tap** 132 | 133 | Next, try to add another tap, for instance the `tap-carbon-intensity`, play around with it and push the data into either target. 
134 | 135 | Once you're done, head over to the docs and check out our great [**getting started tutorial**](https://docs.meltano.com/) for more details, add a [**job**](https://docs.meltano.com/reference/command-line-interface#job) **and** [**schedule**](https://docs.meltano.com/reference/command-line-interface#schedule) to easily orchestrate your extract & load processes, and [**deploy it to production**](https://docs.meltano.com/guide/production). 136 | 137 | # (Coming Soon 🏗️) Advanced Tutorial # 138 | 139 | - Explore different [replication methods](https://docs.meltano.com/guide/integration#replication-methods) to run [incremental](https://docs.meltano.com/guide/integration#incremental-replication-state) loads instead of [full syncs](https://docs.meltano.com/guide/integration#full-table-replication) 140 | - Explore deploying to Github Actions. 141 | - Explore using [environments](https://docs.meltano.com/concepts/environments) to change configuration at runtime 142 | - Explore [running dbt](https://docs.meltano.com/guide/transformation) and other tools with Meltano 143 | -------------------------------------------------------------------------------- /.devcontainer/plugins/loaders/target-duckdb--jwills.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "loaders", 3 | "name": "target-duckdb", 4 | "namespace": "target_duckdb", 5 | "variant": "jwills", 6 | "label": "DuckDB", 7 | "docs": "https://hub.meltano.com/loaders/target-duckdb--jwills", 8 | "repo": "https://github.com/jwills/target-duckdb", 9 | "pip_url": "target-duckdb~=0.8", 10 | "description": "DuckDB loader", 11 | "logo_url": "https://hub.meltano.com/assets/logos/loaders/duckdb.png", 12 | "settings_group_validation": [ 13 | [ 14 | "default_target_schema", 15 | "filepath" 16 | ] 17 | ], 18 | "settings": [ 19 | { 20 | "name": "add_metadata_columns", 21 | "kind": "boolean", 22 | "value": false, 23 | "label": "Add Metadata Columns", 24 | 
"description": "Metadata columns add extra row level information about data ingestions, (i.e. when was the row read in source, when was inserted or deleted in postgres etc.) Metadata columns are creating automatically by adding extra columns to the tables with a column prefix _SDC_. The column names are following the stitch naming conventions documented at https://www.stitchdata.com/docs/data-structure/integration-schemas#sdc-columns. Enabling metadata columns will flag the deleted rows by setting the _SDC_DELETED_AT metadata column. Without the add_metadata_columns option the deleted rows from singer taps will not be recognisable in DuckDB." 25 | }, 26 | { 27 | "name": "batch_size_rows", 28 | "kind": "integer", 29 | "value": 100000, 30 | "label": "Batch Size Rows", 31 | "description": "Maximum number of rows in each batch. At the end of each batch, the rows in the batch are loaded into DuckDB." 32 | }, 33 | { 34 | "name": "data_flattening_max_level", 35 | "kind": "integer", 36 | "value": 0, 37 | "label": "Data Flattening Max Level", 38 | "description": "Object type RECORD items from taps can be transformed to flattened columns by creating columns automatically.\n\nWhen value is 0 (default) then flattening functionality is turned off.\n" 39 | }, 40 | { 41 | "name": "database", 42 | "kind": "string", 43 | "label": "Database name", 44 | "description": "Alias of `dbname`." 45 | }, 46 | { 47 | "name": "dbname", 48 | "kind": "string", 49 | "label": "Database", 50 | "description": "The database name to write to; this will be inferred from the path property if it is not specified." 51 | }, 52 | { 53 | "name": "default_target_schema", 54 | "kind": "string", 55 | "value": "$MELTANO_EXTRACT__LOAD_SCHEMA", 56 | "label": "Default Target Schema", 57 | "description": "Name of the schema where the tables will be created. If schema_mapping is not defined then every stream sent by the tap is loaded into this schema." 
58 | }, 59 | { 60 | "name": "delimiter", 61 | "kind": "string", 62 | "value": ",", 63 | "label": "Delimiter", 64 | "description": "The delimiter to use for the CSV files that are used for record imports." 65 | }, 66 | { 67 | "name": "filepath", 68 | "kind": "string", 69 | "value": "${MELTANO_PROJECT_ROOT}/output/warehouse.duckdb", 70 | "label": "File Path", 71 | "description": "Alias of `path`.", 72 | "placeholder": "/path/to/local/file.duckdb" 73 | }, 74 | { 75 | "name": "flush_all_streams", 76 | "kind": "boolean", 77 | "value": false, 78 | "label": "Flush All Streams", 79 | "description": "Flush and load every stream into DuckDB when one batch is full. Warning - This may trigger the COPY command to use files with low number of records." 80 | }, 81 | { 82 | "name": "hard_delete", 83 | "kind": "boolean", 84 | "value": false, 85 | "label": "Hard Delete", 86 | "description": "When hard_delete option is true then DELETE SQL commands will be performed in DuckDB to delete rows in tables. It's achieved by continuously checking the _SDC_DELETED_AT metadata column sent by the singer tap. Due to deleting rows requires metadata columns, hard_delete option automatically enables the add_metadata_columns option as well." 87 | }, 88 | { 89 | "name": "path", 90 | "kind": "string", 91 | "label": "Connection Path", 92 | "description": "The path to use for the `duckdb.connect` call; either a local file or a MotherDuck connection uri.", 93 | "placeholder": "/path/to/local/file.duckdb" 94 | }, 95 | { 96 | "name": "primary_key_required", 97 | "kind": "boolean", 98 | "value": true, 99 | "label": "Primary Key Required", 100 | "description": "Log based and Incremental replications on tables with no Primary Key cause duplicates when merging UPDATE events. When set to true, stop loading data if no Primary Key is defined." 
101 | }, 102 | { 103 | "name": "quotechar", 104 | "kind": "string", 105 | "value": "\"", 106 | "label": "Quote Character", 107 | "description": "The quote character to use for the CSV files that are used for record imports." 108 | }, 109 | { 110 | "name": "schema_mapping", 111 | "kind": "object", 112 | "label": "schema_mapping", 113 | "description": "Useful if you want to load multiple streams from one tap to multiple DuckDB schemas.\n\nIf the tap sends the stream_id in - format then this option overwrites the default_target_schema value.\n" 114 | }, 115 | { 116 | "name": "temp_dir", 117 | "kind": "string", 118 | "label": "Temporary Directory", 119 | "description": "Directory of temporary CSV files with RECORD messages." 120 | }, 121 | { 122 | "name": "token", 123 | "kind": "string", 124 | "label": "Token", 125 | "description": "For MotherDuck connections, the auth token to use.", 126 | "sensitive": true 127 | }, 128 | { 129 | "name": "validate_records", 130 | "kind": "boolean", 131 | "value": false, 132 | "label": "Validate Records", 133 | "description": "Validate every single record message to the corresponding JSON schema. This option is disabled by default and invalid RECORD messages will fail only at load time by DuckDB. Enabling this option will detect invalid records earlier but could cause performance degradation." 134 | } 135 | ] 136 | } 137 | --------------------------------------------------------------------------------