├── images └── .gitkeep ├── output └── .gitkeep ├── .devcontainer ├── .gitignore ├── Dockerfile ├── devcontainer.json ├── plugins │ ├── mappers │ │ └── transform-field--transferwise.lock │ ├── extractors │ │ └── tap-csv--meltanolabs.lock │ └── loaders │ │ └── target-duckdb--jwills.lock └── meltano.yml ├── meltano-ext.png ├── codespaceOpen.gif ├── codespaces_tutorial ├── quick_add_plugins ├── select_duckdb.py ├── walkthrough ├── final_meltano.yml ├── customers_wo_ip.csv ├── customers_wo_ip_w_flag.csv └── customers.csv ├── .pre-commit-config.yaml ├── .github └── workflows │ ├── run.yml │ └── tests.yml ├── plugins └── mappers │ └── transform-field--transferwise.lock ├── meltano.yml ├── meltano_tut ├── data └── customers.csv ├── level2.md ├── .gitignore ├── README.md └── level3.md /images/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /output/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.devcontainer/.gitignore: -------------------------------------------------------------------------------- 1 | # Meltano 2 | .meltano/* 3 | -------------------------------------------------------------------------------- /meltano-ext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meltano/meltano-codespace-ready/HEAD/meltano-ext.png -------------------------------------------------------------------------------- /codespaceOpen.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meltano/meltano-codespace-ready/HEAD/codespaceOpen.gif -------------------------------------------------------------------------------- /codespaces_tutorial/quick_add_plugins: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | meltano add extractor tap-csv 3 | meltano add loader target-duckdb 4 | meltano add mapper transform-field 5 | meltano dragon 6 | -------------------------------------------------------------------------------- /codespaces_tutorial/select_duckdb.py: -------------------------------------------------------------------------------- 1 | import duckdb 2 | 3 | # to use a database file (not shared between processes) 4 | con = duckdb.connect(database='output/my.duckdb', read_only=True) 5 | 6 | con.execute("SELECT * FROM raw.raw_customers") 7 | print(con.fetchall()) 8 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM meltano/meltano:latest-python3.10 2 | 3 | WORKDIR /home/ 4 | 5 | COPY . . 6 | 7 | # To query results. 8 | RUN pip install duckdb 9 | 10 | ENV PATH="$PATH:/venv/bin/meltano" 11 | 12 | ENV MELTANO_SYS_DIR_ROOT="/home/.meltano" 13 | 14 | RUN meltano install extractors 15 | RUN meltano install loaders 16 | RUN meltano install mappers 17 | -------------------------------------------------------------------------------- /codespaces_tutorial/walkthrough: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ./meltano_tut init 3 | # get meltano yml in final state, 4 | rm meltano.yml 5 | cp codespaces_tutorial/final_meltano.yml meltano.yml 6 | 7 | # Get plugins ready 8 | meltano lock --update --all 9 | meltano install 10 | 11 | # Sync data 12 | meltano run tap-csv hide-ips target-duckdb 13 | ./meltano_tut select_db 14 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # 
See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | 12 | - repo: https://github.com/python-jsonschema/check-jsonschema 13 | rev: 0.32.1 14 | hooks: 15 | - id: check-meltano 16 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Codespaces Meltano CLI Starter", 3 | "customizations": { 4 | "vscode": { 5 | "extensions": [ 6 | "z3z1ma.meltano-power-user", 7 | "redhat.vscode-yaml" 8 | ], 9 | "settings": { 10 | "workbench.editorAssociations": { 11 | "*.md": "vscode.markdown.preview.editor" 12 | } 13 | } 14 | } 15 | }, 16 | "dockerFile": "Dockerfile" 17 | } 18 | -------------------------------------------------------------------------------- /.github/workflows/run.yml: -------------------------------------------------------------------------------- 1 | name: Pipeline for running Meltano 2 | on: 3 | workflow_dispatch: 4 | # schedule: 5 | # - cron: '30 08 * * *' 6 | 7 | jobs: 8 | run_el: 9 | name: "meltano_run_el" 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 13 | 14 | - name: Run your Meltano on schedule 15 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 16 | with: 17 | push: never 18 | runCmd: meltano run el_without_ips 19 | -------------------------------------------------------------------------------- /plugins/mappers/transform-field--transferwise.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "mappers", 3 | "name": "transform-field", 4 | "namespace": "transform_field", 5 | "variant": "transferwise", 6 | "label": 
"Pipelinewise Transform Field", 7 | "docs": "https://hub.meltano.com/mappers/transform-field--transferwise", 8 | "repo": "https://github.com/transferwise/pipelinewise-transform-field", 9 | "pip_url": "pipelinewise-transform-field", 10 | "executable": "transform-field", 11 | "description": "A Pipelinewise compatible map transformer plugin for transformations between Singer taps and targets.", 12 | "logo_url": "https://hub.meltano.com/assets/logos/mappers/transferwise.png" 13 | } 14 | -------------------------------------------------------------------------------- /.devcontainer/plugins/mappers/transform-field--transferwise.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "mappers", 3 | "name": "transform-field", 4 | "namespace": "transform_field", 5 | "variant": "transferwise", 6 | "label": "Pipelinewise Transform Field", 7 | "docs": "https://hub.meltano.com/mappers/transform-field--transferwise", 8 | "repo": "https://github.com/transferwise/pipelinewise-transform-field", 9 | "pip_url": "pipelinewise-transform-field", 10 | "executable": "transform-field", 11 | "description": "A Pipelinewise compatible map transformer plugin for transformations between Singer taps and targets.", 12 | "logo_url": "https://hub.meltano.com/assets/logos/mappers/transferwise.png" 13 | } 14 | -------------------------------------------------------------------------------- /codespaces_tutorial/final_meltano.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | default_environment: codespace 3 | environments: 4 | - name: dev 5 | - name: staging 6 | - name: prod 7 | - name: codespace 8 | plugins: 9 | extractors: 10 | - name: tap-csv 11 | variant: meltanolabs 12 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 13 | config: 14 | files: 15 | - entity: raw_customers 16 | path: data/customers.csv 17 | keys: [id] 18 | loaders: 19 | - name: target-duckdb 20 | variant: jwills 21 
| pip_url: target-duckdb~=0.8 22 | config: 23 | filepath: output/my.duckdb 24 | default_target_schema: raw 25 | mappers: 26 | - name: transform-field 27 | variant: transferwise 28 | pip_url: pipelinewise-transform-field 29 | mappings: 30 | - name: hide-ips 31 | config: 32 | transformations: 33 | - field_id: "ip_address" 34 | tap_stream_name: "raw_customers" 35 | type: "HASH" 36 | venv: 37 | backend: uv 38 | -------------------------------------------------------------------------------- /meltano.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | default_environment: codespace 3 | project_id: a8bdf4f3-f789-4ffc-9adc-0e0414df9059 4 | environments: 5 | - name: dev 6 | - name: staging 7 | - name: prod 8 | - name: codespace 9 | plugins: 10 | extractors: 11 | - name: tap-csv 12 | variant: meltanolabs 13 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 14 | config: 15 | files: 16 | - entity: raw_customers 17 | path: data/customers.csv 18 | keys: [id] 19 | loaders: 20 | - name: target-duckdb 21 | variant: jwills 22 | pip_url: target-duckdb~=0.8 23 | config: 24 | filepath: output/my.duckdb 25 | default_target_schema: raw 26 | mappers: 27 | - name: transform-field 28 | variant: transferwise 29 | pip_url: pipelinewise-transform-field 30 | mappings: 31 | - name: hide-ips 32 | config: 33 | transformations: 34 | - field_id: ip_address 35 | tap_stream_name: raw_customers 36 | type: HASH 37 | jobs: 38 | - name: el_without_ips 39 | tasks: 40 | - tap-csv hide-ips target-duckdb 41 | venv: 42 | backend: uv 43 | -------------------------------------------------------------------------------- /.devcontainer/meltano.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | default_environment: codespace 3 | project_id: a8bdf4f3-f789-4ffc-9adc-0e0414df9059 4 | environments: 5 | - name: dev 6 | - name: staging 7 | - name: prod 8 | - name: codespace 9 | plugins: 10 | extractors: 11 
| - name: tap-csv 12 | variant: meltanolabs 13 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 14 | config: 15 | files: 16 | - entity: raw_customers 17 | path: data/customers.csv 18 | keys: [id] 19 | loaders: 20 | - name: target-duckdb 21 | variant: jwills 22 | pip_url: target-duckdb~=0.8 23 | config: 24 | filepath: output/my.duckdb 25 | default_target_schema: raw 26 | mappers: 27 | - name: transform-field 28 | variant: transferwise 29 | pip_url: pipelinewise-transform-field 30 | mappings: 31 | - name: hide-ips 32 | config: 33 | transformations: 34 | - field_id: ip_address 35 | tap_stream_name: raw_customers 36 | type: HASH 37 | jobs: 38 | - name: el_without_ips 39 | tasks: 40 | - tap-csv hide-ips target-duckdb 41 | venv: 42 | backend: uv 43 | -------------------------------------------------------------------------------- /meltano_tut: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | argument=$1 4 | 5 | # proj dir uuid used for tracking demo launches 6 | proj_dir=b54c6cfe2f8f831389a5b9ca409f410c 7 | 8 | init () { 9 | echo "=== Running wrapped 'meltano init' ===" 10 | rm meltano.yml 11 | rm -rf output 12 | meltano init $proj_dir 13 | rm $proj_dir/README.md 14 | mv $proj_dir/* . 15 | rm -r $proj_dir/ 16 | meltano environment add codespace 17 | meltano config meltano set default_environment codespace 18 | meltano config meltano set venv.backend uv 19 | echo "\n\n" 20 | echo "===============================" 21 | echo "=== Ignore any next steps from above. We already took care of everything." 22 | echo "=== Now head to the README.md and continue with step 2!" 
23 | } 24 | 25 | if [ "$argument" = "init" ]; then 26 | init 27 | 28 | elif [ "$argument" = "clear" ]; then 29 | echo "=== Running clear state ===" 30 | meltano state clear dev:tap-csv-to-target-duckdb 31 | rm output/my.duckdb 32 | elif [ "$argument" = "select_db" ]; then 33 | echo "=== Running select DB ===" 34 | python codespaces_tutorial/select_duckdb.py 35 | else 36 | echo "provide one of init|select_db" 37 | fi 38 | -------------------------------------------------------------------------------- /codespaces_tutorial/customers_wo_ip.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email 2 | 2,Myranda,Tire,mtire1@vkontakte.ru 3 | 3,Remus,Dorian,rdorian2@twitpic.com 4 | 4,Stesha,Suddock,ssuddock3@ycombinator.com 5 | 5,Serge,Daws,sdaws4@usgs.gov 6 | 6,Ettie,Maddison,emaddison5@fastcompany.com 7 | 7,Giffy,Biernat,gbiernat6@fastcompany.com 8 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru 9 | 9,Mirella,Wisden,mwisden8@homestead.com 10 | 10,Ami,Santus,asantus9@archive.org 11 | 11,Otto,Hursthouse,ohursthousea@wordpress.com 12 | 12,Sherrie,Pelos,spelosb@ucsd.edu 13 | 13,Doreen,Loyndon,dloyndonc@columbia.edu 14 | 14,Raddy,Maber,rmaberd@ihg.com 15 | 15,Coralie,Pechan,cpechane@netlog.com 16 | 16,Ira,Allmann,iallmannf@ucoz.com 17 | 17,Silvester,Siseland,ssiselandg@smh.com.au 18 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp 19 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com 20 | 20,Zacharie,Vardey,zvardeyj@wufoo.com 21 | 21,Ofelia,Meriet,omerietk@domainmarket.com 22 | 22,Minta,Du Fray,mdufrayl@cmu.edu 23 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com 24 | 24,Dunstan,Schimpke,dschimpken@nih.gov 25 | 25,Ab,Dackombe,adackombeo@un.org 26 | 26,Cyrillus,Lakin,clakinp@indiegogo.com 27 | 27,Jori,Gully,jgullyq@flickr.com 28 | 28,Brod,Besse,bbesser@taobao.com 29 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com 30 | 30,Ethe_is_back,Book,ebook0@twitter.com 31 | 
-------------------------------------------------------------------------------- /codespaces_tutorial/customers_wo_ip_w_flag.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,active 2 | 2,Myranda,Tire,mtire1@vkontakte.ru,1 3 | 3,Remus,Dorian,rdorian2@twitpic.com,1 4 | 4,Stesha,Suddock,ssuddock3@ycombinator.com,1 5 | 5,Serge,Daws,sdaws4@usgs.gov,1 6 | 6,Ettie,Maddison,emaddison5@fastcompany.com,0 7 | 7,Giffy,Biernat,gbiernat6@fastcompany.com,1 8 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru,1 9 | 9,Mirella,Wisden,mwisden8@homestead.com,0 10 | 10,Ami,Santus,asantus9@archive.org,1 11 | 11,Otto,Hursthouse,ohursthousea@wordpress.com,1 12 | 12,Sherrie,Pelos,spelosb@ucsd.edu,1 13 | 13,Doreen,Loyndon,dloyndonc@columbia.edu,1 14 | 14,Raddy,Maber,rmaberd@ihg.com,1 15 | 15,Coralie,Pechan,cpechane@netlog.com,0 16 | 16,Ira,Allmann,iallmannf@ucoz.com,0 17 | 17,Silvester,Siseland,ssiselandg@smh.com.au,1 18 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp,1 19 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com,1 20 | 20,Zacharie,Vardey,zvardeyj@wufoo.com,0 21 | 21,Ofelia,Meriet,omerietk@domainmarket.com,1 22 | 22,Minta,Du Fray,mdufrayl@cmu.edu,1 23 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com,1 24 | 24,Dunstan,Schimpke,dschimpken@nih.gov,1 25 | 25,Ab,Dackombe,adackombeo@un.org,1 26 | 26,Cyrillus,Lakin,clakinp@indiegogo.com,1 27 | 27,Jori,Gully,jgullyq@flickr.com,0 28 | 28,Brod,Besse,bbesser@taobao.com,1 29 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,1 30 | 30,Ethe_is_back,Book,ebook0@twitter.com,1 31 | -------------------------------------------------------------------------------- /data/customers.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,ip_address 2 | 1,Ethe,Book,ebook0@twitter.com,67.61.243.220 3 | 2,Myranda,Tire,mtire1@vkontakte.ru,151.194.73.229 4 | 3,Remus,Dorian,rdorian2@twitpic.com,204.220.73.121 5 | 
4,Stesha,Suddock,ssuddock3@ycombinator.com,113.182.132.211 6 | 5,Serge,Daws,sdaws4@usgs.gov,111.173.74.111 7 | 6,Ettie,Maddison,emaddison5@fastcompany.com,35.1.210.212 8 | 7,Giffy,Biernat,gbiernat6@fastcompany.com,52.23.72.93 9 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru,94.239.171.99 10 | 9,Mirella,Wisden,mwisden8@homestead.com,104.209.4.89 11 | 10,Ami,Santus,asantus9@archive.org,235.3.35.99 12 | 11,Otto,Hursthouse,ohursthousea@wordpress.com,84.230.160.212 13 | 12,Sherrie,Pelos,spelosb@ucsd.edu,115.16.84.201 14 | 13,Doreen,Loyndon,dloyndonc@columbia.edu,154.58.216.20 15 | 14,Raddy,Maber,rmaberd@ihg.com,179.253.223.80 16 | 15,Coralie,Pechan,cpechane@netlog.com,83.224.237.7 17 | 16,Ira,Allmann,iallmannf@ucoz.com,209.175.60.153 18 | 17,Silvester,Siseland,ssiselandg@smh.com.au,5.12.55.221 19 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp,178.115.168.254 20 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com,250.153.130.70 21 | 20,Zacharie,Vardey,zvardeyj@wufoo.com,91.69.105.86 22 | 21,Ofelia,Meriet,omerietk@domainmarket.com,118.193.26.70 23 | 22,Minta,Du Fray,mdufrayl@cmu.edu,224.152.143.125 24 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com,118.36.198.87 25 | 24,Dunstan,Schimpke,dschimpken@nih.gov,39.137.225.135 26 | 25,Ab,Dackombe,adackombeo@un.org,163.98.82.166 27 | 26,Cyrillus,Lakin,clakinp@indiegogo.com,154.155.120.48 28 | 27,Jori,Gully,jgullyq@flickr.com,186.239.179.225 29 | 28,Brod,Besse,bbesser@taobao.com,62.50.230.144 30 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,57.217.190.44 31 | -------------------------------------------------------------------------------- /codespaces_tutorial/customers.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name,email,ip_address 2 | 1,Ethe,Book,ebook0@twitter.com,67.61.243.220 3 | 2,Myranda,Tire,mtire1@vkontakte.ru,151.194.73.229 4 | 3,Remus,Dorian,rdorian2@twitpic.com,204.220.73.121 5 | 4,Stesha,Suddock,ssuddock3@ycombinator.com,113.182.132.211 6 | 
5,Serge,Daws,sdaws4@usgs.gov,111.173.74.111 7 | 6,Ettie,Maddison,emaddison5@fastcompany.com,35.1.210.212 8 | 7,Giffy,Biernat,gbiernat6@fastcompany.com,52.23.72.93 9 | 8,Vitoria,Ommundsen,vommundsen7@narod.ru,94.239.171.99 10 | 9,Mirella,Wisden,mwisden8@homestead.com,104.209.4.89 11 | 10,Ami,Santus,asantus9@archive.org,235.3.35.99 12 | 11,Otto,Hursthouse,ohursthousea@wordpress.com,84.230.160.212 13 | 12,Sherrie,Pelos,spelosb@ucsd.edu,115.16.84.201 14 | 13,Doreen,Loyndon,dloyndonc@columbia.edu,154.58.216.20 15 | 14,Raddy,Maber,rmaberd@ihg.com,179.253.223.80 16 | 15,Coralie,Pechan,cpechane@netlog.com,83.224.237.7 17 | 16,Ira,Allmann,iallmannf@ucoz.com,209.175.60.153 18 | 17,Silvester,Siseland,ssiselandg@smh.com.au,5.12.55.221 19 | 18,Raviv,Dwane,rdwaneh@amazon.co.jp,178.115.168.254 20 | 19,Rianon,Wansbury,rwansburyi@marketwatch.com,250.153.130.70 21 | 20,Zacharie,Vardey,zvardeyj@wufoo.com,91.69.105.86 22 | 21,Ofelia,Meriet,omerietk@domainmarket.com,118.193.26.70 23 | 22,Minta,Du Fray,mdufrayl@cmu.edu,224.152.143.125 24 | 23,Baillie,McDougal,bmcdougalm@dagondesign.com,118.36.198.87 25 | 24,Dunstan,Schimpke,dschimpken@nih.gov,39.137.225.135 26 | 25,Ab,Dackombe,adackombeo@un.org,163.98.82.166 27 | 26,Cyrillus,Lakin,clakinp@indiegogo.com,154.155.120.48 28 | 27,Jori,Gully,jgullyq@flickr.com,186.239.179.225 29 | 28,Brod,Besse,bbesser@taobao.com,62.50.230.144 30 | 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,57.217.190.44 31 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Pipeline for testing codespaces demo 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | schedule: 11 | - cron: '30 08 * * *' 12 | 13 | jobs: 14 | test_init: 15 | name: "test_init" 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | 
20 | - name: Run test script 21 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 22 | env: 23 | MELTANO_SEND_ANONYMOUS_USAGE_STATS: false 24 | with: 25 | push: never 26 | runCmd: ./meltano_tut init 27 | env: MELTANO_SEND_ANONYMOUS_USAGE_STATS 28 | test_add_plugins: 29 | name: "test_add_plugins" 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 33 | 34 | - name: Run test script 35 | env: 36 | MELTANO_SEND_ANONYMOUS_USAGE_STATS: false 37 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 38 | with: 39 | push: never 40 | runCmd: ./meltano_tut init ; ./codespaces_tutorial/quick_add_plugins 41 | env: MELTANO_SEND_ANONYMOUS_USAGE_STATS 42 | 43 | test_run_everything: 44 | name: "test_run_everything" 45 | runs-on: ubuntu-latest 46 | steps: 47 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 48 | 49 | - name: Run test script 50 | env: 51 | MELTANO_SEND_ANONYMOUS_USAGE_STATS: false 52 | uses: devcontainers/ci@8bf61b26e9c3a98f69cb6ce2f88d24ff59b785c6 # v0.3.1900000417 53 | with: 54 | push: never 55 | runCmd: ./codespaces_tutorial/walkthrough 56 | env: MELTANO_SEND_ANONYMOUS_USAGE_STATS 57 | -------------------------------------------------------------------------------- /level2.md: -------------------------------------------------------------------------------- 1 | In level 2 of the demo you will: 2 | - remove the plain text IP addresses from the database 3 | - create a named job to make calling your new pipeline easier 4 | 5 | # Step 1 - Add the transform-field mapper 6 | 7 | Notice that the data you just viewed had plain IP addresses inside of it? Let's quickly get rid of those! 8 | 9 | Add a "mapper" to do slight modifications on the data we're sourcing here. 
10 | 11 | > `meltano add mapper transform-field` 12 | 13 | # Step 2 - Configure the mapper to remove plain text IP addresses 14 | 15 | Now paste the following config below the `pip_url` for the `transform-field` mapper in your `meltano.yml` file. 16 | 17 | ```yaml 18 | mappings: 19 | - name: hide-ips 20 | config: 21 | transformations: 22 | - field_id: "ip_address" 23 | tap_stream_name: "raw_customers" 24 | type: "HASH" 25 | ``` 26 | 27 | The full configuration for the mapper `transform-field` should look like this: 28 | 29 | ```yaml 30 | mappers: 31 | - name: transform-field 32 | variant: transferwise 33 | pip_url: pipelinewise-transform-field 34 | executable: transform-field 35 | mappings: 36 | - name: hide-ips 37 | config: 38 | transformations: 39 | - field_id: "ip_address" 40 | tap_stream_name: "raw_customers" 41 | type: "HASH" 42 | ``` 43 | 44 | # Step 3 - Add a job name to your pipeline 45 | 46 | You already know how `meltano run` kind of works. So let's wrap the steps of the pipeline behind the run command into a "job" so we can call it with just one word. 47 | 48 | Run: 49 | > `meltano job add el_without_ips --tasks "[tap-csv hide-ips target-duckdb]"` 50 | 51 | This will add the following line into your meltano.yml file: 52 | 53 | ```yaml 54 | jobs: 55 | - name: el_without_ips 56 | tasks: 57 | - tap-csv hide-ips target-duckdb 58 | ``` 59 | 60 | Now let's re-run our pipeline 61 | 62 | # Step 4 - Run the pipeline calling the job 63 | Now simply run the "job": 64 | 65 | > `meltano run el_without_ips` 66 | 67 | # Step 5 - Check that it worked 68 | 69 | To view the data again, run the helper again: 70 | 71 | > `./meltano_tut select_db` 72 | 73 | # Step 6 - Celebrate your success 🎉 74 | 75 | That was fun and quick! Now try to run 76 | 77 | > `meltano dragon` 78 | 79 | just for the fun of it! 🐉 80 | 81 | # Next steps - level 3 for more 82 | Next we want to explore the rest of the demo and go further, open up ["the level 3 instructions"](level3.md) for that! 
83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Meltano specific stuff 2 | .meltano/* 3 | plugins/extractors/* 4 | plugins/loaders/* 5 | output/* 6 | !output/.gitkeep 7 | 8 | # Mac 9 | .DS_Store 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | pip-wheel-metadata/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | -------------------------------------------------------------------------------- /.devcontainer/plugins/extractors/tap-csv--meltanolabs.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "extractors", 3 | "name": "tap-csv", 4 | "namespace": "tap_csv", 5 | "variant": "meltanolabs", 6 | "label": "Comma Separated Values (CSV)", 7 | "docs": "https://hub.meltano.com/extractors/tap-csv--meltanolabs", 8 | "repo": "https://github.com/MeltanoLabs/tap-csv", 9 | "pip_url": "git+https://github.com/MeltanoLabs/tap-csv.git", 10 | "description": "Generic data extractor of CSV (comma separated value) files", 11 | "logo_url": "https://hub.meltano.com/assets/logos/extractors/csv.png", 12 | "capabilities": [ 13 | "catalog", 14 | "discover" 15 | ], 16 | "settings_group_validation": [ 17 | [ 18 | "files" 19 | ], 20 | [ 21 | "csv_files_definition" 22 | ] 23 | ], 24 | "settings": [ 25 | { 26 | "name": "add_metadata_columns", 27 | "kind": "boolean", 28 | "value": false, 29 | "label": "Add Metadata Columns", 30 | "description": "When True, add the 
metadata columns (`_sdc_source_file`, `_sdc_source_file_mtime`, `_sdc_source_lineno`) to output." 31 | }, 32 | { 33 | "name": "csv_files_definition", 34 | "kind": "string", 35 | "label": "Csv Files Definition", 36 | "documentation": "https://github.com/MeltanoLabs/tap-csv#settings", 37 | "description": "Project-relative path to JSON file holding array of objects as described under [Files](#files) - with `entity`, `path`, `keys`, and other optional keys:\n\n```json\n[\n {\n \"entity\": \"\",\n \"path\": \"\",\n \"keys\": [\"\"],\n },\n // ...\n]\n```\n", 38 | "placeholder": "Ex. files-def.json" 39 | }, 40 | { 41 | "name": "faker_config.locale", 42 | "kind": "array", 43 | "label": "Faker Locale", 44 | "description": "One or more LCID locale strings to produce localized output for: https://faker.readthedocs.io/en/master/#localization" 45 | }, 46 | { 47 | "name": "faker_config.seed", 48 | "kind": "string", 49 | "label": "Faker Seed", 50 | "description": "Value to seed the Faker generator for deterministic output: https://faker.readthedocs.io/en/master/#seeding-the-generator" 51 | }, 52 | { 53 | "name": "files", 54 | "kind": "array", 55 | "label": "Files", 56 | "description": "Array of objects with `entity`, `path`, `keys`, and `encoding` [Optional] keys:\n\n* `entity`: The entity name, used as the table name for the data loaded from that CSV.\n* `path`: Local path (relative to the project's root) to the file to be ingested. Note that this may be a directory, in which case all files in that directory and any of its subdirectories will be recursively processed\n* `keys`: The names of the columns that constitute the unique keys for that entity.\n* `encoding`: [Optional] The file encoding to use when reading the file (i.e. \"latin1\", \"UTF-8\"). 
Use this setting when you get a UnicodeDecodeError error.\n Each input CSV file must be a traditionally-delimited CSV (comma separated columns, newlines indicate new rows, double quoted values).\n\nThe following entries are passed through in an internal CSV dialect that then is used to configure the CSV reader:\n\n* `delimiter`: A one-character string used to separate fields. It defaults to ','.\n* `doublequote`: Controls how instances of quotechar appearing inside a field should themselves be quoted. When True, the character is doubled. When False, the escapechar is used as a prefix to the quotechar. It defaults to True.\n* `escapechar`: A one-character string used by the reader, where the escapechar removes any special meaning from the following character. It defaults to None, which disables escaping.\n* `quotechar`: A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '\"'.\n* `skipinitialspace`: When True, spaces immediately following the delimiter are ignored. The default is False.\n* `strict`: When True, raise exception Error on bad CSV input. The default is False.\n\nThe first row is the header defining the attribute name for that column and will result to a column of the same name in the database. It must have a valid format with no spaces or special characters (like for example `!` or `@`, etc).\n" 57 | }, 58 | { 59 | "name": "flattening_enabled", 60 | "kind": "boolean", 61 | "label": "Enable Schema Flattening", 62 | "description": "'True' to enable schema flattening and automatically expand nested properties." 63 | }, 64 | { 65 | "name": "flattening_max_depth", 66 | "kind": "integer", 67 | "label": "Max Flattening Depth", 68 | "description": "The max depth to flatten schemas." 
69 | }, 70 | { 71 | "name": "stream_map_config", 72 | "kind": "object", 73 | "label": "User Stream Map Configuration", 74 | "description": "User-defined config values to be used within map expressions." 75 | }, 76 | { 77 | "name": "stream_maps", 78 | "kind": "object", 79 | "label": "Stream Maps", 80 | "description": "Config object for stream maps capability. For more information check out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html)." 81 | } 82 | ] 83 | } 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Codespaces Meltano DIY Demo 2 | ## Welcome to Meltano! Run your first data pipeline within 5 minutes. 3 | Even if you never touched [Meltano](https://github.com/meltano) before. 4 | No install needed, just a GitHub account (and a few spare Codespaces minutes you get for free anyways). 5 | 6 | Let's get started! 7 | 8 | ## Open codespaces (if it isn't already open) 9 | Click "Open on Codespaces", to launch this project into a ready to use web VS-Code version with everything preloaded. 10 | 11 | 12 | 13 | **Make sure to open up the README.md inside Codespaces as well.** 14 | 15 | *Notes on codespaces:* 16 | 17 | *- If you at any point get an error "The user denied permission to use Service Worker", then you need to enable third-party cookies. [It's a codespaces related problem](https://github.com/orgs/community/discussions/26316).* 18 | 19 | *- In our experience, codespaces work best in Chrome or Firefox, not so well in Safari.* 20 | 21 | *- Files in codespaces autosave! No need to save anything.* 22 | 23 | # What you're building: let's run the final data pipeline first. # 24 | 25 | There's a csv [customers.csv](data/customers.csv) with 26 | - customer names, e-mail adresses and IPs 27 | - you're going to extract this CSV and load it into an SQL-database. 
28 | 29 | Go ahead, just run 30 | 31 | > `meltano run tap-csv hide-ips target-duckdb` 32 | 33 | And that's it, you're done. Don't believe us? You can use a helper function to check the SQL-database: 34 | 35 | > `./meltano_tut select_db` 36 | 37 | Watch out for these things: 38 | 1. There are no ip addresses inside the database, right? Check [customers.csv](data/customers.csv), they were there. 39 | 2. That's because we added a "mapper" called "hide-ips" that is completely customizable and in this case hashes the IP addresses. 40 | 3. In the console output - Meltano told you at the beginning of the log ... "Schema 'raw' does not exist." 41 | 4. That is because Meltano has a lot of helper functions. It e.g. creates schemas and tables, should they not already exist. 42 | 43 | Feel free to explore the project, or dive right into building it yourself! 44 | 45 | **Let's go ahead and build it ourselves within 5 minutes** 46 | 47 | # Step 1 - initialize a new meltano project 48 | 49 | Inside the terminal (bottom window) run: 50 | 51 | > `./meltano_tut init` 52 | 53 | This runs a wrapped "meltano init", adding demo data for you to have fun with. This will *remove* what we preinstalled, so now we need to add a few things first. 54 | 55 | # Step 2 - add your first extractor 56 | 57 | Add your first extractor to get data from the CSV. Do so by running inside the terminal: 58 | 59 | > `meltano add extractor tap-csv` 60 | 61 | Then open up the file `meltano.yml`, copy the config below, and paste it below `pip_url`. 
62 | 63 | ```yaml 64 | config: 65 | files: 66 | - entity: raw_customers 67 | path: data/customers.csv 68 | keys: [id] 69 | ``` 70 | 71 | Your complete config for tap-csv in `meltano.yml` should look like this: 72 | 73 | ```yaml 74 | plugins: 75 | extractors: 76 | - name: tap-csv 77 | variant: meltanolabs 78 | pip_url: git+https://github.com/MeltanoLabs/tap-csv.git 79 | config: 80 | files: 81 | - entity: raw_customers 82 | path: data/customers.csv 83 | keys: [id] 84 | ``` 85 | 86 | # Step 3 - test run your tap 87 | 88 | Let's test the tap by running: 89 | 90 | > `meltano invoke tap-csv` 91 | 92 | If everything works as expected, Meltano should extract the CSV and dump it as a "stream" onto standard output inside the terminal. 93 | 94 | # Step 4 - add a loader 95 | 96 | Next add a loader to load our data into a local duckdb: 97 | 98 | > `meltano add loader target-duckdb` 99 | 100 | Copy the configuration below and paste it below the `pip_url` for target-duckdb in the `meltano.yml` file. 101 | 102 | ```yaml 103 | config: 104 | filepath: output/my.duckdb 105 | default_target_schema: raw 106 | ``` 107 | 108 | The config in `meltano.yml` for target-duckdb should look like this: 109 | 110 | ```yaml 111 | loaders: 112 | - name: target-duckdb 113 | variant: jwills 114 | pip_url: target-duckdb~=0.4 115 | config: 116 | filepath: output/my.duckdb 117 | default_target_schema: raw 118 | ``` 119 | 120 | # Step 5 - run your EL pipeline 121 | 122 | Now you can do your first complete EL run by calling `meltano run`! 123 | 124 | > `meltano run tap-csv target-duckdb` 125 | 126 | Perfect! 127 | 128 | # Step 6 - view loaded data 129 | 130 | To view your data you can use our little helper: 131 | 132 | > `./meltano_tut select_db` 133 | 134 | This will run a `SELECT * FROM raw.raw_customers` on your duckdb instance and write the output to the terminal. 135 | 136 | Great! You've completed your first extract and load run. 🥳 137 | 138 | PS. 
If you liked what you saw, don't forget to [star us on GitHub](https://github.com/meltano/meltano) and consider joining our [Slack community](https://meltano.com/slack)! 139 | 140 | # Next steps - level 2 to remove IP addresses 141 | Next we want to start to remove the IP addresses, open up ["the level 2 instructions"](level2.md) for that! 142 | -------------------------------------------------------------------------------- /level3.md: -------------------------------------------------------------------------------- 1 | 2 | ### Full table all in ### 3 | Starting with the most basic and simple replication method 4 | 5 | 6 | 7 | run meltano run el_without_ips again 8 | 9 | .... Beginning full_table sync of 'raw_customers'... 10 | 11 | .... message=Loading 29 rows into 'raw."raw_customers"' 12 | run ./meltano_tut select_db 13 | 14 | ... no change, same data right? because we're doing a "full table" sync. 15 | 16 | 17 | Next step: delete one line, and run again. 18 | 19 | delete "1,Ethe,Book,ebook0@twitter.com,67.61.243.220" 20 | 21 | ... Loading 28 rows into 'raw."raw_customers"' 22 | ... But Ethe is still there.. 23 | 24 | Add a line: 25 | "30,Ethe_is_back,Book,ebook0@twitter.com,67.61.243.220" 26 | 27 | meltano run el_without_ips 28 | ./meltano_tut select_db 29 | 30 | (now has 30 entries!) 31 | 32 | 33 | 34 | Next up, let's add metadata! 35 | add_metadata_columns = True.... 
36 | 37 | loaders: 38 | - name: target-duckdb 39 | variant: jwills 40 | pip_url: target-duckdb~=0.4 41 | config: 42 | filepath: output/my.duckdb 43 | default_target_schema: raw 44 | add_metadata_columns: True 45 | 46 | 2023-01-19T09:10:01.896666Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Table '"raw_customers"' exists cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 47 | 2023-01-19T09:10:01.902129Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Adding column: ALTER TABLE raw."raw_customers" ADD COLUMN "_sdc_batched_at" timestamp cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 48 | 2023-01-19T09:10:01.923412Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Adding column: ALTER TABLE raw."raw_customers" ADD COLUMN "_sdc_deleted_at" varchar cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 49 | 2023-01-19T09:10:01.946326Z [info ] time=2023-01-19 09:10:01 name=target_duckdb level=INFO message=Adding column: ALTER TABLE raw."raw_customers" ADD COLUMN "_sdc_extracted_at" timestamp cmd_type=elb consumer=True name=target-duckdb producer=False stdio=stderr string_id=target-duckdb 50 | Change and remove a column! 51 | 52 | delete: 29,Edie,Corderoy,ecorderoys@nationalgeographic.com,1 53 | 54 | - name: target-duckdb 55 | variant: jwills 56 | pip_url: target-duckdb~=0.4 57 | config: 58 | filepath: output/my.duckdb 59 | default_target_schema: raw 60 | add_metadata_columns: True 61 | hard_delete: True 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | ## Deploying Meltano into Github Actions. 70 | 71 | Finally, we will deploy our little demo into Github Actions. 72 | 73 | *Warning*: This will eat up your GitHub Actions minutes, you should have free ones, but just take care. 
74 | 75 | ```yaml 76 | name: Pipeline for testing codespaces demo 77 | 78 | on: 79 | workflow_dispatch: 80 | schedule: 81 | - cron: '30 08 * * *' 82 | 83 | jobs: 84 | install_plugins: 85 | name: "meltano_install" 86 | runs-on: ubuntu-latest 87 | steps: 88 | - uses: actions/checkout@v3.2.0 89 | 90 | - name: Run your Meltano on schedule 91 | uses: devcontainers/ci@v0.2 92 | with: 93 | push: never 94 | runCmd: meltano install 95 | 96 | run_el: 97 | name: "meltano_run_el" 98 | runs-on: ubuntu-latest 99 | steps: 100 | - uses: actions/checkout@v3.2.0 101 | 102 | - name: Run your Meltano on schedule 103 | uses: devcontainers/ci@v0.2 104 | with: 105 | push: never 106 | runCmd: meltano run el_without_ips 107 | ``` 108 | 109 | This now runs at 8:30 every morning, 110 | or whenever you trigger the workflow manually. Try it out! 111 | 112 | (insert image!) 113 | 114 | 115 | ## Next Steps 116 | 117 | More things you can explore inside this codespace: 118 | 119 | * **Meltano VS Code Extension** 120 | 121 | Do you see this little dragon on the left hand side? 122 | 123 | ![Dragon](/meltano-ext.png) 124 | 125 | That's the [Meltano VS Code extension](https://marketplace.visualstudio.com/items?itemName=z3z1ma.meltano-power-user). It allows you to view and add all possible taps & targets we currently have on Meltano Hub. Take a look at them! 126 | 127 | * **Add another target** 128 | 129 | Why don't you try to add a second output? Try to add `target-jsonl` and do a `meltano run tap-csv target-jsonl`. 130 | 131 | * **Add another tap** 132 | 133 | Next, try to add another tap, for instance the `tap-carbon-intensity`, play around with it and push the data into either target. 
134 | 135 | Once you're done, head over to the docs and check out our great [**getting started tutorial**](https://docs.meltano.com/) for more details, add a [**job**](https://docs.meltano.com/reference/command-line-interface#job) **and** [**schedule**](https://docs.meltano.com/reference/command-line-interface#schedule) to easily orchestrate your extract & load processes, and [**deploy it to production**](https://docs.meltano.com/guide/production). 136 | 137 | # (Coming Soon 🏗️) Advanced Tutorial # 138 | 139 | - Explore different [replication methods](https://docs.meltano.com/guide/integration#replication-methods) to run [incremental](https://docs.meltano.com/guide/integration#incremental-replication-state) loads instead of [full syncs](https://docs.meltano.com/guide/integration#full-table-replication) 140 | - Explore deploying to Github Actions. 141 | - Explore using [environments](https://docs.meltano.com/concepts/environments) to change configuration at runtime 142 | - Explore [running dbt](https://docs.meltano.com/guide/transformation) and other tools with Meltano 143 | -------------------------------------------------------------------------------- /.devcontainer/plugins/loaders/target-duckdb--jwills.lock: -------------------------------------------------------------------------------- 1 | { 2 | "plugin_type": "loaders", 3 | "name": "target-duckdb", 4 | "namespace": "target_duckdb", 5 | "variant": "jwills", 6 | "label": "DuckDB", 7 | "docs": "https://hub.meltano.com/loaders/target-duckdb--jwills", 8 | "repo": "https://github.com/jwills/target-duckdb", 9 | "pip_url": "target-duckdb~=0.8", 10 | "description": "DuckDB loader", 11 | "logo_url": "https://hub.meltano.com/assets/logos/loaders/duckdb.png", 12 | "settings_group_validation": [ 13 | [ 14 | "default_target_schema", 15 | "filepath" 16 | ] 17 | ], 18 | "settings": [ 19 | { 20 | "name": "add_metadata_columns", 21 | "kind": "boolean", 22 | "value": false, 23 | "label": "Add Metadata Columns", 24 | 
"description": "Metadata columns add extra row level information about data ingestions, (i.e. when was the row read in source, when was inserted or deleted in postgres etc.) Metadata columns are creating automatically by adding extra columns to the tables with a column prefix _SDC_. The column names are following the stitch naming conventions documented at https://www.stitchdata.com/docs/data-structure/integration-schemas#sdc-columns. Enabling metadata columns will flag the deleted rows by setting the _SDC_DELETED_AT metadata column. Without the add_metadata_columns option the deleted rows from singer taps will not be recognisable in DuckDB." 25 | }, 26 | { 27 | "name": "batch_size_rows", 28 | "kind": "integer", 29 | "value": 100000, 30 | "label": "Batch Size Rows", 31 | "description": "Maximum number of rows in each batch. At the end of each batch, the rows in the batch are loaded into DuckDB." 32 | }, 33 | { 34 | "name": "data_flattening_max_level", 35 | "kind": "integer", 36 | "value": 0, 37 | "label": "Data Flattening Max Level", 38 | "description": "Object type RECORD items from taps can be transformed to flattened columns by creating columns automatically.\n\nWhen value is 0 (default) then flattening functionality is turned off.\n" 39 | }, 40 | { 41 | "name": "database", 42 | "kind": "string", 43 | "label": "Database name", 44 | "description": "Alias of `dbname`." 45 | }, 46 | { 47 | "name": "dbname", 48 | "kind": "string", 49 | "label": "Database", 50 | "description": "The database name to write to; this will be inferred from the path property if it is not specified." 51 | }, 52 | { 53 | "name": "default_target_schema", 54 | "kind": "string", 55 | "value": "$MELTANO_EXTRACT__LOAD_SCHEMA", 56 | "label": "Default Target Schema", 57 | "description": "Name of the schema where the tables will be created. If schema_mapping is not defined then every stream sent by the tap is loaded into this schema." 
58 | }, 59 | { 60 | "name": "delimiter", 61 | "kind": "string", 62 | "value": ",", 63 | "label": "Delimiter", 64 | "description": "The delimiter to use for the CSV files that are used for record imports." 65 | }, 66 | { 67 | "name": "filepath", 68 | "kind": "string", 69 | "value": "${MELTANO_PROJECT_ROOT}/output/warehouse.duckdb", 70 | "label": "File Path", 71 | "description": "Alias of `path`.", 72 | "placeholder": "/path/to/local/file.duckdb" 73 | }, 74 | { 75 | "name": "flush_all_streams", 76 | "kind": "boolean", 77 | "value": false, 78 | "label": "Flush All Streams", 79 | "description": "Flush and load every stream into DuckDB when one batch is full. Warning - This may trigger the COPY command to use files with low number of records." 80 | }, 81 | { 82 | "name": "hard_delete", 83 | "kind": "boolean", 84 | "value": false, 85 | "label": "Hard Delete", 86 | "description": "When hard_delete option is true then DELETE SQL commands will be performed in DuckDB to delete rows in tables. It's achieved by continuously checking the _SDC_DELETED_AT metadata column sent by the singer tap. Due to deleting rows requires metadata columns, hard_delete option automatically enables the add_metadata_columns option as well." 87 | }, 88 | { 89 | "name": "path", 90 | "kind": "string", 91 | "label": "Connection Path", 92 | "description": "The path to use for the `duckdb.connect` call; either a local file or a MotherDuck connection uri.", 93 | "placeholder": "/path/to/local/file.duckdb" 94 | }, 95 | { 96 | "name": "primary_key_required", 97 | "kind": "boolean", 98 | "value": true, 99 | "label": "Primary Key Required", 100 | "description": "Log based and Incremental replications on tables with no Primary Key cause duplicates when merging UPDATE events. When set to true, stop loading data if no Primary Key is defined." 
101 | }, 102 | { 103 | "name": "quotechar", 104 | "kind": "string", 105 | "value": "\"", 106 | "label": "Quote Character", 107 | "description": "The quote character to use for the CSV files that are used for record imports." 108 | }, 109 | { 110 | "name": "schema_mapping", 111 | "kind": "object", 112 | "label": "schema_mapping", 113 | "description": "Useful if you want to load multiple streams from one tap to multiple DuckDB schemas.\n\nIf the tap sends the stream_id in - format then this option overwrites the default_target_schema value.\n" 114 | }, 115 | { 116 | "name": "temp_dir", 117 | "kind": "string", 118 | "label": "Temporary Directory", 119 | "description": "Directory of temporary CSV files with RECORD messages." 120 | }, 121 | { 122 | "name": "token", 123 | "kind": "string", 124 | "label": "Token", 125 | "description": "For MotherDuck connections, the auth token to use.", 126 | "sensitive": true 127 | }, 128 | { 129 | "name": "validate_records", 130 | "kind": "boolean", 131 | "value": false, 132 | "label": "Validate Records", 133 | "description": "Validate every single record message to the corresponding JSON schema. This option is disabled by default and invalid RECORD messages will fail only at load time by DuckDB. Enabling this option will detect invalid records earlier but could cause performance degradation." 134 | } 135 | ] 136 | } 137 | --------------------------------------------------------------------------------