├── .gitignore ├── .images ├── PostgreSQL_logo.3colors.120x120.png ├── Snowflake_Logo.svg.png ├── color_white_debezium_type_600px.svg ├── database-tables.png ├── debeziumio-ar21.svg ├── horizontal-logo-monochromatic-white.png ├── howto-flow.png ├── logo-mysql-170x115.png ├── snowflake-security.png ├── solution.drawio.png └── solution.png ├── LICENSE ├── README.md ├── articles ├── .images │ ├── docker-compose.png │ ├── snowflake_console.png │ ├── solution-capture-data-changes.png │ ├── solution-debezium.png │ ├── solution-kafka-to-snowflake.png │ ├── solution-replication.png │ ├── solution-sink-snowflake.png │ ├── solution-solution-points.png │ └── solution-solution.png └── dzone_howto_building-an-enterprise-cdc-solution.md ├── database ├── README.md ├── init_db.sh ├── mysql_crud.sh ├── postgres_crud.sh └── sql │ ├── 00_mysql_init.sql │ ├── 00_postgres_init.sql │ ├── 01_mysql_changes.sql │ └── 01_postgres_changes.sql ├── debezium ├── README.md ├── connect │ ├── debezium-mysql-inventory-connector.json │ └── debezium-postgres-inventory-connector.json ├── delete_cdc.sh ├── init_cdc.sh └── status_cdc.sh ├── services ├── .env ├── README.md ├── docker-compose.png ├── docker-compose.yml └── render_compose_image.sh └── snowflake ├── README.md ├── connect └── snowflake-sink-connector.json ├── delete_sink.sh ├── init_sink.sh ├── keys ├── README.md ├── snowflake_rsa_key.p8 ├── snowflake_rsa_key.pem └── snowflake_rsa_key.pub ├── sql ├── 00-security.sql ├── 01-cdc-to-replica-mysql.sql └── 01-cdc-to-replica-postgres.sql └── status_sink.sh /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.class 3 | 4 | database/data_mysql.csv 5 | 6 | database/data_postgres.csv 7 | 8 | services/.cache/ 9 | -------------------------------------------------------------------------------- /.images/PostgreSQL_logo.3colors.120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/PostgreSQL_logo.3colors.120x120.png -------------------------------------------------------------------------------- /.images/Snowflake_Logo.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/Snowflake_Logo.svg.png -------------------------------------------------------------------------------- /.images/color_white_debezium_type_600px.svg: -------------------------------------------------------------------------------- 1 | color_white -------------------------------------------------------------------------------- /.images/database-tables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/database-tables.png -------------------------------------------------------------------------------- /.images/debeziumio-ar21.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 13 | 14 | 18 | 22 | 26 | 30 | 31 | 33 | 35 | 39 | 40 | 41 | 42 | 43 | 44 | 47 | 48 | 49 | 50 | 51 | 52 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- 
/.images/horizontal-logo-monochromatic-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/horizontal-logo-monochromatic-white.png -------------------------------------------------------------------------------- /.images/howto-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/howto-flow.png -------------------------------------------------------------------------------- /.images/logo-mysql-170x115.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/logo-mysql-170x115.png -------------------------------------------------------------------------------- /.images/snowflake-security.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/snowflake-security.png -------------------------------------------------------------------------------- /.images/solution.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/solution.drawio.png -------------------------------------------------------------------------------- /.images/solution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/.images/solution.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Debezium to Snowflake
2 | 
3 | - [Debezium to Snowflake](#debezium-to-snowflake)
4 |   - [Requirements](#requirements)
5 |   - [Organization](#organization)
6 |   - [How-to steps](#how-to-steps)
7 |   - [I need more!!](#i-need-more)
8 | 
9 | This repo is a demo of how to use Debezium to capture changes over tables in MySQL and PostgreSQL
10 | and generate a near-real-time replica in Snowflake. The approach is extensible to other databases and
11 | covers several common points about CDC, Kafka, Kafka connect, and Snowflake tooling.
12 | 
13 | [Miguel García] and I worked together on the DZone article [Data Platform: Building an Enterprise CDC Solution],
14 | and as a next step I published this repo as [HOWTO: Building an Enterprise CDC Solution].
15 | 
16 | ![solution.png](./.images/solution.png)
17 | 
18 | ## Requirements
19 | 
20 | To make the howto easy to run, the services are deployed using **[docker-compose]**,
21 | which depends on the **[docker engine]**. For better compatibility we use version 2 of the docker-compose specification,
22 | so a **docker engine 1.10.0** or later should work.
23 | 
24 | As part of the howto, you will create a Snowflake account, and the guide walks you through creating a key pair for authentication.
25 | To perform these actions, you need an **[OpenSSL toolkit]**. It is commonly available in Linux distributions and
26 | can be installed on Windows or Mac. If you need it, you can also run it inside a docker image (this is mentioned in the howto).
27 | 
28 | For hardware requirements, review the **[docker engine]** requirements.
29 | 
30 | ## Organization
31 | 
32 | Well, this demo has several parts. To keep things simple, it has been split into several folders in this repo.
33 | In each folder you can find a README file with explanations:
34 | 
35 | - **[services]**: everything related to docker images and services
36 | - **[database]**: SQL statements and scripts to run inside the local databases
37 | - **[debezium]**: configuration and scripts to start and check the status of the Debezium connectors
38 | - **[snowflake]**: Snowflake scripts and the configuration of the Snowflake sink connector
39 | 
40 | ## How-to steps
41 | 
42 | You can see a detailed howto in the DZone article [HOWTO: Building an Enterprise CDC Solution], which follows these steps:
43 | 
44 | ![howto-flow](.images/howto-flow.png)
45 | 
46 | In this flow:
47 | - Gray: local services
48 | - Yellow: external resources
49 | 
50 | ## I need more!!
51 | 
52 | Well, check the README available in each folder. Each one includes some detail about its components
53 | and some additional scripts or functions that you can use to explore this solution.
54 | 
55 | I hope this tutorial has been helpful for you and that you have enjoyed it.
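A note on the **[OpenSSL toolkit]** requirement: the repo already ships a sample key pair under `snowflake/keys`, but if you prefer to generate your own, the commands look roughly like this (a minimal sketch based on Snowflake's key-pair authentication docs; the file names simply mirror the ones bundled in this repo, and you will be prompted for a passphrase):

```sh
# Sketch: create an encrypted private key and its matching public key
openssl genrsa 2048 | openssl pkcs8 -topk8 -v2 des3 -inform PEM -out snowflake_rsa_key.p8
openssl rsa -in snowflake_rsa_key.p8 -pubout -out snowflake_rsa_key.pub
```

Register the resulting public key in Snowflake as described in the howto, and keep the passphrase at hand for the sink connector configuration.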
56 | 57 | 58 | [Miguel García]: https://dzone.com/users/4531976/miguelglor.html 59 | [Data Platform: Building an Enterprise CDC Solution]: https://dzone.com/articles/data-platform-building-an-enterprise-cdc-solution 60 | [HOWTO: Building an Enterprise CDC Solution]: https://dzone.com/articles/howto_building-an-enterprise-cdc-solution 61 | [docker-compose]: https://docs.docker.com/compose/install/ 62 | [docker engine]: https://docs.docker.com/engine/ 63 | [OpenSSL toolkit]: https://github.com/openssl/openssl#build-and-install 64 | [services]: services/README.md 65 | [database]: database/README.md 66 | [debezium]: debezium/README.md 67 | [snowflake]: snowflake/README.md 68 | [snowflake/keys README]: snowflake/keys 69 | [snowflake/sql/00-security.sql]: snowflake/sql/00-security.sql 70 | [snowflake/connect/snowflake-sink-connector.json]: snowflake/connect/snowflake-sink-connector.json 71 | [snowflake/sql/01-cdc-to-replica-mysql.sql]: snowflake/sql/01-cdc-to-replica-mysql.sql 72 | [snowflake/sql/01-cdc-to-replica-postgres.sql]: snowflake/sql/01-cdc-to-replica-postgres.sql 73 | -------------------------------------------------------------------------------- /articles/.images/docker-compose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/docker-compose.png -------------------------------------------------------------------------------- /articles/.images/snowflake_console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/snowflake_console.png -------------------------------------------------------------------------------- /articles/.images/solution-capture-data-changes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/solution-capture-data-changes.png -------------------------------------------------------------------------------- /articles/.images/solution-debezium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/solution-debezium.png -------------------------------------------------------------------------------- /articles/.images/solution-kafka-to-snowflake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/solution-kafka-to-snowflake.png -------------------------------------------------------------------------------- /articles/.images/solution-replication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/solution-replication.png -------------------------------------------------------------------------------- /articles/.images/solution-sink-snowflake.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/solution-sink-snowflake.png -------------------------------------------------------------------------------- /articles/.images/solution-solution-points.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/solution-solution-points.png -------------------------------------------------------------------------------- /articles/.images/solution-solution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/articles/.images/solution-solution.png -------------------------------------------------------------------------------- /articles/dzone_howto_building-an-enterprise-cdc-solution.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This article is a follow-up to the [Data Platform: Building an Enterprise CDC Solution](https://dzone.com/articles/data-platform-building-an-enterprise-cdc-solution), where [Miguel García](https://dzone.com/users/4531976/miguelglor.html) and I described: 4 | 5 | * Several Change Data Capture (CDC) use cases and common scenarios in an enterprise platform 6 | * A proposal using Debezium (as log-based CDC) to capture data from the relational databases, and Kafka as a channel that enables several consumers to propagate data changes for different use cases. 7 | 8 | One of the common scenarios for this solution consists of data replication from OLTP Database to OLAP Database (from the operational database to the data warehouse). 9 | 10 | In this article, I'm going to provide a "how-to" to deploy a sample of a CDC process to replicate data from two different relational databases to Snowflake: 11 | * Manage the data changes in a common format. 12 | * Set up a Debezium in Kafka Connect to get data changes and push into Kafka topics. 13 | * Set up Snowflake Sink in Kafka Connect to get data changes from Kafka topics and push the data to Snowflake. 14 | * Apply a specific replication logic to consolidate the data change events in Snowflake, avoiding the use of the JDBC connector for better cost-effectiveness. 15 | 16 | ![solution](.images/solution-solution.png) 17 | 18 | # Step-by-step 19 | 20 | [The GitHub repository](https://github.com/dariocazas/howto-debezium-to-snowflake) includes a detailed description as well as several scripts that you will need in this "how-to": 21 | 22 | ```sh 23 | git clone https://github.com/dariocazas/howto-debezium-to-snowflake.git 24 | ``` 25 | 26 | > Note: every folder in this repository has a README file with more info about the process. 27 | 28 | The steps are: 29 | 30 | 1. Pre-requirements 31 | 1. Local environment 32 | 2. Snowflake database 33 | 3. Snowflake authentication 34 | 2. How to capture data changes from databases to a Kafka topic 35 | 1. Start local services 36 | 2. Prepare the databases 37 | 3. Start Debezium 38 | 4. Check data capture 39 | 3. How to push data changes from a Kafka topic into Snowflake 40 | 1. Start local sink process 41 | 2. Check data capture into CDC tables 42 | 3. Apply replication logic 43 | 4. Check data replication 44 | 45 | ![steps](.images/solution-solution-points.png) 46 | 47 | ## 1. 
Pre-requirements
48 | 
49 | ### 1.1. Local environment
50 | - [docker-compose](https://docs.docker.com/compose/install/) and [docker engine](https://docs.docker.com/engine/) 1.10.0 or later.
51 | - [jq](https://stedolan.github.io/jq/download/) as a JSON parser used in the scripts.
52 | 
53 | ### 1.2. Snowflake database
54 | 
55 | You need a Snowflake account. To create a trial one, follow the [Snowflake Trial Accounts doc](https://docs.snowflake.com/en/user-guide/admin-trial-account.html).
56 | 
57 | Log in to your Snowflake account, create a database, and run the next steps in it:
58 | 
59 | ```sql
60 | USE ROLE ACCOUNTADMIN;
61 | CREATE DATABASE HOWTO_DB;
62 | ```
63 | 
64 | > Note: in a production environment, it is not recommended to use the ACCOUNTADMIN role for all the tasks, as I do in this howto for simplicity.
65 | 
66 | ### 1.3. Snowflake authentication
67 | 
68 | In this howto, we use key-pair authentication. The detailed process is documented [here](https://docs.snowflake.com/en/user-guide/kafka-connector-install.html#using-key-pair-authentication-key-rotation). You can use the key pair provided by the repository:
69 | * Encrypted private key: `snowflake/keys/snowflake_rsa_key.p8`
70 | * Private passphrase to decrypt it: `mypassphrase`
71 | * Public key: `snowflake/keys/snowflake_rsa_key.pub`
72 | 
73 | As the next step, register the public key in the Snowflake Worksheet (replace the key in this script with the content of your `snowflake/keys/snowflake_rsa_key.pub`, without the header and footer lines):
74 | 
75 | ```sql
76 | USE ROLE ACCOUNTADMIN;
77 | ALTER USER dariocazas SET rsa_public_key='MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwBwYbPtbEUXueQ6u3KDw
78 | zlKu4IhAkGdcUBVbdTdUVBLNVsZX+eiKOedN3EnMtDeVzRlaT8JAwHX0LVXkgXtn
79 | KzMBp6TpS4j+2kKvbZc5p0KfZHjn42G+C/DXI4ZNQZEBQ/Q4UY6OkTZepFaOX3ev
80 | 2icxB6LnnVYI3WHkSnq3vTthhYhTuUOQ4YRudadOtoT4By09hxbsaanVl42FXIZP
81 | AXX1jwawzKe52V1+FB5/UMv+JMUFfczlO+acn/EaZvKbR55Vk/+OVrUP4KIKvdWn
82 | s/n4ASYqxiw9xjrizGCoUyl+b+Ch6A02fTU02HrT9jOOj+dVAeFD2QGOqaze0eCD
83 | dwIDAQAB';
84 | ```
85 | 
86 | ## 2. How to capture data changes from databases to a Kafka topic
87 | 
88 | In this step, you start two different database engines and enable a CDC process. As a result, you have two Kafka topics with Debezium events that you can consume.
89 | 
90 | ![capture-data-changes](.images/solution-capture-data-changes.png)
91 | 
92 | ### 2.1. Start local services
93 | 
94 | The repository contains a docker-compose file to run several services in your local environment:
95 | * Two database engines: MySQL and PostgreSQL
96 | * One Kafka broker (and its zookeeper)
97 | * Two Kafka connect services: one to run the Debezium CDC tasks and another to send the events to Snowflake
98 | 
99 | ![docker-compose](.images/docker-compose.png)
100 | 
101 | In a terminal run:
102 | 
103 | ```sh
104 | cd services
105 | docker-compose up
106 | ```
107 | 
108 | It can take several minutes to download and start the services. Keep this terminal open to be able to see the service logs. When the tutorial is finished, you can stop everything using `Ctrl+C`.
109 | 
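Optionally, before moving on, check from a second terminal that all the containers are up. This is just a quick sanity check; the exact service names are defined in `services/docker-compose.yml`, so they may differ slightly from the summary above:

```sh
# List the services started by docker-compose and their state
cd services
docker-compose ps
# Expect the two databases, zookeeper, kafka and both Kafka Connect services in the "Up" state
```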
110 | ### 2.2. Prepare the databases
111 | 
112 | There are two SQL initialization scripts:
113 | * `database/sql/00_mysql_init.sql`: creates the `users` table
114 | * `database/sql/00_postgres_init.sql`: creates the `product` table
115 | 
116 | To apply these SQL scripts in the dockerized services and populate some data, run the following lines in a terminal:
117 | 
118 | ```sh
119 | cd database
120 | # create tables
121 | ./init_db.sh
122 | # Populate data
123 | ./mysql_crud.sh
124 | ./postgres_crud.sh
125 | ```
126 | 
127 | In the output, you can see several CRUD operations on the tables and the final state after those operations. You can close this terminal.
128 | 
129 | ### 2.3. Start Debezium
130 | 
131 | The docker service `cdc_connect` has the necessary dependencies to run Debezium over MySQL and Postgres. The configuration is available in:
132 | * `debezium/connect/debezium-mysql-inventory-connector.json`
133 | * `debezium/connect/debezium-postgres-inventory-connector.json`
134 | 
135 | Open a terminal and initialize the capture of the tables:
136 | ```sh
137 | cd debezium
138 | ./init_cdc.sh
139 | ```
140 | 
141 | In the docker-compose terminal, you can see how the connectors start. When the log stabilizes, you can check the status of the Debezium connectors in the previous terminal using:
142 | ```sh
143 | # I assume you are in the debezium folder
144 | ./status_cdc.sh
145 | ```
146 | 
147 | You can close this terminal.
148 | 
149 | ### 2.4. Check data capture
150 | 
151 | You can test that the capture is working with this strategy:
152 | * Open a terminal with a live consumer of the events
153 | * Run CRUD operations in the database
154 | 
155 | First, for MySQL, open a terminal and run:
156 | ```sh
157 | cd services
158 | docker-compose exec kafka /kafka/bin/kafka-console-consumer.sh \
159 |     --bootstrap-server kafka:9092 --from-beginning \
160 |     --topic mysqldb.inventory.users \
161 |     --property print.key=true --property print.timestamp=true
162 | ```
163 | 
164 | The terminal will show every new event pushed by Debezium to Kafka, one for each insert/update/delete done in `inventory.users` in MySQL.
165 | 
166 | Now do the same for PostgreSQL in another terminal:
167 | ```sh
168 | cd services
169 | docker-compose exec kafka /kafka/bin/kafka-console-consumer.sh \
170 |     --bootstrap-server kafka:9092 --from-beginning \
171 |     --topic postgresdb.inventory.product \
172 |     --property print.key=true --property print.timestamp=true
173 | ```
174 | 
175 | To generate new events, open a terminal and run:
176 | ```sh
177 | cd database
178 | ./mysql_crud.sh
179 | ./postgres_crud.sh
180 | ```
181 | 
182 | You should see new data change events in the consumer terminals.
183 | 
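If you want to inspect the structure of a single change event instead of watching the live stream, you can combine the console consumer with the `jq` prerequisite. This is only a sketch, and it assumes at least one event already exists in the topic:

```sh
# Print the operation code and the before/after images of the first captured event
cd services
docker-compose exec kafka /kafka/bin/kafka-console-consumer.sh \
    --bootstrap-server kafka:9092 --from-beginning --max-messages 1 \
    --topic mysqldb.inventory.users | jq '.payload | {op, before, after}'
```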
184 | ## 3. How to push data changes from a Kafka topic into Snowflake
185 | 
186 | In this step, you send the Kafka events to Snowflake and generate a replica of the source tables.
187 | 
188 | ![sink-snowflake](.images/solution-sink-snowflake.png)
189 | 
190 | ### 3.1. Start local sink process
191 | 
192 | The docker service `sink_connect` has the necessary dependencies to run the Snowflake Sink connector to push new Kafka events into the Snowflake tables. The configuration is available in `snowflake/connect/snowflake-sink-connector.json`, and you need to update:
193 | * The Snowflake URL with yours, in the field `snowflake.url.name`
194 | * The authentication fields, if you generated your own key pair in the previous step: `snowflake.private.key` and `snowflake.private.key.passphrase`
195 | 
196 | Open a terminal and start the upload of the Kafka topics:
197 | ```sh
198 | cd snowflake
199 | ./init_sink.sh
200 | ```
201 | 
202 | In the docker-compose terminal, you can see how the connector starts. When the log stabilizes, you can check the status of the Snowflake connector in the previous terminal using:
203 | ```sh
204 | # From the snowflake folder
205 | ./status_sink.sh
206 | ```
207 | 
208 | ### 3.2. Check data capture into CDC tables
209 | 
210 | When the sink connector uploads the events from the Kafka topics, it creates these tables:
211 | * `CDC_MYSQL_INVENTORY_USERS`
212 | * `CDC_POSTGRESDB_INVENTORY_PRODUCT`
213 | 
214 | The upload to Snowflake is done in batches, so it may take some time until the data is available in Snowflake (on the order of 30-60 seconds).
215 | 
216 | From your Snowflake Worksheet, validate that your events are populated in the new tables:
217 | ```sql
218 | USE ROLE ACCOUNTADMIN;
219 | USE SCHEMA HOWTO_DB.PUBLIC;
220 | SELECT * FROM CDC_MYSQL_INVENTORY_USERS;
221 | SELECT * FROM CDC_POSTGRESDB_INVENTORY_PRODUCT;
222 | ```
223 | 
224 | Applying new changes in your dockerized databases produces new rows in these tables:
225 | 
226 | 1. In the Snowflake Worksheet, check the current event counts:
227 | ```sql
228 | SELECT 'Events MySQL', COUNT(1) FROM CDC_MYSQL_INVENTORY_USERS
229 | UNION ALL
230 | SELECT 'Events PostgreSQL', COUNT(1) FROM CDC_POSTGRESDB_INVENTORY_PRODUCT;
231 | ```
232 | 2. From a terminal, apply changes in your databases:
233 | ```sh
234 | cd database
235 | ./mysql_crud.sh
236 | ./postgres_crud.sh
237 | ```
238 | 3. Wait until the events are sent to Snowflake (you can see the log in the docker-compose terminal)
239 | 4. Repeat the query in the Snowflake Worksheet
240 | 
241 | ### 3.3. Apply replication logic
242 | 
243 | In the repository there are two scripts with the SQL logic to generate the replica of the source tables:
244 | * `snowflake/sql/01-cdc-to-replica-mysql.sql`
245 | * `snowflake/sql/01-cdc-to-replica-postgres.sql`
246 | 
247 | From your Snowflake Worksheet, execute these two scripts. As a result, you have two views with the same structure as the source tables:
248 | * `MYSQL_INVENTORY_USERS`
249 | * `POSTGRESDB_INVENTORY_PRODUCT`
250 | 
251 | Both scripts follow the same logic, creating a scheduled task that processes the new events as they arrive and updates the replica table.
252 | 
253 | ![replication](.images/solution-replication.png)
254 | 
255 | > Note: one part of these SQL scripts (the MERGE statement) depends on the source database engine. The Debezium events carry metadata about the source engine, which is used to determine the last event for an entity. Take this into account if you replicate this logic in your production systems.
256 | 
257 | ### 3.4. Check data replication
258 | 
259 | The end-to-end flow is running now. You can check the data available in your local databases and validate it against the Snowflake views:
260 | 1. In a terminal, get the current state of the MySQL users table:
261 | ```sh
262 | cd services
263 | echo "SELECT * FROM users ORDER BY id" | docker-compose \
264 |     exec -T mysql \
265 |     bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD inventory'
266 | ```
267 | 2. Go to the Snowflake Worksheet and validate the result with:
268 | ```sql
269 | USE ROLE ACCOUNTADMIN;
270 | USE SCHEMA HOWTO_DB.PUBLIC;
271 | SELECT * FROM MYSQL_INVENTORY_USERS;
272 | ```
273 | 3. In a terminal, get the current state of the PostgreSQL product table:
274 | ```sh
275 | # I assume you are in the services folder
276 | echo "SELECT * FROM product ORDER BY id" | docker-compose \
277 |     exec -T postgres \
278 |     env PGOPTIONS="--search_path=inventory" \
279 |     bash -c 'psql -U $POSTGRES_USER postgres'
280 | ```
281 | 4. And validate it in the Snowflake Worksheet:
282 | ```sql
283 | USE ROLE ACCOUNTADMIN;
284 | USE SCHEMA HOWTO_DB.PUBLIC;
285 | SELECT * FROM POSTGRESDB_INVENTORY_PRODUCT;
286 | ```
287 | 5. Generate new insert-delete-update operations from a terminal:
288 | ```sh
289 | cd database
290 | ./mysql_crud.sh
291 | ./postgres_crud.sh
292 | ```
293 | 6. Wait until the events are sent to Snowflake (review the docker-compose terminal log).
294 | 7. Wait until the scheduled task is triggered in Snowflake:
295 | ```sql
296 | USE ROLE ACCOUNTADMIN;
297 | select name, state, error_code, error_message, scheduled_time, next_scheduled_time
298 | from table(HOWTO_DB.information_schema.task_history())
299 | order by scheduled_time desc;
300 | ```
301 | 8. Validate the content of the tables in Snowflake again
302 | 
303 | # Conclusions
304 | 
305 | **Debezium provides an easy way to capture changes from databases** and publish the change events to a Kafka service, where you can consume them in several ways.
306 | 
307 | To propagate these changes to another database you can use the simplest approach (a JDBC sink), but this is not always the best option **in the context of the new generation of data warehouses**, and you probably need to **consider another kind of strategy, closer to that service,** for better performance and a lower cost of use.
308 | 
309 | First of all, study the possibilities; then, after **testing them with a POC similar to this howto** (including aspects like performance and cost review), **proceed with the next steps** (security, naming, automation, data quality, failover, ...).
310 | 
--------------------------------------------------------------------------------
/database/README.md:
--------------------------------------------------------------------------------
1 | # Howto - Database description
2 | 
3 | 
4 | ![PostgreSQL-logo](../.images/PostgreSQL_logo.3colors.120x120.png)
5 | ![MySQL-logo](../.images/logo-mysql-170x115.png)
6 | 
7 | * [Access to database shell](#access-to-database-shell)
8 | * [Tables](#tables)
9 | * [CRUD operations](#crud-operations)
10 | 
11 | 
12 | As part of this howto, I provide:
13 | 
14 | - SQL scripts to create new tables and data
15 | - Bash scripts to apply the SQL over the dockerized databases
16 | 
17 | ## Access to database shell
18 | 
19 | You can open the shell of your database and run your commands:
20 | 
21 | ```sh
22 | # Go to services folder (important)
23 | cd howto-debezium-to-snowflake/services
24 | 
25 | # Access to MySQL shell
26 | docker-compose exec mysql \
27 |     bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD inventory'
28 | 
29 | # Access to Postgres shell
30 | docker-compose exec postgres \
31 |     env PGOPTIONS="--search_path=inventory" \
32 |     bash -c 'psql -U $POSTGRES_USER postgres'
33 | ```
34 | 
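As a quick smoke test that the shell access works, you can also pipe a statement into each client, following the same pattern used by the scripts in this folder (a sketch; run it from the `services` folder):

```sh
# List the tables in the MySQL inventory database
echo "SHOW TABLES;" | docker-compose exec -T mysql \
    bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD inventory'

# And the equivalent for Postgres
echo "\dt" | docker-compose exec -T postgres \
    env PGOPTIONS="--search_path=inventory" \
    bash -c 'psql -U $POSTGRES_USER postgres'
```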
35 | ## Tables
36 | 
37 | Well, to simplify the howto, we use the database images provided by Debezium.
38 | When the database services are up, you should run this script:
39 | 
40 | ```sh
41 | ./init_db.sh
42 | ```
43 | 
44 | This script initializes tables in both database instances (MySQL and PostgreSQL),
45 | loading them from the `./sql` folder.
46 | 
47 | The SQL script [`sql/00_mysql_init.sql`](./sql/00_mysql_init.sql) creates the
48 | **users table** with five basic fields, common to a lot of databases.
49 | 
50 | The SQL script [`sql/00_postgres_init.sql`](./sql/00_postgres_init.sql) creates the
51 | **product table** with five basic fields, common to a lot of databases.
52 | 
53 | The `init_db.sh` script uses these SQL files to initialize the tables (one for each database)
54 | in the preconfigured `inventory` database.
55 | 
56 | Both tables have a `created_on` field with the timestamp of creation. This field
57 | is not necessary for CDC, but it can be useful to perform some checks in the sink destination.
58 | 
59 | ## CRUD operations
60 | 
61 | Well, as part of the demo, you need to run actions over the databases. For this reason,
62 | I provide two scripts:
63 | 
64 | - `mysql_crud.sh`: triggers several inserts, updates and deletes, and shows the final state of the **users** table
65 | - `postgres_crud.sh`: the same, but over the PostgreSQL **product** table
66 | 
67 | You can launch these scripts over and over again to generate new data in the databases,
68 | which will be replicated via CDC as events in Kafka.
69 | 
70 | 
--------------------------------------------------------------------------------
/database/init_db.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | DOCKER_COMPOSE_FILE=docker-compose.yml
3 | DOCKER_COMPOSE_RELATIVE_PATH=../services
4 | 
5 | MYSQL=`cat sql/00_mysql_init.sql`
6 | POSTGRES=`cat sql/00_postgres_init.sql`
7 | 
8 | cd $DOCKER_COMPOSE_RELATIVE_PATH
9 | 
10 | echo "MySQL new table"
11 | echo "$MYSQL"
12 | echo "$MYSQL" | docker-compose \
13 |     -f $DOCKER_COMPOSE_FILE \
14 |     exec -T mysql \
15 |     bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD inventory'
16 | 
17 | echo "PostgreSQL new table"
18 | echo "$POSTGRES"
19 | echo "$POSTGRES" | docker-compose \
20 |     -f $DOCKER_COMPOSE_FILE \
21 |     exec -T postgres \
22 |     env PGOPTIONS="--search_path=inventory" \
23 |     bash -c 'psql -U $POSTGRES_USER postgres'
24 | 
--------------------------------------------------------------------------------
/database/mysql_crud.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | DOCKER_COMPOSE_FILE=docker-compose.yml
3 | DOCKER_COMPOSE_RELATIVE_PATH=../services
4 | 
5 | run_sql() {
6 |     echo "$1"
7 |     echo "$1" | docker-compose \
8 |         -f $DOCKER_COMPOSE_FILE \
9 |         exec -T mysql \
10 |         bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD inventory 2> /dev/null'
11 | }
12 | 
13 | DML=$(cat sql/01_mysql_changes.sql)
14 | 
15 | cd $DOCKER_COMPOSE_RELATIVE_PATH
16 | run_sql "$DML"
17 | 
--------------------------------------------------------------------------------
/database/postgres_crud.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | DOCKER_COMPOSE_FILE=docker-compose.yml
3 | DOCKER_COMPOSE_RELATIVE_PATH=../services
4 | 
5 | run_sql() {
6 |     echo "$1"
7 |     echo "$1" | docker-compose \
8 |         -f $DOCKER_COMPOSE_FILE \
9 |         exec -T postgres \
10 |         env PGOPTIONS="--search_path=inventory" \
11 |         bash -c 'psql -U $POSTGRES_USER postgres 2> /dev/null'
12 | }
13 | 
14 | DML=$(cat sql/01_postgres_changes.sql)
15 | 
16 | cd $DOCKER_COMPOSE_RELATIVE_PATH
17 | run_sql "$DML"
18 | 
-------------------------------------------------------------------------------- /database/sql/00_mysql_init.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE users ( 2 | id MEDIUMINT PRIMARY KEY AUTO_INCREMENT, 3 | name VARCHAR(20), 4 | email VARCHAR(255), 5 | password VARCHAR(100), 6 | created_on TIMESTAMP DEFAULT CURRENT_TIMESTAMP 7 | ); 8 | -------------------------------------------------------------------------------- /database/sql/00_postgres_init.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE product ( 2 | id serial PRIMARY KEY, 3 | name VARCHAR(100), 4 | description VARCHAR(255), 5 | created_on TIMESTAMP NOT NULL DEFAULT NOW() 6 | ); 7 | -------------------------------------------------------------------------------- /database/sql/01_mysql_changes.sql: -------------------------------------------------------------------------------- 1 | -- Insert six users in three sentences 2 | INSERT INTO users(name, email, password) 3 | SELECT 'Lara', concat('lara', LEFT(UUID(), 4), '@email.com'), LEFT(UUID(), 25) 4 | ; 5 | INSERT INTO users(name, email, password) 6 | SELECT 'Jackson', concat('jackson', LEFT(UUID(), 4), '@email.com'), LEFT(UUID(), 25) 7 | ; 8 | INSERT INTO users(name, email, password) 9 | SELECT name, concat(lower(name), LEFT(UUID(), 4), '@email.com'), LEFT(UUID(), 25) 10 | FROM ( 11 | SELECT 'Hana' AS name 12 | UNION SELECT 'Morgan' 13 | UNION SELECT 'Willie' 14 | UNION SELECT 'Bruce' 15 | ) t; 16 | -- Update last two user passwords 17 | UPDATE users SET password=LEFT(UUID(), 10) ORDER BY id DESC LIMIT 2 18 | ; 19 | -- Update first user password 20 | UPDATE users SET password=LEFT(UUID(), 5) ORDER BY id LIMIT 1 21 | ; 22 | -- Delete last user 23 | DELETE FROM users ORDER BY id DESC LIMIT 1 24 | ; 25 | -- Show actual state 26 | SELECT * FROM users ORDER BY id 27 | ; -------------------------------------------------------------------------------- /database/sql/01_postgres_changes.sql: -------------------------------------------------------------------------------- 1 | -- Insert six products in two sentences 2 | INSERT INTO product(name, description) 3 | SELECT name, concat('Description for ', name) 4 | FROM ( 5 | VALUES ('Harley Davidson Ultimate Chopper'), 6 | ('1996 Moto Guzzi 1100i') 7 | ) t (name) 8 | ; 9 | INSERT INTO product(name, description) 10 | SELECT name, concat('Description for ', name) 11 | FROM ( 12 | VALUES ('1985 Toyota Supra'), 13 | ('1957 Ford Thunderbird'), 14 | ('1938 Cadillac V-16 Presidential Limousine'), 15 | ('1982 Lamborghini Diablo') 16 | ) t (name) 17 | ; 18 | -- Update last two descriptions 19 | UPDATE product 20 | SET description=concat('(Update ', NOW(), ') - Desc. for ', name) 21 | WHERE id in ( 22 | SELECT id FROM product ORDER BY id DESC LIMIT 2 23 | ) 24 | ; 25 | -- Update first description 26 | UPDATE product 27 | SET description=concat('(Up. ', NOW(), ') - Desc. 
for ', name)
28 | WHERE id in (
29 |     SELECT min(id) FROM product
30 | )
31 | ;
32 | -- Delete last product
33 | DELETE FROM product
34 | WHERE id in (
35 |     SELECT id FROM product ORDER BY id DESC LIMIT 1
36 | )
37 | ;
38 | -- Show actual state
39 | SELECT * FROM product ORDER BY id
40 | ;
--------------------------------------------------------------------------------
/debezium/README.md:
--------------------------------------------------------------------------------
1 | # Howto - CDC with Debezium
2 | 
3 | ![Debezium-logo](../.images/color_white_debezium_type_600px.svg)
4 | 
5 | * [Usage](#usage)
6 | * [Context](#context)
7 |   + [Change Events](#change-events)
8 |   + [Connector actions](#connector-actions)
9 |   + [Connectors config](#connectors-config)
10 |     - [MySQL connector](#mysql-connector)
11 |     - [PostgreSQL connector](#postgresql-connector)
12 |     - [Secret management](#secret-management)
13 | 
14 | As part of this howto, I provide:
15 | 
16 | - Kafka connect configurations to capture changes from the MySQL and PostgreSQL databases
17 | - Scripts to create, destroy and check the status of these connectors
18 | 
19 | ## Usage
20 | 
21 | This folder includes three scripts that perform actions against the docker service `cdc_connect`:
22 | 
23 | - `init_cdc.sh`: takes the configurations available in the `./connect` folder and calls
24 |   the Kafka connect REST API to create the connectors that capture the changes
25 |   in the databases and push them to Kafka
26 | - `status_cdc.sh`: calls the Kafka connect REST API to get the list of configured
27 |   connectors, and then shows the status of each one
28 | - `delete_cdc.sh`: similar to the status script, but deletes all the connectors in this
29 |   Kafka connect service
30 | 
31 | With these scripts, you can run whatever tests you wish:
32 | 
33 | - Create connectors before or after the tables exist or have data
34 | - Destroy connectors, insert new data, and create them again to check for data loss
35 | - Whatever other test you can think of
36 | 
37 | ## Context
38 | 
39 | Kafka connect makes it possible to push events to Kafka from other systems, or pull events from Kafka into
40 | other systems, using only a configuration file, without developing a source/sink application.
41 | 
42 | The Kafka connector plugin needs to be deployed into the Kafka connect nodes (called
43 | worker nodes). After doing this, you can call a REST API with a configuration to
44 | enable a connector that pushes data from an external source to Kafka (as the CDC connector does for you)
45 | or pulls data from Kafka to other sink destinations.
46 | 
47 | ### Change Events
48 | 
49 | In Kafka, a topic can have one or more partitions. This enables parallel reads from consumers
50 | in the same consumer group. A consumer group is a group of consumers that sees the topic as
51 | a queue: each consumer can pull events from several partitions, but one partition cannot
52 | be read by more than one consumer of the same consumer group. This is the main point to understand
53 | one part of the event: the key.
54 | 
55 | An event has three parts:
56 | - Key:
57 |   - By default, all events with the same key are pushed to the same partition.
58 |   - It can be null; in this case, by default, a round-robin between partitions is performed on push.
59 | - Value: the event data
60 | - Headers: a collection of key-value pairs that can be set
61 | 
62 | Compared to the native CDC of each database, Debezium provides decoupling between the
63 | database engine and the events it emits, standardizing them and making them as uniform as possible.
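Since the ordering guarantees described above depend on the key-to-partition mapping, it can be useful to check how a CDC topic is partitioned. A minimal sketch against the dockerized Kafka, run from the `services` folder (older Kafka images may require `--zookeeper` instead of `--bootstrap-server`):

```sh
# Describe a CDC topic: partition count, leader and replicas
docker-compose exec kafka /kafka/bin/kafka-topics.sh \
    --bootstrap-server kafka:9092 --describe \
    --topic mysqldb.inventory.users
```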
64 | 
65 | As a key, Debezium (and other change data capture tools) includes the key fields of the table.
66 | 
67 | As a value, Debezium sends these common fields:
68 | - source: a metadata document about the connector and the source database
69 | - op: the operation code, which can be `r` (read, snapshot), `c` (create, insert), `u` (update), `d` (delete)
70 | - after: a document with the data state after the database operation
71 | - before: a document with the data state before the database operation
72 | 
73 | 
74 | Example of a key serialized as JSON:
75 | 
76 | ```JSON
77 | {
78 |   "payload": {
79 |     "id": 1
80 |   },
81 |   "schema": {
82 |     "fields": [
83 |       {
84 |         "field": "id",
85 |         "optional": false,
86 |         "type": "int32"
87 |       }
88 |     ],
89 |     "name": "mysqldb.inventory.users.Key",
90 |     "optional": false,
91 |     "type": "struct"
92 |   }
93 | }
94 | ```
95 | 
96 | 
97 | 98 |
99 | Example of value seralized as JSON 100 | 101 | ```JSON 102 | { 103 | "payload": { 104 | "after": { 105 | "created_on": "2021-07-28T16:32:45Z", 106 | "email": "lara7012@email.com", 107 | "id": 1, 108 | "name": "Lara", 109 | "password": "701280aa-efc1-11eb-a7c9-0" 110 | }, 111 | "before": null, 112 | "op": "c", 113 | "source": { 114 | "connector": "mysql", 115 | "db": "inventory", 116 | "file": "mysql-bin.000003", 117 | "gtid": null, 118 | "name": "mysqldb", 119 | "pos": 703, 120 | "query": null, 121 | "row": 0, 122 | "sequence": null, 123 | "server_id": 223344, 124 | "snapshot": "false", 125 | "table": "users", 126 | "thread": null, 127 | "ts_ms": 1627489965000, 128 | "version": "1.6.1.Final" 129 | }, 130 | "transaction": null, 131 | "ts_ms": 1627489965300 132 | }, 133 | "schema": { 134 | "fields": [ 135 | { 136 | "field": "before", 137 | "fields": [ 138 | { 139 | "field": "id", 140 | "optional": false, 141 | "type": "int32" 142 | }, 143 | { 144 | "field": "name", 145 | "optional": true, 146 | "type": "string" 147 | }, 148 | { 149 | "field": "email", 150 | "optional": true, 151 | "type": "string" 152 | }, 153 | { 154 | "field": "password", 155 | "optional": true, 156 | "type": "string" 157 | }, 158 | { 159 | "field": "created_on", 160 | "name": "io.debezium.time.ZonedTimestamp", 161 | "optional": true, 162 | "type": "string", 163 | "version": 1 164 | } 165 | ], 166 | "name": "mysqldb.inventory.users.Value", 167 | "optional": true, 168 | "type": "struct" 169 | }, 170 | { 171 | "field": "after", 172 | "fields": [ 173 | { 174 | "field": "id", 175 | "optional": false, 176 | "type": "int32" 177 | }, 178 | { 179 | "field": "name", 180 | "optional": true, 181 | "type": "string" 182 | }, 183 | { 184 | "field": "email", 185 | "optional": true, 186 | "type": "string" 187 | }, 188 | { 189 | "field": "password", 190 | "optional": true, 191 | "type": "string" 192 | }, 193 | { 194 | "field": "created_on", 195 | "name": "io.debezium.time.ZonedTimestamp", 196 | "optional": true, 197 | "type": "string", 198 | "version": 1 199 | } 200 | ], 201 | "name": "mysqldb.inventory.users.Value", 202 | "optional": true, 203 | "type": "struct" 204 | }, 205 | { 206 | "field": "source", 207 | "fields": [ 208 | { 209 | "field": "version", 210 | "optional": false, 211 | "type": "string" 212 | }, 213 | { 214 | "field": "connector", 215 | "optional": false, 216 | "type": "string" 217 | }, 218 | { 219 | "field": "name", 220 | "optional": false, 221 | "type": "string" 222 | }, 223 | { 224 | "field": "ts_ms", 225 | "optional": false, 226 | "type": "int64" 227 | }, 228 | { 229 | "default": "false", 230 | "field": "snapshot", 231 | "name": "io.debezium.data.Enum", 232 | "optional": true, 233 | "parameters": { 234 | "allowed": "true,last,false" 235 | }, 236 | "type": "string", 237 | "version": 1 238 | }, 239 | { 240 | "field": "db", 241 | "optional": false, 242 | "type": "string" 243 | }, 244 | { 245 | "field": "sequence", 246 | "optional": true, 247 | "type": "string" 248 | }, 249 | { 250 | "field": "table", 251 | "optional": true, 252 | "type": "string" 253 | }, 254 | { 255 | "field": "server_id", 256 | "optional": false, 257 | "type": "int64" 258 | }, 259 | { 260 | "field": "gtid", 261 | "optional": true, 262 | "type": "string" 263 | }, 264 | { 265 | "field": "file", 266 | "optional": false, 267 | "type": "string" 268 | }, 269 | { 270 | "field": "pos", 271 | "optional": false, 272 | "type": "int64" 273 | }, 274 | { 275 | "field": "row", 276 | "optional": false, 277 | "type": "int32" 278 | }, 279 | { 280 | "field": "thread", 
281 | "optional": true, 282 | "type": "int64" 283 | }, 284 | { 285 | "field": "query", 286 | "optional": true, 287 | "type": "string" 288 | } 289 | ], 290 | "name": "io.debezium.connector.mysql.Source", 291 | "optional": false, 292 | "type": "struct" 293 | }, 294 | { 295 | "field": "op", 296 | "optional": false, 297 | "type": "string" 298 | }, 299 | { 300 | "field": "ts_ms", 301 | "optional": true, 302 | "type": "int64" 303 | }, 304 | { 305 | "field": "transaction", 306 | "fields": [ 307 | { 308 | "field": "id", 309 | "optional": false, 310 | "type": "string" 311 | }, 312 | { 313 | "field": "total_order", 314 | "optional": false, 315 | "type": "int64" 316 | }, 317 | { 318 | "field": "data_collection_order", 319 | "optional": false, 320 | "type": "int64" 321 | } 322 | ], 323 | "optional": true, 324 | "type": "struct" 325 | } 326 | ], 327 | "name": "mysqldb.inventory.users.Envelope", 328 | "optional": false, 329 | "type": "struct" 330 | } 331 | } 332 | ``` 333 | 334 |
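A quick way to get a feel for these events from the command line is to count the operation codes captured so far. This is only a sketch: it is run from the `services` folder, needs `jq` on the host, and stops after an idle timeout (tombstone events show up as `null`):

```sh
# Count how many r/c/u/d operations have been captured for the users table so far
docker-compose exec kafka /kafka/bin/kafka-console-consumer.sh \
    --bootstrap-server kafka:9092 --from-beginning --timeout-ms 10000 \
    --topic mysqldb.inventory.users | jq -r '.payload.op' | sort | uniq -c
```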
335 | 336 | To maintain simplicity, this demo works with JSON events with the schema included in the event. 337 | In a non-test environment, the recommended approach is to use a Schema Registry to store the schemas 338 | and other serialization format like Avro to store it. 339 | 340 | ### Connector actions 341 | 342 | When connectors perform the first run, you can see an initial snapshot of the database (which is a configurable option). 343 | After doing this, every change applied to the tables that these connectors listen will be the track to Kafka. This include: 344 | - When you add new rows, one event per row will be inserted 345 | - When you update rows, 346 | - One event per row will be updated 347 | - If an update affects the key of the table, Debezium throw like a delete action and a new insert of data 348 | - When you delete rows, two events per row will be deleted (configurable option): 349 | - One event with info about the operation DELETE 350 | - Another event with a null value (events in Kafka have key, value, and headers, and any can be null) 351 | 352 | Each event has as key the key of the table, that enables guarantees of order. The topics of Kafka 353 | have properties to identify data retention and clean policies: 354 | - Retention by time 355 | - Retention by size 356 | - Retention by compaction 357 | 358 | When using compaction hold, when Kafka triggers the cleanup process, it keeps the last event for each key on the topic. 359 | If the last event for a key has a null value, Kafka removes all events for this key. With this approach, 360 | when a new consumer begins to read the topic, he does not have to download the changes from the origin of the replica: 361 | he first obtains the state of the table from the moment of the last compaction, and then continues reading 362 | the changes captured since then. 363 | 364 | 365 | ### Connectors config 366 | 367 | The Kafka connectors have common configuration properties and others that depend of 368 | the Kafka connector plugin that you use. A FileStreamSource connector needs 369 | the configuration of the file to read, and a CDC connector need info about the 370 | database that should be read: the configuration is not the same, but 371 | some parts are common: 372 | - name: all connectors should have a name to reference it 373 | - connector.class: the class that implements the connector, that may be a 374 | source (push external data to Kafka) or sink (pull data from Kafka to another system) 375 | - tasks.max: the maximum number of tasks that perform the source/sink action 376 | 377 | To review other common configurations, you can review [the official doc about kafka connect configuring]. 378 | 379 | Another main point of the Kafka connector is the ability to do some basic transformations (called SMT) 380 | of the event, like add some field or change the event key. We don't perform this 381 | in this howto, but can be interested in some use cases. 382 | 383 | #### MySQL connector 384 | 385 | You can see all the documentation about this Kafka connector plugin in 386 | the [Debezium connector for MySQL] page. 387 | 388 | This connector supports several MySQL topologies, but this demo will track 389 | changes for a standalone MySQL server. 390 | 391 | When you start the connector, you can see three new topics: 392 | 393 | - `mysqldb`: schema change topic, with schema change events that include all DDL 394 | statements applied to databases in the MySQL server. 
#### MySQL connector

You can find all the documentation about this Kafka connector plugin on
the [Debezium connector for MySQL] page.

This connector supports several MySQL topologies, but this demo tracks
changes for a standalone MySQL server.

When you start the connector, you can see three new topics:

- `mysqldb`: schema change topic, with schema change events that include all DDL
  statements applied to databases in the MySQL server. The name of this topic is
  the one set in the `database.server.name` property
- `mysqldb.schema-changes.inventory`: tracks DDL changes in the database and is
  required for the internal management of the CDC connector. You can configure the
  topic name with `database.history.kafka.topic`
- `mysqldb.inventory.users`:
  - If you ran the steps in the [database readme], you should have a topic for this table
  - This topic holds the change events for the `users` table

You can see the connector config in [`connect/debezium-mysql-inventory-connector.json`](./connect/debezium-mysql-inventory-connector.json):

- Connection properties:
  - `database.hostname`: IP address or hostname of the MySQL database server.
  - `database.port`: integer port number of the MySQL database server.
  - `database.user`: name of the MySQL user to use when connecting to the MySQL database server.
  - `database.password`: password to use when connecting to the MySQL database server.
  - `database.server.id`: a numeric ID of this database client, which must be unique across all
    currently-running database processes in the MySQL cluster. If not set, a random number is used.
  - `database.server.name`: logical name that identifies and provides a namespace for the particular
    MySQL database server/cluster in which Debezium is capturing changes.
- CDC properties:
  - `database.history.kafka.bootstrap.servers`: a list of host/port pairs that the connector uses for
    establishing an initial connection to the Kafka cluster. Each pair should point to the same Kafka
    cluster used by the Kafka Connect process.
  - `database.history.kafka.topic`: the full name of the Kafka topic where the connector stores the
    database schema history.
  - `database.include`: name of the database for which to capture changes. The connector does not capture
    changes in any database whose name is not in this property or in `database.include.list`.
  - `table.include.list`: an optional, comma-separated list of regular expressions that match
    fully-qualified table identifiers of tables whose changes you want to capture.
    The connector does not capture changes in any table not included in `table.include.list`.
  - There are also properties to exclude (rather than include) databases/tables, and many other
    options. Review the [official doc](https://debezium.io/documentation/reference/connectors/mysql.html#mysql-connector-properties).

#### PostgreSQL connector

You can find all the documentation about this Kafka connector plugin on
the [Debezium connector for PostgreSQL] page.

In this case, when you start the connector you only see one topic:
- `postgresdb.inventory.product`:
  - If you ran the steps in the [database readme], you should have a topic for this table
  - This topic holds the change events for the `product` table

The properties used are very similar to the MySQL connector, so no additional description is needed.

#### Secret management

It is good practice to keep your secrets out of the connector configs. You can review [KIP-297] to use
an external config provider and reference them instead; a minimal sketch follows.
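The snippet below is only a sketch of that approach (it is not wired into this howto, and the file path and property names are illustrative assumptions): the Connect worker registers a `FileConfigProvider` (`config.providers=file`, `config.providers.file.class=org.apache.kafka.common.config.provider.FileConfigProvider`), and the connector config then references values stored in an external properties file instead of embedding them:

```json
{
  "database.user": "${file:/secrets/mysql-credentials.properties:user}",
  "database.password": "${file:/secrets/mysql-credentials.properties:password}"
}
```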
444 | 445 | 446 | [database readme]: ../database/README.md 447 | [docker readme]: ../docker/README.md 448 | [Debezium connector for MySQL]: https://debezium.io/documentation/reference/connectors/mysql.html 449 | [Debezium connector for PostgreSQL]: https://debezium.io/documentation/reference/connectors/postgresql.html 450 | [the official doc about kafka connect configuring]: https://kafka.apache.org/documentation.html#connect_configuring 451 | [KIP-297]: https://cwiki.apache.org/confluence/display/KAFKA/KIP-297%3A+Externalizing+Secrets+for+Connect+Configurations -------------------------------------------------------------------------------- /debezium/connect/debezium-mysql-inventory-connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "debezium-mysql-inventory-connector", 3 | "config": { 4 | "connector.class": "io.debezium.connector.mysql.MySqlConnector", 5 | "tasks.max": "1", 6 | "database.hostname": "mysql", 7 | "database.port": "3306", 8 | "database.user": "debezium", 9 | "database.password": "dbz", 10 | "database.server.id": "184054", 11 | "database.server.name": "mysqldb", 12 | "database.include": "inventory", 13 | "database.history.kafka.bootstrap.servers": "kafka:9092", 14 | "database.history.kafka.topic": "mysqldb.schema-changes.inventory", 15 | "table.include.list": "inventory.users" 16 | } 17 | } -------------------------------------------------------------------------------- /debezium/connect/debezium-postgres-inventory-connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "debezium-postgres-inventory-connector", 3 | "config": { 4 | "connector.class": "io.debezium.connector.postgresql.PostgresConnector", 5 | "tasks.max": "1", 6 | "database.hostname": "postgres", 7 | "database.port": "5432", 8 | "database.user": "postgres", 9 | "database.password": "postgres", 10 | "database.dbname": "postgres", 11 | "database.server.name": "postgresdb", 12 | "schema.include": "inventory", 13 | "table.include.list": "inventory.product" 14 | } 15 | } -------------------------------------------------------------------------------- /debezium/delete_cdc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONNECT_URL=http://localhost:8083 4 | 5 | CONNECTORS=$(curl -s -k ${CONNECT_URL}/connectors) 6 | echo Connector list: 7 | echo $CONNECTORS 8 | echo 9 | 10 | for row in $(echo "${CONNECTORS}" | jq -c -r '.[]'); do 11 | status=$(curl -s -k -X DELETE "${CONNECT_URL}/connectors/${row}") 12 | echo Deleted ${row} 13 | done 14 | -------------------------------------------------------------------------------- /debezium/init_cdc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Based on https://github.com/debezium/debezium-examples/tree/master/tutorial 4 | 5 | CONNECT_URL=http://localhost:8083 6 | MYSQL_CONNECT_CONFIG=connect/debezium-mysql-inventory-connector.json 7 | POSTGRES_CONNECT_CONFIG=connect/debezium-postgres-inventory-connector.json 8 | 9 | echo "### Creating MySQL CDC connect ###" 10 | curl -i -X POST $CONNECT_URL/connectors \ 11 | -H "Content-Type:application/json" \ 12 | -d @$MYSQL_CONNECT_CONFIG 13 | echo . 14 | 15 | echo "### Creating Postgres CDC connect ###" 16 | curl -i -X POST $CONNECT_URL/connectors \ 17 | -H "Accept:application/json" \ 18 | -H "Content-Type:application/json" \ 19 | -d @$POSTGRES_CONNECT_CONFIG 20 | echo . 
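# Tip: after creating the connectors you can check that they are RUNNING
# with ./status_cdc.sh, or directly with:
#   curl -s http://localhost:8083/connectors | jq .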

--------------------------------------------------------------------------------
/debezium/status_cdc.sh:
--------------------------------------------------------------------------------
#!/bin/bash

CONNECT_URL=http://localhost:8083

CONNECTORS=$(curl -s -k ${CONNECT_URL}/connectors)
echo Connector list:
echo $CONNECTORS
echo

echo Connector status:
echo

for row in $(echo "${CONNECTORS}" | jq -c -r '.[]'); do
    status=$(curl -s -k -X GET "${CONNECT_URL}/connectors/${row}/status")
    echo $status
    echo
done
--------------------------------------------------------------------------------
/services/.env:
--------------------------------------------------------------------------------
DEBEZIUM_VERSION=1.6
COMPOSE_PROJECT_NAME=howto-debeizum-to-snowflake-${DEBEZIUM_VERSION}
CONFLUENT_VERSION=5.5.5
--------------------------------------------------------------------------------
/services/README.md:
--------------------------------------------------------------------------------
# Services

![docker-logo](../.images/horizontal-logo-monochromatic-white.png)

* [Usage](#usage)
* [Context](#context)
  + [Docker-compose detail](#docker-compose-detail)
  + [Access to containers](#access-to-containers)
    - [Kafka commands](#kafka-commands)
    - [Database commands](#database-commands)
* [References](#references)

As part of this howto, I provide:

- A docker-compose to run everything
- The `snowflake/keys` folder with the Snowflake keys
- A `.env` file with the product versions

## Usage

You can run it with a single command and see all the logs in your terminal. Clone this repository and go to the
`services` folder to run it:
```sh
git clone https://github.com/dariocazas/howto-debezium-to-snowflake.git
cd howto-debezium-to-snowflake/services
docker-compose up
```

You can stop it using `Ctrl+C`.

**It is important** to run this from the `services` folder, because Docker Compose reads the `.env` file located there.

## Context

### Docker-compose detail

The compose YML runs several images and exposes several ports. For simplicity, I use Debezium images for many parts:

- **mysql**: database instance provided by the Debezium team
- **postgres**: database instance provided by the Debezium team
- **zookeeper**: as part of the Kafka ecosystem
- **kafka**: a single Kafka broker, exposing its port 9092
- **cdc_connect**: Kafka Connect worker node, provided by the Debezium team, with the connector plugins for its supported databases
- **sink_connect**: Kafka Connect worker node, provided by Confluent. I include the installation of the Snowflake connector plugin

![Docker compose info](docker-compose.png)

### Access to containers

Inside the docker-compose file, you can see several commands that give you access to the containers.
Run them from the `services` folder so Docker Compose can read the `.env` file.
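For example, before running the commands below you can check that all the services are up and follow the logs of a single one (standard `docker-compose` subcommands, not specific to this howto):

```sh
# List the compose services and their current state
docker-compose ps

# Follow the logs of the CDC Kafka Connect worker
docker-compose logs -f cdc_connect
```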
#### Kafka commands

```sh
# List topics
docker-compose -f docker-compose.yml exec kafka /kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 --list

# Show all CDC MySQL data (including keys for the events)
docker-compose -f docker-compose.yml exec kafka /kafka/bin/kafka-console-consumer.sh \
    --bootstrap-server kafka:9092 --from-beginning \
    --topic mysqldb.inventory.users

# Show all CDC MySQL data (including keys for the events and the timestamp when the event was received in Kafka)
docker-compose -f docker-compose.yml exec kafka /kafka/bin/kafka-console-consumer.sh \
    --bootstrap-server kafka:9092 --from-beginning \
    --topic mysqldb.inventory.users \
    --property print.key=true --property print.timestamp=true

# Show all CDC Postgres data
docker-compose -f docker-compose.yml exec kafka /kafka/bin/kafka-console-consumer.sh \
    --bootstrap-server kafka:9092 --from-beginning \
    --topic postgresdb.inventory.product
```

#### Database commands

```sh
# Access the MySQL shell
docker-compose -f docker-compose.yml exec mysql \
    bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD inventory'

# Access the Postgres shell
docker-compose -f docker-compose.yml exec postgres \
    env PGOPTIONS="--search_path=inventory" \
    bash -c 'psql -U $POSTGRES_USER postgres'
```

## References

- [Debezium tutorial](https://debezium.io/documentation/reference/1.6/tutorial.html)
- [Debezium images github](https://github.com/debezium/docker-images)
- [Confluent: kafka connect zero to hero](https://github.com/confluentinc/demo-scene/tree/master/kafka-connect-zero-to-hero)
- [Docker compose graph visualization](https://github.com/pmsipilot/docker-compose-viz)
--------------------------------------------------------------------------------
/services/docker-compose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dariocazas/howto-debezium-to-snowflake/a2d8a9689929f2025cb6113af355e6ad77b77046/services/docker-compose.png
--------------------------------------------------------------------------------
/services/docker-compose.yml:
--------------------------------------------------------------------------------
# Based on https://debezium.io/documentation/reference/1.5/tutorial.html
# Run as:
#   docker-compose up
---
version: "2"
services:

  zookeeper:
    image: debezium/zookeeper:${DEBEZIUM_VERSION}
    ports:
      - 2181:2181
      - 2888:2888
      - 3888:3888

  # You can list the existing topics with
  #   docker-compose exec kafka /kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 --list
  # After starting CDC, you can consume the events using these commands
  # MySQL:
  #   docker-compose exec kafka /kafka/bin/kafka-console-consumer.sh --bootstrap-server kafka:9092 --from-beginning --property print.key=true --topic mysqldb.inventory.users
  # Postgres:
  #   docker-compose exec kafka /kafka/bin/kafka-console-consumer.sh --bootstrap-server kafka:9092 --from-beginning --property print.key=true --topic postgresdb.inventory.product
  kafka:
    image: debezium/kafka:${DEBEZIUM_VERSION}
    ports:
      - 9092:9092
    environment:
      #ADVERTISED_HOST_NAME: localhost
      ZOOKEEPER_CONNECT: zookeeper:2181
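      # Note (assumption, not required for this howto): if you want to run a Kafka
      # consumer from the host machine instead of from inside the containers, you may
      # need to uncomment ADVERTISED_HOST_NAME above so the broker advertises an
      # address reachable from the host.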
depends_on: 30 | - zookeeper 31 | links: 32 | - zookeeper:zookeeper 33 | 34 | # docker-compose exec mysql bash -c 'mysql -u $MYSQL_USER -p$MYSQL_PASSWORD inventory' 35 | mysql: 36 | image: debezium/example-mysql:${DEBEZIUM_VERSION} 37 | ports: 38 | - 3306:3306 39 | environment: 40 | MYSQL_ROOT_PASSWORD: debezium 41 | MYSQL_USER: mysqluser 42 | MYSQL_PASSWORD: mysqlpw 43 | 44 | # docker-compose exec postgres env PGOPTIONS="--search_path=inventory" bash -c 'psql -U $POSTGRES_USER postgres' 45 | postgres: 46 | image: debezium/example-postgres:${DEBEZIUM_VERSION} 47 | ports: 48 | - 5432:5432 49 | environment: 50 | POSTGRES_USER: postgres 51 | POSTGRES_PASSWORD: postgres 52 | 53 | cdc_connect: 54 | image: debezium/connect:${DEBEZIUM_VERSION} 55 | ports: 56 | - 8083:8083 57 | environment: 58 | BOOTSTRAP_SERVERS: kafka:9092 59 | GROUP_ID: cdc_connect_group 60 | REST_PORT: 8083 61 | REST_ADVERTISED_HOST_NAME: localhost 62 | CONFIG_STORAGE_TOPIC: my_cdc_connect_configs 63 | OFFSET_STORAGE_TOPIC: my_cdc_connect_offsets 64 | STATUS_STORAGE_TOPIC: my_cdc_connect_statuses 65 | CONFIG_STORAGE_REPLICATION_FACTOR: "1" 66 | OFFSET_STORAGE_REPLICATION_FACTOR: "1" 67 | STATUS_STORAGE_REPLICATION_FACTOR: "1" 68 | depends_on: 69 | - zookeeper 70 | - kafka 71 | - mysql 72 | - postgres 73 | links: 74 | - zookeeper:zookeeper 75 | - kafka:kafka 76 | - mysql:mysql 77 | - postgres:postgres 78 | 79 | sink_connect: 80 | image: confluentinc/cp-kafka-connect-base:${CONFLUENT_VERSION} 81 | ports: 82 | - 8085:8085 83 | environment: 84 | CONNECT_BOOTSTRAP_SERVERS: kafka:9092 85 | CONNECT_REST_PORT: 8085 86 | CONNECT_REST_ADVERTISED_HOST_NAME: "localhost" 87 | CONNECT_GROUP_ID: sink_connect_group 88 | CONNECT_CONFIG_STORAGE_TOPIC: my_sink_connect_configs 89 | CONNECT_OFFSET_STORAGE_TOPIC: my_sink_connect_offsets 90 | CONNECT_STATUS_STORAGE_TOPIC: my_sink_connect_statuses 91 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1" 92 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1" 93 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1" 94 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter 95 | CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter 96 | DEBEZIUM_VERSION: ${DEBEZIUM_VERSION} 97 | depends_on: 98 | - zookeeper 99 | - kafka 100 | links: 101 | - zookeeper:zookeeper 102 | - kafka:kafka 103 | # https://github.com/confluentinc/demo-scene/blob/master/kafka-connect-zero-to-hero/docker-compose.yml#L89-L101 104 | command: 105 | - bash 106 | - -c 107 | - | 108 | echo "Installing Connector" 109 | confluent-hub install --no-prompt snowflakeinc/snowflake-kafka-connector:1.5.5 110 | # 111 | echo "Launching Kafka Connect worker" 112 | /etc/confluent/docker/run & 113 | # 114 | sleep infinity 115 | -------------------------------------------------------------------------------- /services/render_compose_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script update the PNG with the services of the docker-compose.yml 4 | # Is based on https://github.com/pmsipilot/docker-compose-viz 5 | 6 | docker run --rm -it --name dcv -v $(pwd):/input pmsipilot/docker-compose-viz render -m image docker-compose.yml --force 7 | -------------------------------------------------------------------------------- /snowflake/README.md: -------------------------------------------------------------------------------- 1 | # Snowflake 2 | 3 | ![Snowflake-logo](../.images/Snowflake_Logo.svg.png) 4 | 5 | * [Sink to Snowflake scripts](#sink-to-snowflake-scripts) 6 | 
+ [Snowflake scripts](#snowflake-scripts)
* [Context](#context)
  + [Sink connector](#sink-connector)
  + [Snowflake security](#snowflake-security)
  + [Snowflake resource naming used](#snowflake-resource-naming-used)
  + [Snowflake CDC Debezium table](#snowflake-cdc-debezium-table)
  + [Snowflake replica table](#snowflake-replica-table)
  + [The final view](#the-final-view)

As part of this howto, I provide:

- Kafka Connect configurations to push change events from the CDC topics to Snowflake
- Scripts to create, destroy, and check the status of these connectors
- Snowflake SQL scripts with the replica transformation of the change event tables

## Sink to Snowflake scripts

This folder includes three bash scripts that perform actions against the Docker service `sink_connect`:

- `init_sink.sh`: takes the configuration available in the `./connect/snowflake-sink-connector.json` file and calls
  the Kafka Connect REST API to create the connector that sinks the CDC topics into Snowflake event tables
- `status_sink.sh`: calls the Kafka Connect REST API, gets the list of configured
  connectors, and shows you the status of each one
- `delete_sink.sh`: similar to status, but deletes all the connectors in this
  Kafka Connect service

**IMPORTANT**: you MUST change several parameters in the `./connect/snowflake-sink-connector.json` file:
- `snowflake.url.name`: the entry point of your Snowflake environment
- `snowflake.user.name`: your user name
- `snowflake.private.key`: your private key (the body of the `.p8` file on a single line, without header and footer)
- `snowflake.private.key.passphrase`: the passphrase of your private key (not needed if the generated key is unencrypted)

It is good practice to keep your secrets out of the connector configs. You can review [KIP-297] to use
an external provider to reference them.

With these scripts, you can run whatever test you want:

- Create the connector before or after the topics exist or have data
- Destroy the connector, insert new data, and create it again to check for data loss
- Whatever other test you can think of

### Snowflake scripts

Configure the replication in your Snowflake account with:

- `sql/00-security.sql`: you already ran part of it while following the [snowflake/keys] README. The script is documented.
- `sql/01-cdc-to-replica-mysql.sql`: creates a view similar to the original MySQL table, plus everything needed to replicate
  the events uploaded to Snowflake
- `sql/01-cdc-to-replica-postgres.sql`: like the MySQL one, but for the PostgreSQL table

## Context

### Sink connector

If you reviewed the [debezium] detail, you already have context about Kafka Connect
and how to configure it. As you can see, [this connector] is very similar:

- Common connector parts (name, connector class, ...)
- Snowflake connection properties and destination definition
  - You should configure your Snowflake account (url, user, keys...)
  - A `topic2table` mapping is recommended
- Other configs:
  - `key.converter`:
    - Tells the connector how to interpret the key of the events received from the topics.
    - You can use a generic JsonConverter, but Snowflake offers its own implementation, which supports some additional options
  - `value.converter`: like the `key.converter`, but focused on the value of the event
  - `behavior.on.null.values`:
    - A property specific to the Snowflake converters (generic alternatives exist)
    - The [debezium] readme explains how Debezium transforms DELETE actions
      into two events (one with the delete operation, and another with a `null` value)
    - A `null` value makes sense in a Kafka context, but not for a database like Snowflake; for this reason it is configured as `IGNORE`:
      these events are not uploaded to Snowflake

### Snowflake security

For simplicity, this demo should be run with the SYSADMIN role in Snowflake, after granting that role the privilege to execute tasks.

### Snowflake resource naming used

In this demo:
- All resources include the topic name in upper case, replacing the `.` with `_`
- The Debezium events are ingested into tables with the prefix `CDC_`
- The tables holding the replicated state use the prefix `REPLICA_`
- The streams (listeners over changes in Snowflake tables) used to batch new events for replication end with `_STREAM_REPLICATION`
- The tasks in charge of triggering the replication end with `_TASK_REPLICATION`

### Snowflake CDC Debezium table

In the sink Kafka connector configuration, you specify the database, schema, and table where the events are populated.
All the tables share the same format, with two columns:
- `RECORD_METADATA`: variant column with a JSON document that includes info about the original topic and the key of the event
- `RECORD_CONTENT`: variant column with a JSON document containing the value of the event.

Regarding the key and the value, this demo works with JSON serialization without a schema registry. The events generated by
the CDC include the JSON Schema of each event. If you look closely, `RECORD_CONTENT` holds exactly the same event value that
you see in the Kafka topic. The `RECORD_METADATA` record includes:

- CreateTime: when Kafka received the event
- topic: the name of the source topic
- partition: the number of the topic partition that contains the event
- offset: the position of the event in the partition
- key: the event key

```json
{
  "CreateTime": 1627490826351,
  "topic": "mysqldb.inventory.users",
  "partition": 0,
  "offset": 12,
  "key": {
    "payload": {
      "id": 1
    },
    "schema": {
      "fields": [
        {
          "field": "id",
          "optional": false,
          "type": "int32"
        }
      ],
      "name": "mysqldb.inventory.users.Key",
      "optional": false,
      "type": "struct"
    }
  }
}
```

You can use this table as the historical evolution of the source table, which can be useful for analytical purposes.
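As an illustrative example (assuming the sink connector has already ingested some events), you can project fields out of the variant columns with the same paths that the replication scripts use:

```sql
-- Peek at the raw CDC events: key, operation and row state from the variant columns
select record_metadata:key.payload.id::number as id,
       record_content:payload.op::string      as op,
       record_content:payload.after           as row_state,
       record_metadata:"CreateTime"::number   as kafka_create_time_ms
from "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS"
limit 10;
```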
### Snowflake replica table

One of the goals of this demo is to replicate the state of the source databases in Snowflake. This can be done
with other targets too (you can populate the topic data into another database via a JDBC sink connector), but in the case
of Snowflake there are several points to consider that add some extra complexity.

When you replicate using a JDBC connector, the order of the operations is simply the order in which you read
the topic. In Snowflake, however, you process a batch of information (or the partial/entire event table, while you
don't yet have a task doing it for you). In this case, you need to sort the events and take the last one for each key.

The replication script performs these actions:
- Creates the replication table
- Creates a view over the replication table (to expose the same structure as the original database table)
- Creates a stream over the event table (in our case, capturing newly ingested rows)
- Merges the current content of the event table into the replication table
- Creates a task with the `MERGE INTO` statement, reading from the stream (not from the event table)
- Enables the task (which runs every minute)
- And some other useful check statements

It is important (to avoid losing data) to create the stream before running the `MERGE INTO` statement over
the event table (I assume that you are already ingesting data before creating the replication table).

The `MERGE INTO` statement includes:
- A projection of the fields that matter to the process (not from a functional data perspective). This includes:
  - Fields used for sorting the events (binlog, lsn, ...)
  - The functional data (payload of the event)
  - The CDC operation (read, insert, update, delete)
  - Metadata about the CDC process (the `source` field of the Debezium change event), useful for traceability
  - Some fields useful to compute latencies
- Sorting of the input. This depends on your source database engine and its configuration:
  - MySQL supports different topologies. This demo uses a standalone server and builds a binlog sequence
    from file name and position to sort the events
  - For PostgreSQL, the LSN is used
- Taking the last operation for each key
  - You must guarantee that the query returns only one row per key
  - If the merge operation matches several source rows to one target row, the operation is not deterministic and may apply any of them.
- Checking whether the key of the source row matches the target (replica) table:
  - If there is no match and the operation is `delete`, the event is discarded
  - If there is no match and the operation is anything else, the event is inserted
  - If there is a match and the operation is `delete`, the row in the replica table is deleted
  - If there is a match and the operation is anything else, the event is applied to the replica table

Once your query runs fine over the source table, you should schedule a task that runs it for you. If you run
this query over the events table again and again, you reprocess all the events every time. To avoid this,
run the task over the created stream, not over the event table. The stream is cleaned automatically after every
successful iteration, so you only process the new events. You can also add a condition so the task only
runs if there is data in the stream.

After creating the task, you need to enable it using an `ALTER TASK` statement. You can see the task execution history with
```sql
select *
from table(demo_db.information_schema.task_history())
order by scheduled_time desc;
```

### The final view

The replication table contains columns with info about the CDC and replication process, useful for checks. But your
final consumers do not expect this information: they want the same table they have in the source database system.

One column holds the valuable data: the `PAYLOAD` column.
This content the functional data, in JSON format. 199 | You can create a view over this field, projecting the data like the source databases. 200 | 201 | This has one additional benefit: **evolution**. If your source database evolves (adding columns, removing it, wherever) 202 | all the process is not affected, all runs fine. The unique change is the view: 203 | - No changes in your data pipeline 204 | - No changes in your data 205 | - Coexistence of old and new data 206 | - The schema of each data is included with the data 207 | 208 | [debezium]: ../debezium/README.md 209 | [this connector]: ./connect/snowflake-sink-connector.json 210 | [snowflake/keys]: keys/ 211 | [KIP-297]: https://cwiki.apache.org/confluence/display/KAFKA/KIP-297%3A+Externalizing+Secrets+for+Connect+Configurations -------------------------------------------------------------------------------- /snowflake/connect/snowflake-sink-connector.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "snowflake-sink-connector", 3 | "config": { 4 | "connector.class": "com.snowflake.kafka.connector.SnowflakeSinkConnector", 5 | "tasks.max": "1", 6 | "topics": "mysqldb.inventory.users,postgresdb.inventory.product", 7 | "snowflake.topic2table.map": "mysqldb.inventory.users:cdc_mysql_inventory_users,postgresdb.inventory.product:cdc_postgresdb_inventory_product", 8 | "snowflake.url.name": "mh16247.eu-west-2.aws.snowflakecomputing.com:443", 9 | "snowflake.user.name": "dariocazas", 10 | "snowflake.private.key": "MIIFLTBXBgkqhkiG9w0BBQ0wSjApBgkqhkiG9w0BBQwwHAQIHl29yM4BvgICAggAMAwGCCqGSIb3DQIJBQAwHQYJYIZIAWUDBAEqBBCkFIfNB88Urq5VaPCCzze1BIIE0In6kYmdUnVvH5Q+nPXkPj3VCXd0/aPceHSbC4BsWRtli39bIrWCch1EQXZxoj3xt8QNtOL9XGKH3XqG9rYpu0VmR2MZjC+FteNZ98RXrDqdwkoN/ZWTeaQ+MaeZtiCn93N3dhh70Woee/JgVEcO38vV/i0eJ4ryM07a0eV4d5Y8JQHRBoVVxTPm0Ha/af+p7loS5AKvwfiHndFgQPNbILfweGuhRUe8AQf9Bo0vzeXHBj5nO4RqnaTcfmRDIul4ZtMD7zxxTMJnhezTGFDPqlxEvOOZAudakm01C3y5mfPUs+veMWjNcz0AfPPeyvJPO5Xmu7kiIGtM1UHRojnQOtZ9QWBmhLfMsRZ3GbVbycCLgZOKhW1OIy+PbbykiiRQD6AHszJiFKroZz3yqMRTh9QJFJ4mpa6XjkLGCE8CiPns5Tl7qX2BN78Qs5vxsWJC0Z7wqNuoldsNSFKVtfW6Qm81j12XQw1fsk5zqCnabpsiK/uWo1NowhEa5xAAeRW95wqTyWYi0tu8/u3EQo/xwBCCbDiYFxvbbOmWZjsxf95sO5yHrBxGTs8wCduj0I1UqTXWzTZ4JoAPdSFHwLS61slvujqlSvNvla12nqTYGBtWO/qgLh5egaTmGupLhu4b6FiO5CCXg4sfyOoKeZtykbM0wT0Ud8oK8fx9HwlUNxAaW8NrIo3EuRg7dsKdhtD2hJrqM1dyorVIT7bHSJ5YRLfXHdFGnmaOmJOGvMqXC2yfivEFbMI0nxnrJGDJ4KLS9a8DLmgsQZS8PySmWS+cGuvq4nUcxHnhX/j0ZWCZhSUxQ/z/lRx+RmZM+ey/PnzBuOQGaQrIHe44taN2skz97oopQu9lS6OANE9TPG1Vp1NqanU2Mxkz07++5swdeYp0WEJLWhkLpn3Ce7ImcceLlFI0B9TlAih4rEiE3REfbGCTvLKpaRPHmwYNmZIAhlhKm0Q+v/4Isk4hpce5MuOTiR7yz4neV3VCl66sw7o3tJSRnXtoVKFA2QlN0emdOj6ji0iPvRtKsU/9r8+8EkO3WTg/YO59aLM/pX8V9Rd87jnDidLuO2gVzIsghRiElg2g/4cC9zmvBSZLfF/TJZGs6pX9WxDh3VjLEjdqvU8weepk/LrxyJADp7Up7GuALSytFaMbDPRTLXICsu5q0C/ne//sHeVjiKcz0WgIzeUGqC4wt7ht/G1DDd4/gxAp6ZPmlnh5WjNPTtmfU2TVV14EYUs9UzrUYm+2G0uG/+da+WpB6hRKZkHNSoFKVq3g5IHlB2Lc7SFKYnQhpHxmpmCeoQ2/DlzSWS/EHrV54ej68TdPa2MnrrdeeDCGB3Oo4oSmHSh0bTO4vVOLS9ezLDiFfT0KnhI2HmN3JOGm/2njXwp/qnk3oscyYIxBocsmYeQ91EfS9M4iNjryFNLHNuyWq/9WsDF/LrWPJIoQ+7qZm9AmLZ9yx3ED8YbqIjiK1Q48gl0NwpyvCFEfWDCjmxUA+W1SnAhf4VK3pRLBbkr5UwNcW+FSQWNtoZ8eHASDab5l4HH1NoswYqzEc4jmssQG+3nDimNvenbXvuOjwMF9+wC5LVryysZ2nMeKql4lSr8hlHe4xkvquTyPbJCSsViAueAHmHxSNW/i6QVNukc24UtP", 11 | "snowflake.private.key.passphrase": "mypassphrase", 12 | "snowflake.database.name": "HOWTO_DB", 13 | "snowflake.schema.name": "public", 14 | "key.converter": "com.snowflake.kafka.connector.records.SnowflakeJsonConverter", 15 | 
"value.converter": "com.snowflake.kafka.connector.records.SnowflakeJsonConverter", 16 | "behavior.on.null.values": "IGNORE" 17 | } 18 | } -------------------------------------------------------------------------------- /snowflake/delete_sink.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONNECT_URL=http://localhost:8085 4 | 5 | CONNECTORS=$(curl -s -k ${CONNECT_URL}/connectors) 6 | echo Connector list: 7 | echo $CONNECTORS 8 | echo 9 | 10 | for row in $(echo "${CONNECTORS}" | jq -c -r '.[]'); do 11 | status=$(curl -s -k -X DELETE "${CONNECT_URL}/connectors/${row}") 12 | echo Deleted ${row} 13 | done 14 | -------------------------------------------------------------------------------- /snowflake/init_sink.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONNECT_URL=http://localhost:8085 4 | SINK_SNOWPIPE_CONNECT_CONFIG=connect/snowflake-sink-connector.json 5 | 6 | echo "### Creating Snowpipe sink connector ###" 7 | curl -i -X POST $CONNECT_URL/connectors \ 8 | -H "Accept:application/json" \ 9 | -H "Content-Type:application/json" \ 10 | -d @$SINK_SNOWPIPE_CONNECT_CONFIG 11 | echo . 12 | -------------------------------------------------------------------------------- /snowflake/keys/README.md: -------------------------------------------------------------------------------- 1 | # Credentials management 2 | 3 | ## Snowflake 4 | 5 | ### Create your account 6 | 7 | To use snowflake need to create a free trial: https://signup.snowflake.com 8 | 9 | You can select a Standard Snowflake edition over several clouds. 10 | After validate email and access to the web console, you can see that exists: 11 | 12 | - The host accessed in the URL is your configuration for the snowflake connector 13 | - In left panel, you can see the DEMO_DB database with a PUBLIC schema 14 | - In top-right panel, you can see 15 | - Your role (SYSADMIN) 16 | - Your warehouse (COMPUTE_WH) 17 | 18 | ### Create your key pair 19 | 20 | In [Kafka connector install - Using Key Pair Authentication & Key Rotation], you can 21 | see more detail about it. 

To simplify the management, we generate an unencrypted private key (and a public key)
to use with Snowflake:

```sh
cd snowflake/keys
openssl genrsa -out snowflake_rsa_key.pem 2048
openssl pkcs8 -topk8 -inform PEM -in snowflake_rsa_key.pem -out snowflake_rsa_key.p8
openssl rsa -in snowflake_rsa_key.p8 -pubout -out snowflake_rsa_key.pub
```

If you don't have an [OpenSSL toolkit] installed in your environment, you can run
these commands with Docker:

```sh
cd snowflake
docker run -v $PWD:/work -it nginx openssl genrsa -out /work/keys/snowflake_rsa_key.pem 2048
docker run -v $PWD:/work -it nginx openssl pkcs8 -topk8 -inform PEM -in /work/keys/snowflake_rsa_key.pem -out /work/keys/snowflake_rsa_key.p8
docker run -v $PWD:/work -it nginx openssl rsa -in /work/keys/snowflake_rsa_key.pem -pubout -out /work/keys/snowflake_rsa_key.pub
sudo chown -R $USER:$USER keys/*
```

The content of the keys is similar to what is committed in this repo
(we upload a valid key pair, but it does not authenticate against our trial Snowflake service):

```sh
cat snowflake/keys/snowflake_rsa_key.p8
-----BEGIN ENCRYPTED PRIVATE KEY-----
MIIFLTBXBgkqhkiG9w0BBQ0wSjApBgkqhkiG9w0BBQwwHAQIHl29yM4BvgICAggA
MAwGCCqGSIb3DQIJBQAwHQYJYIZIAWUDBAEqBBCkFIfNB88Urq5VaPCCzze1BIIE
...
-----END ENCRYPTED PRIVATE KEY-----
```
```sh
cat snowflake/keys/snowflake_rsa_key.pub
-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwBwYbPtbEUXueQ6u3KDw
zlKu4IhAkGdcUBVbdTdUVBLNVsZX+eiKOedN3EnMtDeVzRlaT8JAwHX0LVXkgXtn
...
-----END PUBLIC KEY-----
```

### Register the key pair in Snowflake

Access the Snowflake web console and locate your username at the top right.
The Snowflake documentation says to switch your role to SECURITYADMIN, but
in our case we need to change to ACCOUNTADMIN.

Take your public key (without header and footer) and register it for your user in Snowflake
using the web console:

```sql
alter user dariocazas set rsa_public_key='MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwBwYbPtbEUXueQ6u3KDw
zlKu4IhAkGdcUBVbdTdUVBLNVsZX+eiKOedN3EnMtDeVzRlaT8JAwHX0LVXkgXtn
KzMBp6TpS4j+2kKvbZc5p0KfZHjn42G+C/DXI4ZNQZEBQ/Q4UY6OkTZepFaOX3ev
2icxB6LnnVYI3WHkSnq3vTthhYhTuUOQ4YRudadOtoT4By09hxbsaanVl42FXIZP
AXX1jwawzKe52V1+FB5/UMv+JMUFfczlO+acn/EaZvKbR55Vk/+OVrUP4KIKvdWn
s/n4ASYqxiw9xjrizGCoUyl+b+Ch6A02fTU02HrT9jOOj+dVAeFD2QGOqaze0eCD
dwIDAQAB';
```

After doing this, you can use the __snowflake_rsa_key.pem__ private key from Kafka
85 | 86 | [Kafka connector install - Using Key Pair Authentication & Key Rotation]: https://docs.snowflake.com/en/user-guide/kafka-connector-install.html#using-key-pair-authentication-key-rotation 87 | -------------------------------------------------------------------------------- /snowflake/keys/snowflake_rsa_key.p8: -------------------------------------------------------------------------------- 1 | -----BEGIN ENCRYPTED PRIVATE KEY----- 2 | MIIFLTBXBgkqhkiG9w0BBQ0wSjApBgkqhkiG9w0BBQwwHAQIHl29yM4BvgICAggA 3 | MAwGCCqGSIb3DQIJBQAwHQYJYIZIAWUDBAEqBBCkFIfNB88Urq5VaPCCzze1BIIE 4 | 0In6kYmdUnVvH5Q+nPXkPj3VCXd0/aPceHSbC4BsWRtli39bIrWCch1EQXZxoj3x 5 | t8QNtOL9XGKH3XqG9rYpu0VmR2MZjC+FteNZ98RXrDqdwkoN/ZWTeaQ+MaeZtiCn 6 | 93N3dhh70Woee/JgVEcO38vV/i0eJ4ryM07a0eV4d5Y8JQHRBoVVxTPm0Ha/af+p 7 | 7loS5AKvwfiHndFgQPNbILfweGuhRUe8AQf9Bo0vzeXHBj5nO4RqnaTcfmRDIul4 8 | ZtMD7zxxTMJnhezTGFDPqlxEvOOZAudakm01C3y5mfPUs+veMWjNcz0AfPPeyvJP 9 | O5Xmu7kiIGtM1UHRojnQOtZ9QWBmhLfMsRZ3GbVbycCLgZOKhW1OIy+PbbykiiRQ 10 | D6AHszJiFKroZz3yqMRTh9QJFJ4mpa6XjkLGCE8CiPns5Tl7qX2BN78Qs5vxsWJC 11 | 0Z7wqNuoldsNSFKVtfW6Qm81j12XQw1fsk5zqCnabpsiK/uWo1NowhEa5xAAeRW9 12 | 5wqTyWYi0tu8/u3EQo/xwBCCbDiYFxvbbOmWZjsxf95sO5yHrBxGTs8wCduj0I1U 13 | qTXWzTZ4JoAPdSFHwLS61slvujqlSvNvla12nqTYGBtWO/qgLh5egaTmGupLhu4b 14 | 6FiO5CCXg4sfyOoKeZtykbM0wT0Ud8oK8fx9HwlUNxAaW8NrIo3EuRg7dsKdhtD2 15 | hJrqM1dyorVIT7bHSJ5YRLfXHdFGnmaOmJOGvMqXC2yfivEFbMI0nxnrJGDJ4KLS 16 | 9a8DLmgsQZS8PySmWS+cGuvq4nUcxHnhX/j0ZWCZhSUxQ/z/lRx+RmZM+ey/PnzB 17 | uOQGaQrIHe44taN2skz97oopQu9lS6OANE9TPG1Vp1NqanU2Mxkz07++5swdeYp0 18 | WEJLWhkLpn3Ce7ImcceLlFI0B9TlAih4rEiE3REfbGCTvLKpaRPHmwYNmZIAhlhK 19 | m0Q+v/4Isk4hpce5MuOTiR7yz4neV3VCl66sw7o3tJSRnXtoVKFA2QlN0emdOj6j 20 | i0iPvRtKsU/9r8+8EkO3WTg/YO59aLM/pX8V9Rd87jnDidLuO2gVzIsghRiElg2g 21 | /4cC9zmvBSZLfF/TJZGs6pX9WxDh3VjLEjdqvU8weepk/LrxyJADp7Up7GuALSyt 22 | FaMbDPRTLXICsu5q0C/ne//sHeVjiKcz0WgIzeUGqC4wt7ht/G1DDd4/gxAp6ZPm 23 | lnh5WjNPTtmfU2TVV14EYUs9UzrUYm+2G0uG/+da+WpB6hRKZkHNSoFKVq3g5IHl 24 | B2Lc7SFKYnQhpHxmpmCeoQ2/DlzSWS/EHrV54ej68TdPa2MnrrdeeDCGB3Oo4oSm 25 | HSh0bTO4vVOLS9ezLDiFfT0KnhI2HmN3JOGm/2njXwp/qnk3oscyYIxBocsmYeQ9 26 | 1EfS9M4iNjryFNLHNuyWq/9WsDF/LrWPJIoQ+7qZm9AmLZ9yx3ED8YbqIjiK1Q48 27 | gl0NwpyvCFEfWDCjmxUA+W1SnAhf4VK3pRLBbkr5UwNcW+FSQWNtoZ8eHASDab5l 28 | 4HH1NoswYqzEc4jmssQG+3nDimNvenbXvuOjwMF9+wC5LVryysZ2nMeKql4lSr8h 29 | lHe4xkvquTyPbJCSsViAueAHmHxSNW/i6QVNukc24UtP 30 | -----END ENCRYPTED PRIVATE KEY----- 31 | -------------------------------------------------------------------------------- /snowflake/keys/snowflake_rsa_key.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEpAIBAAKCAQEAwBwYbPtbEUXueQ6u3KDwzlKu4IhAkGdcUBVbdTdUVBLNVsZX 3 | +eiKOedN3EnMtDeVzRlaT8JAwHX0LVXkgXtnKzMBp6TpS4j+2kKvbZc5p0KfZHjn 4 | 42G+C/DXI4ZNQZEBQ/Q4UY6OkTZepFaOX3ev2icxB6LnnVYI3WHkSnq3vTthhYhT 5 | uUOQ4YRudadOtoT4By09hxbsaanVl42FXIZPAXX1jwawzKe52V1+FB5/UMv+JMUF 6 | fczlO+acn/EaZvKbR55Vk/+OVrUP4KIKvdWns/n4ASYqxiw9xjrizGCoUyl+b+Ch 7 | 6A02fTU02HrT9jOOj+dVAeFD2QGOqaze0eCDdwIDAQABAoIBABdl1JvBaXALImZg 8 | IXABshKPA3mZXrO3wwiF8WOvX3f16kh9U82+QseWomcIHgR9GKOoSNWaBSTruNek 9 | tHYIv6IFTlhKv5dRkiinIpKobn8uoBcump+ZEfqGVM2g19v6ezr5jbpxMHADPTeq 10 | yyBZhXN+fnw9nRQOnHnKKHHhzGshkEWEwHSxteG9HYqmID68bEta3azpq+T+Rrig 11 | N9UKLqgUXBD9G8i4T0iuoBZcq0sN7YtJMg6sHOu0JbZgPHlTJliNuQ3OLt3TGYew 12 | /kiQJOsZofVbHVQXvxpAU/Dy+87yUVwl9tCBbsFKmtxgyzZR7w2WxMdCrIjc940t 13 | Zi4UUSECgYEA6CGO6NiYjogmVFhs+BEMi7oRdBtqouQ3qQW5byk8F1bPruWdRnmB 14 | 
Ekmu5sYhg9oS8PWxXKJHdZ7in/kmbXke/UQ8I+R+RqjKJVNKX0nBAyQY7XNsM4pv 15 | CuStp0XWnsnwP5MO3SYIkmaaushsL6AxR9RGgJZISTKcktA+v5S2cwkCgYEA090L 16 | w3qLfu9egox2/YWykaPsOQnwIEFRueowcJp/ZyAARA2A3gvyoiQt1CVcT9KJ0nPx 17 | ryXb6mQ2rf5qHG0JceQ1DI+mVXhbs+AzPI/n0pPnCW50J5+kNVGQ4fBpbXmh61Tr 18 | VM+b2lTHoSjDisVToaQHYn/BpzaK8aVQggm0Yn8CgYEAwP0VaTSaMPW0mC8j+WGD 19 | Qq+hTxx0HZULSXS+5FIt6WF9LPUtOqhNzLyBss9Kkeo+ESLTICayrnE4DLQBZMZs 20 | IzgVn+mZqnkuBrYmgO46j7f1GYT6kicnhrD0RrtjYYSWPuSuWOIEAmNXhK6Yc0gF 21 | cKhlLQbEdkajsdN8N58VyLECgYAf4mltztiFjvKzRP53YxKftoLLhsJbqFjrWOJX 22 | X/kChR9lHn8ha7zlR/qZrdG5tZ7GTGq4CEOTf+d2wg4oHwTH3idZr5jBzi5G4Nv1 23 | JlcmKtofYj8a43ysBY1/Y1YKgr6qkwojpmb3McElcOQU02OltPDjkwSK7Lt2aIG4 24 | QEukcwKBgQC55ORnqG6548zBg1+eryNKYrzTEJpaFSZ4gRFwcPJMogkQoGRLvW7K 25 | 7P++3fqYvOHTa0dCIHqMXjcalzcyM/N6VSiZExi6N5BhZtwkcGO0YVi+6FFRfGxW 26 | K6ITKTgeTj409QzpwH2qPszq1zsfiHz6HWcKbsJ18thU9ISnod3u9g== 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /snowflake/keys/snowflake_rsa_key.pub: -------------------------------------------------------------------------------- 1 | -----BEGIN PUBLIC KEY----- 2 | MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwBwYbPtbEUXueQ6u3KDw 3 | zlKu4IhAkGdcUBVbdTdUVBLNVsZX+eiKOedN3EnMtDeVzRlaT8JAwHX0LVXkgXtn 4 | KzMBp6TpS4j+2kKvbZc5p0KfZHjn42G+C/DXI4ZNQZEBQ/Q4UY6OkTZepFaOX3ev 5 | 2icxB6LnnVYI3WHkSnq3vTthhYhTuUOQ4YRudadOtoT4By09hxbsaanVl42FXIZP 6 | AXX1jwawzKe52V1+FB5/UMv+JMUFfczlO+acn/EaZvKbR55Vk/+OVrUP4KIKvdWn 7 | s/n4ASYqxiw9xjrizGCoUyl+b+Ch6A02fTU02HrT9jOOj+dVAeFD2QGOqaze0eCD 8 | dwIDAQAB 9 | -----END PUBLIC KEY----- 10 | -------------------------------------------------------------------------------- /snowflake/sql/00-security.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | -- Set public key in snowflake to your user account to enable access via kafka connect 4 | -- Snowflake doc refers to SECURITYADMIN, but for me didn't work (I need use ACCOUNTADMIN) 5 | -- https://docs.snowflake.com/en/user-guide/kafka-connector-install.html#using-key-pair-authentication-key-rotation 6 | use role accountadmin; 7 | 8 | alter user dariocazas set rsa_public_key='MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArJFv7/40nuy8D4FC76wQ 9 | Qkz1FHnEhS8jvXVTrSGzlJoTRrKm3Nx039+PPgz0EkzW/WiUdyPF6G4ZJh5L9+WU 10 | 6xEQo9HGFJhA4U4rOOXv9q3SlZEMndpg9qbGd6mp/ym5GZ9lznBVc33oQO2lIWum 11 | j8EmuYn7SLpceY7iCUtCrGgu2gE+OxHcajvQPccdMtNlz+LfXXCe+4By7PGQuBkR 12 | 9wO0wkhoYfRdInvATRSpGJK8jtAmxe9UelobyeEFsbFVqsXruOw1LbNF2bq3IAaQ 13 | TvD5OVYcfyQ+nDrE55AngRAfewpur09laqYfqzYvVZjutZc2InD4VuSVouGc8bYg 14 | qwIDAQAB'; 15 | 16 | -------------------------------------------------------------------------------- /snowflake/sql/01-cdc-to-replica-mysql.sql: -------------------------------------------------------------------------------- 1 | -- Based on: 2 | -- https://docs.snowflake.com/en/user-guide/data-pipelines-examples.html#transforming-loaded-json-data-on-a-schedule 3 | -- https://docs.snowflake.com/en/sql-reference/sql/merge.html 4 | 5 | -- Use this role is not recomendable in production environments 6 | use role accountadmin; 7 | 8 | -- Create the replica table, including extra columns to support replica logic and process trazability 9 | create or replace 10 | table "HOWTO_DB"."PUBLIC"."REPLICA_MYSQL_INVENTORY_USERS" 11 | ( id number PRIMARY KEY comment 'primary key of the source table' 12 | , sourcedb_binlog_gtid string comment 'database log position, gtid used in HA MySQL (null in other cases), used for 
ordering events (RECORD_CONTENT:payload.source.gtid)' 13 | , sourcedb_binlog_file string comment 'database log position, file log name, used for ordering events (RECORD_CONTENT:payload.source.file)' 14 | , sourcedb_binlog_pos string comment 'database log position, position in log file, used for ordering events (RECORD_CONTENT:payload.source.pos)' 15 | , payload variant comment 'data after operation (RECORD_CONTENT:payload.after)' 16 | , cdc_operation char comment 'CDC registered operation in source DB (RECORD_CONTENT:payload.op)' 17 | , cdc_source_info variant comment 'Debezium source field, for trazability (RECORD_CONTENT:payload.source)' 18 | , ts_ms_sourcedb number comment 'the timestamp when database register the event, not available on database snapshot (RECORD_CONTENT:payload.source.ts_ms)' 19 | , ts_ms_cdc number comment 'the timestamp when the CDC connector capture the event (RECORD_CONTENT:payload.ts_ms)' 20 | , ts_ms_replica_sf number comment 'the timestamp when snowflake task fills the record') 21 | comment = 'Replica from CDC over MySQL Inventory Users'; 22 | 23 | -- Create final view with same columns as MySQL database to use like the same table 24 | create or replace view "HOWTO_DB"."PUBLIC"."MYSQL_INVENTORY_USERS" 25 | as 26 | select payload:id id, payload:name name, payload:email email, payload:password password, payload:created_on created_on 27 | from "HOWTO_DB"."PUBLIC"."REPLICA_MYSQL_INVENTORY_USERS"; 28 | 29 | -- Create a stream from CDC events table, to process new events into replica table 30 | create or replace 31 | stream "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS_STREAM_REPLICATION" 32 | on table "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS"; 33 | 34 | 35 | -- After create stream (avoid loss events), process all events available in CDC events table 36 | merge into "HOWTO_DB"."PUBLIC"."REPLICA_MYSQL_INVENTORY_USERS" replica_table 37 | using 38 | (with 39 | prequery as (select RECORD_METADATA:key.payload.id id 40 | , COALESCE(RECORD_CONTENT:payload.source.gtid, '') sourcedb_binlog_gtid 41 | , COALESCE(RECORD_CONTENT:payload.source.file, '') sourcedb_binlog_file 42 | , to_number(RECORD_CONTENT:payload.source.pos) sourcedb_binlog_pos 43 | , RECORD_CONTENT:payload.after payload 44 | , RECORD_CONTENT:payload.op cdc_operation 45 | , RECORD_CONTENT:payload.source cdc_source_info 46 | , RECORD_CONTENT:payload.source.ts_ms ts_ms_sourcedb 47 | , RECORD_CONTENT:payload.ts_ms ts_ms_cdc 48 | from "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS"), 49 | rank_query as (select * 50 | , ROW_NUMBER() over (PARTITION BY id 51 | order by ts_ms_cdc desc, sourcedb_binlog_file desc, sourcedb_binlog_pos desc) as row_num 52 | from prequery) 53 | select * from rank_query where row_num = 1) event_data 54 | on replica_table.id = to_number(event_data.id) 55 | when not matched and event_data.cdc_operation <> 'd' 56 | then insert 57 | (id, sourcedb_binlog_gtid, sourcedb_binlog_file, sourcedb_binlog_pos, payload 58 | , cdc_operation, cdc_source_info, ts_ms_sourcedb, ts_ms_cdc, ts_ms_replica_sf) 59 | values 60 | (event_data.id, event_data.sourcedb_binlog_gtid, event_data.sourcedb_binlog_file 61 | , event_data.sourcedb_binlog_pos, event_data.payload, event_data.cdc_operation 62 | , event_data.cdc_source_info, event_data.ts_ms_sourcedb, event_data.ts_ms_cdc 63 | , date_part(epoch_millisecond, CURRENT_TIMESTAMP)) 64 | when matched and event_data.cdc_operation = 'd' 65 | then delete 66 | when matched and event_data.cdc_operation <> 'd' 67 | then update set id=event_data.id 68 | , 
sourcedb_binlog_gtid=event_data.sourcedb_binlog_gtid 69 | , sourcedb_binlog_file=event_data.sourcedb_binlog_file 70 | , sourcedb_binlog_pos=event_data.sourcedb_binlog_pos 71 | , payload=event_data.payload 72 | , cdc_operation=event_data.cdc_operation 73 | , cdc_source_info=event_data.cdc_source_info 74 | , ts_ms_sourcedb=event_data.ts_ms_sourcedb 75 | , ts_ms_cdc=event_data.ts_ms_cdc 76 | , ts_ms_replica_sf=date_part(epoch_millisecond, CURRENT_TIMESTAMP); 77 | 78 | 79 | -- Create task with previous tested query, but read data from the created stream (not CDC events table). 80 | create or replace task "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS_TASK_REPLICATION" 81 | warehouse = compute_wh 82 | schedule = '1 minute' 83 | allow_overlapping_execution = false 84 | when 85 | system$stream_has_data('HOWTO_DB.PUBLIC.CDC_MYSQL_INVENTORY_USERS_STREAM_REPLICATION') 86 | as 87 | merge into "HOWTO_DB"."PUBLIC"."REPLICA_MYSQL_INVENTORY_USERS" replica_table 88 | using 89 | (with 90 | prequery as (select RECORD_METADATA:key.payload.id id 91 | , COALESCE(RECORD_CONTENT:payload.source.gtid, '') sourcedb_binlog_gtid 92 | , COALESCE(RECORD_CONTENT:payload.source.file, '') sourcedb_binlog_file 93 | , to_number(RECORD_CONTENT:payload.source.pos) sourcedb_binlog_pos 94 | , RECORD_CONTENT:payload.after payload 95 | , RECORD_CONTENT:payload.op cdc_operation 96 | , RECORD_CONTENT:payload.source cdc_source_info 97 | , RECORD_CONTENT:payload.source.ts_ms ts_ms_sourcedb 98 | , RECORD_CONTENT:payload.ts_ms ts_ms_cdc 99 | from "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS_STREAM_REPLICATION"), 100 | rank_query as (select * 101 | , ROW_NUMBER() over (PARTITION BY id 102 | order by ts_ms_cdc desc, sourcedb_binlog_file desc, sourcedb_binlog_pos desc) as row_num 103 | from prequery) 104 | select * from rank_query where row_num = 1) event_data 105 | on replica_table.id = to_number(event_data.id) 106 | when not matched and event_data.cdc_operation <> 'd' 107 | then insert 108 | (id, sourcedb_binlog_gtid, sourcedb_binlog_file, sourcedb_binlog_pos, payload 109 | , cdc_operation, cdc_source_info, ts_ms_sourcedb, ts_ms_cdc, ts_ms_replica_sf) 110 | values 111 | (event_data.id, event_data.sourcedb_binlog_gtid, event_data.sourcedb_binlog_file 112 | , event_data.sourcedb_binlog_pos, event_data.payload, event_data.cdc_operation 113 | , event_data.cdc_source_info, event_data.ts_ms_sourcedb, event_data.ts_ms_cdc 114 | , date_part(epoch_millisecond, CURRENT_TIMESTAMP)) 115 | when matched and event_data.cdc_operation = 'd' 116 | then delete 117 | when matched and event_data.cdc_operation <> 'd' 118 | then update set id=event_data.id 119 | , sourcedb_binlog_gtid=event_data.sourcedb_binlog_gtid 120 | , sourcedb_binlog_file=event_data.sourcedb_binlog_file 121 | , sourcedb_binlog_pos=event_data.sourcedb_binlog_pos 122 | , payload=event_data.payload 123 | , cdc_operation=event_data.cdc_operation 124 | , cdc_source_info=event_data.cdc_source_info 125 | , ts_ms_sourcedb=event_data.ts_ms_sourcedb 126 | , ts_ms_cdc=event_data.ts_ms_cdc 127 | , ts_ms_replica_sf=date_part(epoch_millisecond, CURRENT_TIMESTAMP); 128 | 129 | 130 | -- Enable task 131 | ALTER TASK "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS_TASK_REPLICATION" RESUME; 132 | 133 | -- Check info about the task executions (STATE and NEXT_SCHEDULED_TIME columns) 134 | -- If you see error "Cannot execute task , EXECUTE TASK privilege must be granted to owner role" 135 | -- review 00-security.sql script 136 | select * 137 | from table(HOWTO_DB.information_schema.task_history()) 138 | 
order by scheduled_time desc; 139 | 140 | 141 | -- Check counts (you don't see the same results in event table against the replica table) 142 | select to_char(RECORD_CONTENT:payload.op) cdc_operation, count(*), 'CDC_MYSQL_INVENTORY_USERS' table_name 143 | from "HOWTO_DB"."PUBLIC"."CDC_MYSQL_INVENTORY_USERS" group by RECORD_CONTENT:payload.op 144 | union all 145 | select cdc_operation, count(*), 'REPLICA_MYSQL_INVENTORY_USERS' table_name 146 | from "HOWTO_DB"."PUBLIC"."REPLICA_MYSQL_INVENTORY_USERS" group by cdc_operation 147 | order by table_name, cdc_operation; 148 | -------------------------------------------------------------------------------- /snowflake/sql/01-cdc-to-replica-postgres.sql: -------------------------------------------------------------------------------- 1 | -- Based on: 2 | -- https://docs.snowflake.com/en/user-guide/data-pipelines-examples.html#transforming-loaded-json-data-on-a-schedule 3 | -- https://docs.snowflake.com/en/sql-reference/sql/merge.html 4 | 5 | -- Use this role is not recomendable in production environments 6 | use role accountadmin; 7 | 8 | -- Create the replica table, including extra columns to support replica logic and process trazability 9 | create or replace 10 | table "HOWTO_DB"."PUBLIC"."REPLICA_POSTGRESDB_INVENTORY_PRODUCT" 11 | ( id number PRIMARY KEY comment 'primary key of the source table' 12 | , sourcedb_lsn string comment 'postgres log sequence number, used for ordering events (RECORD_CONTENT:payload.source.lsn)' 13 | , payload variant comment 'data after operation (RECORD_CONTENT:payload.after)' 14 | , cdc_operation char comment 'CDC registered operation in source DB (RECORD_CONTENT:payload.op)' 15 | , cdc_source_info variant comment 'Debezium source field, for trazability (RECORD_CONTENT:payload.source)' 16 | , ts_ms_sourcedb number comment 'the timestamp when database register the event, not available on database snapshot (RECORD_CONTENT:payload.source.ts_ms)' 17 | , ts_ms_cdc number comment 'the timestamp when the CDC connector capture the event (RECORD_CONTENT:payload.ts_ms)' 18 | , ts_ms_replica_sf number comment 'the timestamp when snowflake task fills the record') 19 | comment = 'Replica from CDC over PostgreSQL Inventory Products'; 20 | 21 | -- Create final view with same columns as PostgreSQL database to use like the same table 22 | create or replace view "HOWTO_DB"."PUBLIC"."POSTGRESDB_INVENTORY_PRODUCT" 23 | as 24 | select payload:id id, payload:name name, payload:description description, payload:created_on created_on 25 | from "HOWTO_DB"."PUBLIC"."REPLICA_POSTGRESDB_INVENTORY_PRODUCT"; 26 | 27 | -- Create a stream from CDC events table, to process new events into replica table 28 | create or replace 29 | stream "HOWTO_DB"."PUBLIC"."CDC_POSTGRESDB_INVENTORY_PRODUCT_STREAM_REPLICATION" 30 | on table "HOWTO_DB"."PUBLIC"."CDC_POSTGRESDB_INVENTORY_PRODUCT"; 31 | 32 | 33 | -- After create stream (avoid loss events), process all events available in CDC events table 34 | merge into "HOWTO_DB"."PUBLIC"."REPLICA_POSTGRESDB_INVENTORY_PRODUCT" replica_table 35 | using 36 | (with 37 | prequery as (select RECORD_METADATA:key.payload.id id 38 | , to_number(RECORD_CONTENT:payload.source.lsn) sourcedb_lsn 39 | , RECORD_CONTENT:payload.after payload 40 | , RECORD_CONTENT:payload.op cdc_operation 41 | , RECORD_CONTENT:payload.source cdc_source_info 42 | , RECORD_CONTENT:payload.source.ts_ms ts_ms_sourcedb 43 | , RECORD_CONTENT:payload.ts_ms ts_ms_cdc 44 | from "HOWTO_DB"."PUBLIC"."CDC_POSTGRESDB_INVENTORY_PRODUCT"), 45 | rank_query as (select * 
46 | , ROW_NUMBER() over (PARTITION BY id 47 | order by ts_ms_cdc desc, sourcedb_lsn desc) as row_num 48 | from prequery) 49 | select * from rank_query where row_num = 1) event_data 50 | on replica_table.id = to_number(event_data.id) 51 | when not matched and event_data.cdc_operation <> 'd' 52 | then insert 53 | (id, sourcedb_lsn, payload, cdc_operation, cdc_source_info, ts_ms_sourcedb 54 | , ts_ms_cdc, ts_ms_replica_sf) 55 | values 56 | (event_data.id, event_data.sourcedb_lsn, event_data.payload, event_data.cdc_operation 57 | , event_data.cdc_source_info, event_data.ts_ms_sourcedb, event_data.ts_ms_cdc 58 | , date_part(epoch_millisecond, CURRENT_TIMESTAMP)) 59 | when matched and event_data.cdc_operation = 'd' 60 | then delete 61 | when matched and event_data.cdc_operation <> 'd' 62 | then update set id=event_data.id 63 | , sourcedb_lsn=event_data.sourcedb_lsn 64 | , payload=event_data.payload 65 | , cdc_operation=event_data.cdc_operation 66 | , cdc_source_info=event_data.cdc_source_info 67 | , ts_ms_sourcedb=event_data.ts_ms_sourcedb 68 | , ts_ms_cdc=event_data.ts_ms_cdc 69 | , ts_ms_replica_sf=date_part(epoch_millisecond, CURRENT_TIMESTAMP); 70 | 71 | 72 | -- Create task with previous tested query, but read data from the created stream (not CDC events table). 73 | create or replace task "HOWTO_DB"."PUBLIC"."CDC_POSTGRESDB_INVENTORY_PRODUCT_TASK_REPLICATION" 74 | warehouse = compute_wh 75 | schedule = '1 minute' 76 | allow_overlapping_execution = false 77 | when 78 | system$stream_has_data('HOWTO_DB.PUBLIC.CDC_POSTGRESDB_INVENTORY_PRODUCT_STREAM_REPLICATION') 79 | as 80 | merge into "HOWTO_DB"."PUBLIC"."REPLICA_POSTGRESDB_INVENTORY_PRODUCT" replica_table 81 | using 82 | (with 83 | prequery as (select RECORD_METADATA:key.payload.id id 84 | , to_number(RECORD_CONTENT:payload.source.lsn) sourcedb_lsn 85 | , RECORD_CONTENT:payload.after payload 86 | , RECORD_CONTENT:payload.op cdc_operation 87 | , RECORD_CONTENT:payload.source cdc_source_info 88 | , RECORD_CONTENT:payload.source.ts_ms ts_ms_sourcedb 89 | , RECORD_CONTENT:payload.ts_ms ts_ms_cdc 90 | from "HOWTO_DB"."PUBLIC"."CDC_POSTGRESDB_INVENTORY_PRODUCT_STREAM_REPLICATION"), 91 | rank_query as (select * 92 | , ROW_NUMBER() over (PARTITION BY id 93 | order by ts_ms_cdc desc, sourcedb_lsn desc) as row_num 94 | from prequery) 95 | select * from rank_query where row_num = 1) event_data 96 | on replica_table.id = to_number(event_data.id) 97 | when not matched and event_data.cdc_operation <> 'd' 98 | then insert 99 | (id, sourcedb_lsn, payload, cdc_operation, cdc_source_info, ts_ms_sourcedb 100 | , ts_ms_cdc, ts_ms_replica_sf) 101 | values 102 | (event_data.id, event_data.sourcedb_lsn, event_data.payload, event_data.cdc_operation 103 | , event_data.cdc_source_info, event_data.ts_ms_sourcedb, event_data.ts_ms_cdc 104 | , date_part(epoch_millisecond, CURRENT_TIMESTAMP)) 105 | when matched and event_data.cdc_operation = 'd' 106 | then delete 107 | when matched and event_data.cdc_operation <> 'd' 108 | then update set id=event_data.id 109 | , sourcedb_lsn=event_data.sourcedb_lsn 110 | , payload=event_data.payload 111 | , cdc_operation=event_data.cdc_operation 112 | , cdc_source_info=event_data.cdc_source_info 113 | , ts_ms_sourcedb=event_data.ts_ms_sourcedb 114 | , ts_ms_cdc=event_data.ts_ms_cdc 115 | , ts_ms_replica_sf=date_part(epoch_millisecond, CURRENT_TIMESTAMP); 116 | 117 | 118 | -- Enable task 119 | ALTER TASK "HOWTO_DB"."PUBLIC"."CDC_POSTGRESDB_INVENTORY_PRODUCT_TASK_REPLICATION" RESUME; 120 | 121 | -- Check info about the task executions 
(STATE and NEXT_SCHEDULED_TIME columns) 122 | -- If you see error "Cannot execute task , EXECUTE TASK privilege must be granted to owner role" 123 | -- review 00-security.sql script 124 | select * 125 | from table(HOWTO_DB.information_schema.task_history()) 126 | order by scheduled_time desc; 127 | 128 | 129 | -- Check counts (you don't see the same results in event table against the replica table) 130 | select to_char(RECORD_CONTENT:payload.op) cdc_operation, count(*), 'CDC_POSTGRESDB_INVENTORY_PRODUCT' table_name 131 | from "HOWTO_DB"."PUBLIC"."CDC_POSTGRESDB_INVENTORY_PRODUCT" group by RECORD_CONTENT:payload.op 132 | union all 133 | select cdc_operation, count(*), 'REPLICA_POSTGRESDB_INVENTORY_PRODUCT' table_name 134 | from "HOWTO_DB"."PUBLIC"."REPLICA_POSTGRESDB_INVENTORY_PRODUCT" group by cdc_operation 135 | order by table_name, cdc_operation; 136 | -------------------------------------------------------------------------------- /snowflake/status_sink.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CONNECT_URL=http://localhost:8085 4 | 5 | CONNECTORS=$(curl -s -k ${CONNECT_URL}/connectors) 6 | echo Connector list: 7 | echo $CONNECTORS 8 | echo 9 | 10 | echo Connector status: 11 | echo 12 | 13 | for row in $(echo "${CONNECTORS}" | jq -c -r '.[]'); do 14 | status=$(curl -s -k -X GET "${CONNECT_URL}/connectors/${row}/status") 15 | echo $status 16 | echo 17 | done 18 | 19 | --------------------------------------------------------------------------------