├── tsconfig.json ├── jest.config.js ├── src ├── jestGlobalSetup.ts ├── jestGlobalTeardown.ts ├── 1-naive.test.ts ├── 2-idempotent.test.ts ├── 3-microBatch.test.ts └── 4-eventTime.test.ts ├── docker-compose.yaml ├── package.json ├── README.md ├── LICENSE └── .gitignore /tsconfig.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "compilerOptions": { 4 | "esModuleInterop": true 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('ts-jest').JestConfigWithTsJest} */ 2 | module.exports = { 3 | preset: "ts-jest", 4 | testEnvironment: "node", 5 | globalTeardown: "/src/jestGlobalTeardown.ts", 6 | globalSetup: "/src/jestGlobalSetup.ts", 7 | }; 8 | -------------------------------------------------------------------------------- /src/jestGlobalSetup.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from "@clickhouse/client"; 2 | 3 | export default async function setup() { 4 | const ch = createClient({ 5 | host: "http://localhost:8123", 6 | clickhouse_settings: { 7 | wait_end_of_query: 1, 8 | }, 9 | }); 10 | 11 | await ch.command({ 12 | query: "CREATE DATABASE segmentation", 13 | }); 14 | } 15 | -------------------------------------------------------------------------------- /src/jestGlobalTeardown.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from "@clickhouse/client"; 2 | 3 | export default async function teardown() { 4 | const ch = createClient({ 5 | host: "http://localhost:8123", 6 | clickhouse_settings: { 7 | wait_end_of_query: 1, 8 | }, 9 | }); 10 | 11 | await ch.command({ 12 | query: "DROP DATABASE segmentation", 13 | }); 14 | } 15 | -------------------------------------------------------------------------------- 
/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | clickhouse-server: 3 | image: clickhouse/clickhouse-server:23.8.8.20-alpine 4 | ports: 5 | - "8123:8123" 6 | - "9000:9000" 7 | - "9009:9009" 8 | volumes: 9 | - clickhouse_lib:/var/lib/clickhouse 10 | - clickhouse_log:/var/log/clickhouse-server 11 | networks: 12 | - clickhouse-segments-tutorial 13 | volumes: 14 | clickhouse_lib: 15 | clickhouse_log: 16 | networks: 17 | clickhouse-segments-tutorial: 18 | driver: bridge 19 | name: clickhouse-segments-tutorial 20 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "clickhouse-segments-tutorial", 3 | "version": "0.0.1", 4 | "description": "Repository demonstrating how to calculate user segments in Clickhouse.", 5 | "scripts": { 6 | "test": "jest" 7 | }, 8 | "repository": { 9 | "type": "git", 10 | "url": "git+https://github.com/dittofeed/clickhouse-segments-tutorial.git" 11 | }, 12 | "keywords": [ 13 | "Clickhouse", 14 | "Segments", 15 | "Segmentation", 16 | "Tutorial" 17 | ], 18 | "author": "Max Gurewitz", 19 | "license": "MIT", 20 | "bugs": { 21 | "url": "https://github.com/dittofeed/clickhouse-segments-tutorial/issues" 22 | }, 23 | "homepage": "https://github.com/dittofeed/clickhouse-segments-tutorial#readme" 24 | } 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Clickhouse Segments Tutorial 2 | 3 | This repository provides a tutorial on how to implement live user segmentation in ClickHouse. 4 | 5 | It accompanies a [technical blog post](https://dev.to/dittofeed-max/how-we-stopped-our-clickhouse-db-from-exploding-2969), which can be read in parallel. 
6 | 7 | These test implementations are written in ascending order of complexity. 8 | 9 | 1. [Naive](./src/1-naive.test.ts) 10 | 2. [Idempotent](./src/2-idempotent.test.ts) 11 | 3. [Micro-Batch](./src/3-microBatch.test.ts) 12 | 4. [Event Time vs. Processing Time](./src/4-eventTime.test.ts) 13 | 14 | If you found this interesting, we'd love it if you shot over to our main repo and gave us a star! 🌟 15 | 16 | https://github.com/dittofeed/dittofeed 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Dittofeed 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/1-naive.test.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from "@clickhouse/client"; 2 | 3 | const ch = createClient({ 4 | host: "http://localhost:8123", 5 | database: "segmentation", 6 | clickhouse_settings: { 7 | wait_end_of_query: 1, 8 | }, 9 | }); 10 | 11 | const setup = [ 12 | ` 13 | CREATE TABLE user_events_naive ( 14 | user_id String, 15 | event_name LowCardinality(String), 16 | timestamp DateTime 17 | ) 18 | Engine = MergeTree() 19 | ORDER BY (user_id, event_name, timestamp);`, 20 | ` 21 | CREATE TABLE segment_assignments_naive ( 22 | user_id String, 23 | value Boolean, 24 | assigned_at DateTime DEFAULT now(), 25 | INDEX value_idx value TYPE minmax GRANULARITY 4 26 | ) 27 | Engine = ReplacingMergeTree() 28 | ORDER BY (user_id);`, 29 | ] as const; 30 | 31 | interface NaiveEvent { 32 | user_id: string; 33 | event_name: string; 34 | timestamp: string; 35 | } 36 | 37 | describe("using a naive setup", () => { 38 | beforeAll(async () => { 39 | await Promise.all( 40 | setup.map((sql) => 41 | ch.command({ 42 | query: sql, 43 | }) 44 | ) 45 | ); 46 | }); 47 | 48 | it("calculates segments of users which clicked a button at least 2 times", async () => { 49 | await ch.insert({ 50 | table: "user_events_naive (user_id, event_name, timestamp)", 51 | values: [ 52 | { 53 | user_id: "1", 54 | event_name: "BUTTON_CLICK", 55 | timestamp: "2023-01-01 00:00:00", 56 | }, 57 | { 58 | user_id: "1", 59 | event_name: "BUTTON_CLICK", 60 | timestamp: "2023-01-01 00:05:00", 61 | }, 62 | { 63 | user_id: "2", 64 | event_name: "BUTTON_CLICK", 65 | timestamp: "2023-01-01 00:00:00", 66 | }, 67 | ] satisfies NaiveEvent[], 68 | format: "JSONEachRow", 69 | }); 70 | 71 | await ch.command({ 72 | query: ` 73 | INSERT INTO segment_assignments_naive (user_id, value) 74 | SELECT user_id, count() >= 2 75 | FROM user_events_naive 76 | WHERE 
event_name = 'BUTTON_CLICK' 77 | GROUP BY user_id 78 | `, 79 | }); 80 | 81 | const segmentsResponse = await ch.query({ 82 | query: ` 83 | SELECT 84 | user_id, 85 | argMax(value, assigned_at) AS latest_value 86 | FROM segment_assignments_naive 87 | WHERE value = True 88 | GROUP BY user_id; 89 | `, 90 | }); 91 | const { data: usersInSegment } = (await segmentsResponse.json()) as { 92 | data: { user_id: string }[]; 93 | }; 94 | 95 | expect(usersInSegment.map((u) => u.user_id)).toEqual(["1"]); 96 | }); 97 | }); 98 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | 18 | # Directory for instrumented libs generated by jscoverage/JSCover 19 | lib-cov 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | *.lcov 24 | 25 | # nyc test coverage 26 | .nyc_output 27 | 28 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 29 | .grunt 30 | 31 | # Bower dependency directory (https://bower.io/) 32 | bower_components 33 | 34 | # node-waf configuration 35 | .lock-wscript 36 | 37 | # Compiled binary addons (https://nodejs.org/api/addons.html) 38 | build/Release 39 | 40 | # Dependency directories 41 | node_modules/ 42 | jspm_packages/ 43 | 44 | # TypeScript v1 declaration files 45 | typings/ 46 | 47 | # TypeScript cache 48 | *.tsbuildinfo 49 | 50 | # Optional npm cache directory 51 | .npm 52 | 53 | # Optional eslint cache 54 | .eslintcache 55 | 56 | # Microbundle cache 57 | .rpt2_cache/ 58 | .rts2_cache_cjs/ 59 | .rts2_cache_es/ 60 | .rts2_cache_umd/ 61 | 62 | # Optional REPL history 63 | .node_repl_history 
64 | 65 | # Output of 'npm pack' 66 | *.tgz 67 | 68 | # Yarn Integrity file 69 | .yarn-integrity 70 | 71 | # dotenv environment variables file 72 | .env 73 | .env.test 74 | 75 | # parcel-bundler cache (https://parceljs.org/) 76 | .cache 77 | 78 | # Next.js build output 79 | .next 80 | 81 | # Nuxt.js build / generate output 82 | .nuxt 83 | dist 84 | 85 | # Gatsby files 86 | .cache/ 87 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 88 | # https://nextjs.org/blog/next-9-1#public-directory-support 89 | # public 90 | 91 | # vuepress build output 92 | .vuepress/dist 93 | 94 | # Serverless directories 95 | .serverless/ 96 | 97 | # FuseBox cache 98 | .fusebox/ 99 | 100 | # DynamoDB Local files 101 | .dynamodb/ 102 | 103 | # TernJS port file 104 | .tern-port 105 | 106 | # MacOS 107 | 108 | # General 109 | .DS_Store 110 | .AppleDouble 111 | .LSOverride 112 | 113 | # Icon must end with two \r 114 | Icon 115 | 116 | # Thumbnails 117 | ._* 118 | 119 | # Files that might appear in the root of a volume 120 | .DocumentRevisions-V100 121 | .fseventsd 122 | .Spotlight-V100 123 | .TemporaryItems 124 | .Trashes 125 | .VolumeIcon.icns 126 | .com.apple.timemachine.donotpresent 127 | 128 | # Directories potentially created on remote AFP share 129 | .AppleDB 130 | .AppleDesktop 131 | Network Trash Folder 132 | Temporary Items 133 | .apdisk 134 | 135 | mnt/* 136 | !mnt/.gitkeep 137 | 138 | # Yarn 139 | .pnp.* 140 | .yarn/* 141 | !.yarn/patches 142 | !.yarn/plugins 143 | !.yarn/releases 144 | !.yarn/sdks 145 | !.yarn/versions 146 | 147 | 148 | .tmp/* 149 | !.tmp/.gitkeep 150 | 151 | .editorconfig 152 | -------------------------------------------------------------------------------- /src/2-idempotent.test.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from "@clickhouse/client"; 2 | 3 | const ch = createClient({ 4 | host: "http://localhost:8123", 5 | database: "segmentation", 6 | 
clickhouse_settings: { 7 | wait_end_of_query: 1, 8 | }, 9 | }); 10 | 11 | const setup = [ 12 | ` 13 | CREATE TABLE user_events_idempotent ( 14 | user_id String, 15 | event_name LowCardinality(String), 16 | message_id String, 17 | timestamp DateTime 18 | ) 19 | Engine = MergeTree() 20 | ORDER BY (user_id, event_name, timestamp, message_id);`, 21 | ` 22 | CREATE TABLE segment_assignments_idempotent ( 23 | user_id String, 24 | value Boolean, 25 | assigned_at DateTime DEFAULT now() 26 | ) 27 | Engine = ReplacingMergeTree() 28 | ORDER BY (user_id);`, 29 | ] as const; 30 | 31 | interface IdempotentEvent { 32 | user_id: string; 33 | event_name: string; 34 | timestamp: string; 35 | message_id: string; 36 | } 37 | 38 | describe("using an idempotent setup", () => { 39 | beforeAll(async () => { 40 | await Promise.all( 41 | setup.map((sql) => 42 | ch.command({ 43 | query: sql, 44 | }) 45 | ) 46 | ); 47 | }); 48 | 49 | it("calculates segments of users which clicked a button at least 2 times", async () => { 50 | await ch.insert({ 51 | table: 52 | "user_events_idempotent (user_id, event_name, timestamp, message_id)", 53 | values: [ 54 | { 55 | user_id: "1", 56 | event_name: "BUTTON_CLICK", 57 | timestamp: "2023-01-01 00:00:00", 58 | message_id: "de4b1e29-7cf8-4e3e-b92b-05c8d5fd1606", 59 | }, 60 | { 61 | user_id: "1", 62 | event_name: "BUTTON_CLICK", 63 | timestamp: "2023-01-01 00:05:00", 64 | message_id: "ca4222e5-4497-42aa-9323-f9ec04a91c87", 65 | }, 66 | { 67 | user_id: "2", 68 | event_name: "BUTTON_CLICK", 69 | timestamp: "2023-01-01 00:00:00", 70 | message_id: "c38f4196-b60b-4f7c-b8e5-b243755c0f77", 71 | }, 72 | // duplicate event 73 | { 74 | user_id: "2", 75 | event_name: "BUTTON_CLICK", 76 | timestamp: "2023-01-01 00:00:00", 77 | message_id: "c38f4196-b60b-4f7c-b8e5-b243755c0f77", 78 | }, 79 | ] satisfies IdempotentEvent[], 80 | format: "JSONEachRow", 81 | }); 82 | 83 | await ch.command({ 84 | query: ` 85 | INSERT INTO segment_assignments_idempotent (user_id, value) 86 | 
SELECT user_id, uniq(message_id) >= 2 87 | FROM user_events_idempotent 88 | WHERE event_name = 'BUTTON_CLICK' 89 | GROUP BY user_id 90 | `, 91 | }); 92 | 93 | const segmentsResponse = await ch.query({ 94 | query: ` 95 | SELECT 96 | user_id, 97 | argMax(value, assigned_at) AS latest_value 98 | FROM segment_assignments_idempotent 99 | GROUP BY user_id 100 | HAVING latest_value = True; 101 | `, 102 | }); 103 | const { data: usersInSegment } = (await segmentsResponse.json()) as { 104 | data: { user_id: string }[]; 105 | }; 106 | 107 | expect(usersInSegment.map((u) => u.user_id)).toEqual(["1"]); 108 | }); 109 | }); 110 | -------------------------------------------------------------------------------- /src/3-microBatch.test.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from "@clickhouse/client"; 2 | 3 | const ch = createClient({ 4 | host: "http://localhost:8123", 5 | database: "segmentation", 6 | clickhouse_settings: { 7 | wait_end_of_query: 1, 8 | date_time_input_format: "best_effort", 9 | }, 10 | }); 11 | 12 | const setupTables = [ 13 | ` 14 | CREATE TABLE user_events_micro_batch ( 15 | user_id String, 16 | event_name LowCardinality(String), 17 | message_id String, 18 | timestamp DateTime 19 | ) 20 | Engine = MergeTree() 21 | ORDER BY (user_id, event_name, timestamp, message_id);`, 22 | ` 23 | CREATE TABLE user_states_micro_batch ( 24 | user_id String, 25 | event_count AggregateFunction(uniq, String), 26 | computed_at DateTime DEFAULT now(), 27 | ) 28 | Engine = AggregatingMergeTree() 29 | ORDER BY (user_id);`, 30 | ` 31 | CREATE TABLE updated_user_states_micro_batch ( 32 | user_id String, 33 | computed_at DateTime DEFAULT now() 34 | ) 35 | Engine = MergeTree() 36 | PARTITION BY toYYYYMMDD(computed_at) 37 | ORDER BY computed_at 38 | TTL toStartOfDay(computed_at) + interval 100 day;`, 39 | ` 40 | CREATE TABLE segment_assignments_micro_batch ( 41 | user_id String, 42 | value Boolean, 43 | assigned_at 
DateTime DEFAULT now() 44 | ) 45 | Engine = ReplacingMergeTree() 46 | ORDER BY (user_id);`, 47 | ] as const; 48 | 49 | const setupViews = [ 50 | ` 51 | CREATE MATERIALIZED VIEW updated_user_states_micro_batch_mv 52 | TO updated_user_states_micro_batch 53 | AS SELECT 54 | user_id, 55 | computed_at 56 | FROM user_states_micro_batch;`, 57 | ] as const; 58 | 59 | interface microBatchEvent { 60 | user_id: string; 61 | event_name: string; 62 | timestamp: string; 63 | message_id: string; 64 | } 65 | 66 | describe("using an micro batch setup", () => { 67 | beforeAll(async () => { 68 | await Promise.all( 69 | setupTables.map((sql) => 70 | ch.command({ 71 | query: sql, 72 | }) 73 | ) 74 | ); 75 | 76 | await Promise.all( 77 | setupViews.map((sql) => 78 | ch.command({ 79 | query: sql, 80 | }) 81 | ) 82 | ); 83 | }); 84 | 85 | it("calculates segments of users which clicked a button at least 2 times", async () => { 86 | const now = new Date(); 87 | const oneMinuteAgo = new Date(now.getTime() - 60 * 1000); 88 | const twoMinutesAgo = new Date(now.getTime() - 2 * 60 * 1000); 89 | 90 | await ch.insert({ 91 | table: 92 | "user_events_micro_batch (user_id, event_name, timestamp, message_id)", 93 | values: [ 94 | { 95 | user_id: "1", 96 | event_name: "BUTTON_CLICK", 97 | timestamp: twoMinutesAgo.toISOString(), 98 | message_id: "de4b1e29-7cf8-4e3e-b92b-05c8d5fd1606", 99 | }, 100 | { 101 | user_id: "1", 102 | event_name: "BUTTON_CLICK", 103 | timestamp: oneMinuteAgo.toISOString(), 104 | message_id: "ca4222e5-4497-42aa-9323-f9ec04a91c87", 105 | }, 106 | { 107 | user_id: "2", 108 | event_name: "BUTTON_CLICK", 109 | timestamp: twoMinutesAgo.toISOString(), 110 | message_id: "c38f4196-b60b-4f7c-b8e5-b243755c0f77", 111 | }, 112 | ] satisfies microBatchEvent[], 113 | format: "JSONEachRow", 114 | }); 115 | 116 | await ch.command({ 117 | query: ` 118 | INSERT INTO user_states_micro_batch 119 | SELECT 120 | user_id, 121 | uniqState(message_id), 122 | parseDateTimeBestEffort({now:String}) 123 | 
FROM user_events_micro_batch 124 | WHERE 125 | event_name = 'BUTTON_CLICK' 126 | AND timestamp >= parseDateTimeBestEffort({lower_bound:String}) 127 | GROUP BY user_id; 128 | `, 129 | query_params: { 130 | lower_bound: twoMinutesAgo.toISOString(), 131 | now: now.toISOString(), 132 | }, 133 | }); 134 | 135 | await ch.command({ 136 | query: ` 137 | INSERT INTO segment_assignments_micro_batch 138 | SELECT 139 | user_id, 140 | uniqMerge(event_count) >= 2, 141 | parseDateTimeBestEffort({now:String}) 142 | FROM user_states_micro_batch 143 | WHERE 144 | user_id IN ( 145 | SELECT user_id 146 | FROM updated_user_states_micro_batch 147 | WHERE computed_at >= parseDateTimeBestEffort({now:String}) 148 | ) 149 | GROUP BY user_id; 150 | `, 151 | query_params: { 152 | now: now.toISOString(), 153 | }, 154 | }); 155 | 156 | const segmentsResponse = await ch.query({ 157 | query: ` 158 | SELECT 159 | user_id, 160 | argMax(value, assigned_at) AS latest_value 161 | FROM segment_assignments_micro_batch 162 | GROUP BY user_id 163 | HAVING latest_value = True; 164 | `, 165 | }); 166 | 167 | const { data: usersInSegment } = (await segmentsResponse.json()) as { 168 | data: { user_id: string }[]; 169 | }; 170 | 171 | expect(usersInSegment.map((u) => u.user_id)).toEqual(["1"]); 172 | }); 173 | }); 174 | -------------------------------------------------------------------------------- /src/4-eventTime.test.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from "@clickhouse/client"; 2 | 3 | const ch = createClient({ 4 | host: "http://localhost:8123", 5 | database: "segmentation", 6 | clickhouse_settings: { 7 | wait_end_of_query: 1, 8 | date_time_input_format: "best_effort", 9 | }, 10 | }); 11 | 12 | const setupTables = [ 13 | ` 14 | CREATE TABLE user_events_event_time ( 15 | user_id String, 16 | event_name LowCardinality(String), 17 | message_id String, 18 | event_time DateTime, 19 | processing_time DateTime 20 | ) 21 | Engine = 
MergeTree() 22 | ORDER BY (user_id, event_name, processing_time, message_id);`, 23 | ` 24 | CREATE TABLE user_states_event_time ( 25 | user_id String, 26 | event_count AggregateFunction(uniq, String), 27 | last_event_time AggregateFunction(max, DateTime), 28 | computed_at DateTime DEFAULT now(), 29 | ) 30 | Engine = AggregatingMergeTree() 31 | ORDER BY (user_id);`, 32 | ` 33 | CREATE TABLE updated_user_states_event_time ( 34 | user_id String, 35 | computed_at DateTime DEFAULT now() 36 | ) 37 | Engine = MergeTree() 38 | PARTITION BY toYYYYMMDD(computed_at) 39 | ORDER BY computed_at 40 | TTL toStartOfDay(computed_at) + interval 100 day;`, 41 | ` 42 | CREATE TABLE segment_assignments_event_time ( 43 | user_id String, 44 | value Boolean, 45 | last_event_time DateTime, 46 | assigned_at DateTime DEFAULT now() 47 | ) 48 | Engine = ReplacingMergeTree() 49 | ORDER BY (user_id);`, 50 | ] as const; 51 | 52 | const setupViews = [ 53 | ` 54 | CREATE MATERIALIZED VIEW updated_user_states_event_time_mv 55 | TO updated_user_states_event_time 56 | AS SELECT 57 | user_id, 58 | computed_at 59 | FROM user_states_event_time;`, 60 | ] as const; 61 | 62 | interface MiniBatchEvent { 63 | user_id: string; 64 | event_name: string; 65 | processing_time: string; 66 | event_time: string; 67 | message_id: string; 68 | } 69 | 70 | describe("using an event time setup", () => { 71 | beforeAll(async () => { 72 | await Promise.all( 73 | setupTables.map((sql) => 74 | ch.command({ 75 | query: sql, 76 | }) 77 | ) 78 | ); 79 | 80 | await Promise.all( 81 | setupViews.map((sql) => 82 | ch.command({ 83 | query: sql, 84 | }) 85 | ) 86 | ); 87 | }); 88 | 89 | it("calculates segments of users which clicked a button at least 2 times", async () => { 90 | const now = new Date(); 91 | const oneMinuteAgo = new Date(now.getTime() - 60 * 1000); 92 | const oneMinuteAndThirtySecondsAgo = new Date( 93 | now.getTime() - 60 * 1000 - 30 * 1000 94 | ); 95 | const twoMinutesAgo = new Date(now.getTime() - 2 * 60 * 1000); 96 
| const twoMinutesAndThirtySecondsAgo = new Date( 97 | now.getTime() - 2 * 60 * 1000 - 30 * 1000 98 | ); 99 | 100 | await ch.insert({ 101 | table: 102 | "user_events_event_time (user_id, event_name, processing_time, message_id, event_time)", 103 | values: [ 104 | { 105 | user_id: "1", 106 | event_name: "BUTTON_CLICK", 107 | processing_time: twoMinutesAgo.toISOString(), 108 | event_time: twoMinutesAndThirtySecondsAgo.toISOString(), 109 | message_id: "de4b1e29-7cf8-4e3e-b92b-05c8d5fd1606", 110 | }, 111 | { 112 | user_id: "1", 113 | event_name: "BUTTON_CLICK", 114 | processing_time: oneMinuteAgo.toISOString(), 115 | event_time: oneMinuteAndThirtySecondsAgo.toISOString(), 116 | message_id: "ca4222e5-4497-42aa-9323-f9ec04a91c87", 117 | }, 118 | { 119 | user_id: "2", 120 | event_name: "BUTTON_CLICK", 121 | processing_time: twoMinutesAgo.toISOString(), 122 | event_time: twoMinutesAndThirtySecondsAgo.toISOString(), 123 | message_id: "c38f4196-b60b-4f7c-b8e5-b243755c0f77", 124 | }, 125 | ] satisfies MiniBatchEvent[], 126 | format: "JSONEachRow", 127 | }); 128 | 129 | await ch.command({ 130 | query: ` 131 | INSERT INTO user_states_event_time 132 | SELECT 133 | user_id, 134 | uniqState(message_id), 135 | maxState(event_time), 136 | parseDateTimeBestEffort({now:String}) 137 | FROM user_events_event_time 138 | WHERE 139 | event_name = 'BUTTON_CLICK' 140 | AND processing_time >= parseDateTimeBestEffort({lower_bound:String}) 141 | GROUP BY user_id; 142 | `, 143 | query_params: { 144 | lower_bound: twoMinutesAgo.toISOString(), 145 | now: now.toISOString(), 146 | }, 147 | }); 148 | 149 | await ch.command({ 150 | query: ` 151 | INSERT INTO segment_assignments_event_time 152 | SELECT 153 | user_id, 154 | uniqMerge(event_count) >= 2, 155 | maxMerge(last_event_time), 156 | parseDateTimeBestEffort({now:String}) 157 | FROM user_states_event_time 158 | WHERE 159 | user_id IN ( 160 | SELECT user_id 161 | FROM updated_user_states_event_time 162 | WHERE computed_at >= 
parseDateTimeBestEffort({now:String}) 163 | ) 164 | GROUP BY user_id; 165 | `, 166 | query_params: { 167 | now: now.toISOString(), 168 | }, 169 | }); 170 | 171 | const segmentsResponse = await ch.query({ 172 | query: ` 173 | SELECT 174 | user_id, 175 | toUnixTimestamp(argMax(last_event_time, assigned_at)) AS last_event_time, 176 | argMax(value, assigned_at) AS latest_value 177 | FROM segment_assignments_event_time 178 | GROUP BY user_id 179 | HAVING latest_value = True; 180 | `, 181 | }); 182 | 183 | const { data: usersInSegment } = (await segmentsResponse.json()) as { 184 | data: { user_id: string; last_event_time }[]; 185 | }; 186 | 187 | expect(usersInSegment).toEqual([ 188 | { 189 | user_id: "1", 190 | latest_value: true, 191 | last_event_time: oneMinuteAndThirtySecondsAgo.setMilliseconds(0) / 1000, 192 | }, 193 | ]); 194 | }); 195 | }); 196 | --------------------------------------------------------------------------------