├── .gitignore
├── LICENSE
├── requirements.txt
├── cctxn.py
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv/
2 | __pycache__
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Redis, Inc. proprietary, subject to the Redis Enterprise Software and/or Cloud Services license
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | async-timeout==4.0.2
2 | Faker==17.0.0
3 | python-dateutil==2.8.2
4 | redis==4.5.1
5 | six==1.16.0
6 |
--------------------------------------------------------------------------------
/cctxn.py:
--------------------------------------------------------------------------------
1 | # Author: Joey Whelan
2 | # File overview: Generates random credit card transaction records, stores them in Redis as hashes, and then performs
3 | # various searches and aggregations.
4 |
5 | from faker import Faker
6 | from faker.providers import DynamicProvider
7 | from redis import from_url
8 | from redis.commands.search.field import NumericField, TagField, TextField
9 | from redis.commands.search.indexDefinition import IndexDefinition, IndexType
10 | from redis.commands.search.query import Query
11 | from redis.commands.search.aggregation import AggregateRequest, Desc
12 | from redis.commands.search import reducers
13 | import random
14 | import time
15 | import datetime
16 | from pprint import pprint
17 | import re
18 |
19 | REDIS_URL = 'redis://localhost:6379'
20 | RECORDS = 5000
21 | IDX_NAME='txnIdx'
22 | PREFIX='txn:'
23 |
24 | merchants_provider = DynamicProvider(
25 | provider_name='merchants',
26 | elements=['Walmart', 'Nordstrom', 'Amazon', 'Exxon', 'Kroger', 'Safeway', 'United Airlines', 'Office Depot', 'Ford', 'Taco Bell']
27 | )
28 | categories_provider = DynamicProvider(
29 | provider_name='categories',
30 | elements= ['AUTO', 'FOOD', 'GASS', 'GIFT', 'TRAV', 'GROC', 'HOME', 'PERS', 'HEAL', 'MISC']
31 | )
32 |
33 | def build_index(client):
34 | try:
35 | client.ft(IDX_NAME).dropindex()
36 |     except Exception:  # the index may not exist yet; ignore and rebuild
37 |         pass
38 | idx_def = IndexDefinition(index_type=IndexType.HASH, prefix=[PREFIX])
39 | schema = [
40 | TagField('txn_id', sortable=True),
41 | TextField('txn_date'),
42 | NumericField('txn_timestamp', sortable=True),
43 | NumericField('txn_amt'),
44 | TagField('txn_currency'),
45 | TagField('expense_category'),
46 | TextField('merchant_name'),
47 | TextField('merchant_address')
48 | ]
49 | client.ft(IDX_NAME).create_index(schema, definition=idx_def)
50 | print(f'*** {IDX_NAME} index built ***')
51 |
52 | def generate_data(client, count):
53 | Faker.seed(0)
54 | random.seed(0)
55 | fake = Faker()
56 | fake.add_provider(merchants_provider)
57 | fake.add_provider(categories_provider)
58 |
59 | for i in range(count):
60 | tdate = fake.date_time_between(start_date='-3y', end_date='now')
61 | txn_record = {
62 | 'acct_id': int(fake.ean(length=13)),
63 | 'txn_id': int(fake.ean(length=13)),
64 |             'txn_date': re.escape(tdate.isoformat()),  # escape punctuation so the full ISO date indexes as one token (enables prefix queries like 2021\-12*)
65 | 'txn_timestamp': time.mktime(tdate.timetuple()),
66 | 'card_last_4': fake.credit_card_number()[-4:],
67 | 'txn_amt': round(random.uniform(1, 1000), 2),
68 | 'txn_currency': 'USD',
69 | 'expense_category': fake.categories(),
70 | 'merchant_name': fake.merchants(),
71 | 'merchant_address': re.escape(fake.address())
72 | }
73 | client.hset(f'{PREFIX}{txn_record["txn_id"]}', mapping=txn_record)
74 | if i == 0:
75 | print(f'\n*** Sample Transaction Record ***')
76 | pprint(txn_record)
77 |     print(f'\n*** {count} transactions inserted into Redis as hashes ***')
78 |
79 | def search(client):
80 |
81 | print('\n*** Search Scenario 1: Range query on dates (6/1/2022 - 7/31/2022, first 3 records sorted by txn_id) ***')
82 | begin = time.mktime(datetime.date(2022,6,1).timetuple())
83 | end = time.mktime(datetime.date(2022,7,31).timetuple())
84 | query = Query(f'@txn_timestamp:[{begin} {end}]')\
85 |         .sort_by('txn_id', asc=True)\
86 | .return_fields('acct_id', 'txn_date', 'txn_amt')\
87 | .paging(0, 3)
88 | result = client.ft(IDX_NAME).search(query)
89 | pprint(result.docs)
90 |
91 |     print('\n*** Search Scenario 2: Find the 5 earliest transactions where Merchant = Kroger, sorted by txn date ***')
92 | query = Query('@merchant_name:kroger')\
93 |         .sort_by('txn_timestamp', asc=True)\
94 | .return_fields('txn_date', 'card_last_4', 'txn_amt')\
95 | .paging(0,5)
96 | result = client.ft(IDX_NAME).search(query)
97 | pprint(result.docs)
98 |
99 | print('\n*** Search Scenario 3: Aggregate by expense category with count per category, sorted by count ***')
100 | request = AggregateRequest('*')\
101 | .group_by('@expense_category', reducers.count().alias('count'))\
102 | .sort_by(Desc('@count'))
103 | result = client.ft(IDX_NAME).aggregate(request)
104 | pprint(result.rows)
105 |
106 |     print('\n*** Search Scenario 4: Aggregate on a derived value (txn year): return the number of transactions per year ***')
107 | request = AggregateRequest('*')\
108 | .load('@txn_date')\
109 | .apply(year='substr(@txn_date,0,4)')\
110 | .group_by('@year', reducers.count().alias('num_transactions'))\
111 | .sort_by(Desc('@year'))
112 | result = client.ft(IDX_NAME).aggregate(request)
113 | pprint(result.rows)
114 |
115 |     print('\n*** Search Scenario 5: For a merchant whose name fuzzy-matches "walmart" (%walmrt%), aggregate on address and find the top 3 by txn count ***')
116 | request = AggregateRequest('@merchant_name:%walmrt%')\
117 | .group_by('@merchant_address', reducers.count().alias('txn_count'))\
118 | .sort_by(Desc('@txn_count'))\
119 | .limit(0,3)
120 | result = client.ft(IDX_NAME).aggregate(request)
121 | pprint(result.rows)
122 |
123 |     print('\n*** Search Scenario 6: Aggregate total spend for categories that had individual transactions with value >$500 in Dec 2021 ***')
124 |     request = AggregateRequest(r'(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])')\
125 | .group_by('@expense_category', reducers.sum('@txn_amt').alias('total_spend'))\
126 | .sort_by(Desc('@total_spend'))
127 | result = client.ft(IDX_NAME).aggregate(request)
128 | pprint(result.rows)
129 |
130 | if __name__ == '__main__':
131 | client = from_url(REDIS_URL)
132 | build_index(client)
133 | generate_data(client, RECORDS)
134 | search(client)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Credit Card Transaction Search Examples
2 |
3 | ## Contents
4 | 1. [Summary](#summary)
5 | 2. [Features](#features)
6 | 3. [Prerequisites](#prerequisites)
7 | 4. [Installation](#installation)
8 | 5. [Usage](#usage)
9 | 6. [Index Build](#index-build)
10 | 7. [Data](#data)
11 | 8. [Search Scenario 1](#search-scenario-1)
12 | 9. [Search Scenario 2](#search-scenario-2)
13 | 10. [Search Scenario 3](#search-scenario-3)
14 | 11. [Search Scenario 4](#search-scenario-4)
15 | 12. [Search Scenario 5](#search-scenario-5)
16 | 13. [Search Scenario 6](#search-scenario-6)
17 |
18 | ## Summary
19 | This is a collection of CLI and Python examples that first load Redis with credit card transaction records and then demonstrate various search and aggregation scenarios on that data.
20 |
21 | ## Features
22 | - Loads synthetic transaction data into Redis as hashes
23 | - Implements multiple search and aggregation operations against Redis
24 |
25 | ## Prerequisites
26 | - Python 3
27 | - A Redis database with the Search capability (e.g., Redis Stack); see the Docker example below
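
If you don't already have a Search-capable Redis database running at localhost:6379 (the default `REDIS_URL` in cctxn.py), one option, assuming Docker is available, is the Redis Stack Server image:
```bash
docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest
```
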
28 | ## Installation
29 | 1. Clone this repo.
30 |
31 | 2. Install Python requirements
32 | ```bash
33 | pip install -r requirements.txt
34 | ```
35 |
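If you prefer an isolated environment, you can create and activate a virtual environment before step 2 (the repo's .gitignore already excludes `.venv/`):
```bash
python3 -m venv .venv
source .venv/bin/activate
```
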
36 | ## Usage
37 | ```bash
38 | python3 cctxn.py
39 | ```
40 |
41 | ## Index Build
42 | ### CLI
43 | ```bash
44 | FT.CREATE txnIdx ON HASH PREFIX 1 "txn:" SCHEMA txn_id TAG SORTABLE txn_date TEXT txn_timestamp NUMERIC SORTABLE txn_amt NUMERIC txn_currency TAG expense_category TAG merchant_name TEXT merchant_address TEXT
45 | ```
46 | ### Python
47 | ```python
48 | idx_def = IndexDefinition(index_type=IndexType.HASH, prefix=[PREFIX])
49 | schema = [
50 | TagField('txn_id', sortable=True),
51 | TextField('txn_date'),
52 | NumericField('txn_timestamp', sortable=True),
53 | NumericField('txn_amt'),
54 | TagField('txn_currency'),
55 | TagField('expense_category'),
56 | TextField('merchant_name'),
57 | TextField('merchant_address')
58 | ]
59 | client.ft(IDX_NAME).create_index(schema, definition=idx_def)
60 | ```
61 |
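Either way, a quick sanity check (not part of the script) is to confirm the index exists and inspect its schema and document count:
```bash
FT.INFO txnIdx
```
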
62 | ## Data
63 | ### Sample Transaction Record
64 | ```bash
65 | {'acct_id': 6048764759387,
66 | 'card_last_4': '8403',
67 | 'expense_category': 'HEAL',
68 | 'merchant_address': '097\\ Sanchez\\ Islands\\ Apt\\.\\ 393\\\n'
69 | 'Port\\ Tammy,\\ AS\\ 71671',
70 | 'merchant_name': 'Walmart',
71 | 'txn_amt': 844.58,
72 | 'txn_currency': 'USD',
73 | 'txn_date': '2021\\-10\\-12T01:10:51',
74 | 'txn_id': 2421948924117,
75 | 'txn_timestamp': 1634022651.0}
76 | ```
77 |
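The backslashes in `txn_date` and `merchant_address` come from `re.escape()`: escaping punctuation and spaces keeps each value from being split apart by the TEXT tokenizer, which is what makes prefix queries such as `@txn_date:2021\-12*` (Scenario 6) possible. To inspect a stored record directly, you can run HGETALL against its key, e.g. the key of the sample record above:
```bash
HGETALL txn:2421948924117
```
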
78 | ## Search Scenario 1
79 | ### Business Problem
80 | Range query on dates (6/1/2022 - 7/31/2022, first 3 records sorted by txn_id)
81 | ### CLI
82 | ```bash
83 | FT.SEARCH txnIdx '@txn_timestamp:[1654063200 1659247200]' SORTBY txn_id ASC RETURN 3 acct_id txn_date txn_amt LIMIT 0 3
84 | ```
85 | ### Python
86 | ```python
87 | begin = time.mktime(datetime.date(2022,6,1).timetuple())
88 | end = time.mktime(datetime.date(2022,7,31).timetuple())
89 | query = Query(f'@txn_timestamp:[{begin} {end}]')\
90 |     .sort_by('txn_id', asc=True)\
91 | .return_fields('acct_id', 'txn_date', 'txn_amt')\
92 | .paging(0, 3)
93 | result = client.ft(IDX_NAME).search(query)
94 | pprint(result.docs)
95 | ```
96 | ### Results
97 | ```bash
98 | [Document {'id': 'txn:104801452768', 'payload': None, 'acct_id': '3855580637385', 'txn_date': '2022\\-06\\-20T00:16:38', 'txn_amt': '527.3'},
99 | Document {'id': 'txn:1057562256603', 'payload': None, 'acct_id': '8440141859082', 'txn_date': '2022\\-07\\-06T19:39:08', 'txn_amt': '820.81'},
100 | Document {'id': 'txn:1108039921439', 'payload': None, 'acct_id': '1214588109355', 'txn_date': '2022\\-07\\-13T13:30:17', 'txn_amt': '485.79'}]
101 | ```
102 |
103 | ## Search Scenario 2
104 | ### Business Problem
105 | Find the 5 earliest transactions where Merchant = Kroger, sorted by transaction date
106 | ### CLI
107 | ```bash
108 | FT.SEARCH txnIdx @merchant_name:kroger SORTBY txn_timestamp ASC RETURN 3 txn_date card_last_4 txn_amt LIMIT 0 5
109 | ```
110 | ### Python
111 | ```python
112 | query = Query('@merchant_name:kroger')\
113 |     .sort_by('txn_timestamp', asc=True)\
114 | .return_fields('txn_date', 'card_last_4', 'txn_amt')\
115 | .paging(0,5)
116 | result = client.ft(IDX_NAME).search(query)
117 | pprint(result.docs)
118 | ```
119 | ### Results
120 | ```bash
121 | [Document {'id': 'txn:3254125735126', 'payload': None, 'txn_date': '2020\\-02\\-23T23:36:22', 'card_last_4': '5185', 'txn_amt': '108.4'},
122 | Document {'id': 'txn:315330658921', 'payload': None, 'txn_date': '2020\\-02\\-25T01:41:21', 'card_last_4': '9303', 'txn_amt': '301.16'},
123 | Document {'id': 'txn:3309978830143', 'payload': None, 'txn_date': '2020\\-02\\-25T16:35:50', 'card_last_4': '1302', 'txn_amt': '612.1'},
124 | Document {'id': 'txn:5034622706076', 'payload': None, 'txn_date': '2020\\-03\\-02T20:03:45', 'card_last_4': '3967', 'txn_amt': '565.42'},
125 | Document {'id': 'txn:8477539870510', 'payload': None, 'txn_date': '2020\\-03\\-03T08:51:41', 'card_last_4': '5115', 'txn_amt': '384.68'}]
126 | ```
127 |
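To get the five most recent Kroger transactions instead of the five earliest, flip the sort order (reusing the same `client` and `IDX_NAME`; output not shown here):
```python
query = Query('@merchant_name:kroger')\
    .sort_by('txn_timestamp', asc=False)\
    .return_fields('txn_date', 'card_last_4', 'txn_amt')\
    .paging(0, 5)
result = client.ft(IDX_NAME).search(query)
pprint(result.docs)
```
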
128 | ## Search Scenario 3
129 | ### Business Problem
130 | Aggregate by expense category with count per category, sorted by count
131 | ### CLI
132 | ```bash
133 | FT.AGGREGATE txnIdx * GROUPBY 1 @expense_category REDUCE COUNT 0 AS count SORTBY 2 @count DESC
134 | ```
135 | ### Python
136 | ```python
137 | request = AggregateRequest('*')\
138 | .group_by('@expense_category', reducers.count().alias('count'))\
139 | .sort_by(Desc('@count'))
140 | result = client.ft(IDX_NAME).aggregate(request)
141 | pprint(result.rows)
142 | ```
143 | ### Results
144 | ```bash
145 | [[b'expense_category', b'FOOD', b'count', b'515'],
146 | [b'expense_category', b'HOME', b'count', b'513'],
147 | [b'expense_category', b'GASS', b'count', b'511'],
148 | [b'expense_category', b'MISC', b'count', b'510'],
149 | [b'expense_category', b'AUTO', b'count', b'504'],
150 | [b'expense_category', b'HEAL', b'count', b'501'],
151 | [b'expense_category', b'PERS', b'count', b'499'],
152 | [b'expense_category', b'GIFT', b'count', b'495'],
153 | [b'expense_category', b'GROC', b'count', b'479'],
154 | [b'expense_category', b'TRAV', b'count', b'473']]
155 | ```
156 |
157 | ## Search Scenario 4
158 | ### Business Problem
159 | Aggregate on a derived value (txn year) and return the number of transactions per year
160 | ### CLI
161 | ```bash
162 | FT.AGGREGATE txnIdx * LOAD 1 @txn_date APPLY 'substr(@txn_date,0,4)' AS year GROUPBY 1 @year REDUCE COUNT 0 AS num_transactions SORTBY 2 @year DESC
163 | ```
164 | ### Python
165 | ```python
166 | request = AggregateRequest('*')\
167 | .load('@txn_date')\
168 | .apply(year='substr(@txn_date,0,4)')\
169 | .group_by('@year', reducers.count().alias('num_transactions'))\
170 | .sort_by(Desc('@year'))
171 | result = client.ft(IDX_NAME).aggregate(request)
172 | pprint(result.rows)
173 | ```
174 | ### Results
175 | ```bash
176 | [[b'year', b'2023', b'num_transactions', b'238'],
177 | [b'year', b'2022', b'num_transactions', b'1712'],
178 | [b'year', b'2021', b'num_transactions', b'1655'],
179 | [b'year', b'2020', b'num_transactions', b'1395']]
180 | ```
181 |
182 | ## Search Scenario 5
183 | ### Business Problem
184 | For a merchant whose name fuzzy-matches "walmart" (the query uses %walmrt%), aggregate on merchant address and find the top 3 addresses by transaction count
185 | ### CLI
186 | ```bash
187 | FT.AGGREGATE txnIdx '@merchant_name:%walmrt%' GROUPBY 1 @merchant_address REDUCE COUNT 0 AS txn_count SORTBY 2 @txn_count DESC LIMIT 0 3
188 | ```
189 | ### Python
190 | ```python
191 | request = AggregateRequest('@merchant_name:%walmrt%')\
192 | .group_by('@merchant_address', reducers.count().alias('txn_count'))\
193 | .sort_by(Desc('@txn_count'))\
194 | .limit(0,3)
195 | result = client.ft(IDX_NAME).aggregate(request)
196 | pprint(result.rows)
197 | ```
198 | ### Results
199 | ```bash
200 | [[b'merchant_address',
201 | b'50840\\ Cook\\ View\\ Apt\\.\\ 055\\\nMillerbury,\\ PW\\ 64864',
202 | b'txn_count',
203 | b'1'],
204 | [b'merchant_address',
205 | b'13797\\ Franklin\\ Shores\\\nBrandonville,\\ IN\\ 46042',
206 | b'txn_count',
207 | b'1'],
208 | [b'merchant_address',
209 | b'Unit\\ 7722\\ Box\\ 2524\\\nDPO\\ AE\\ 36572',
210 | b'txn_count',
211 | b'1']]
212 | ```
213 | ## Search Scenario 6
214 | ### Business Problem
215 | Aggregate total spend for categories that had individual transactions with value >$500 in Dec 2021
216 | ### CLI
217 | ```bash
218 | FT.AGGREGATE txnIdx '(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])' GROUPBY 1 @expense_category REDUCE SUM 1 @txn_amt AS total_spend SORTBY 2 @total_spend DESC
219 | ```
220 | ### Python
221 | ```python
222 | request = AggregateRequest(r'(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])')\
223 | .group_by('@expense_category', reducers.sum('@txn_amt').alias('total_spend'))\
224 | .sort_by(Desc('@total_spend'))
225 | result = client.ft(IDX_NAME).aggregate(request)
226 | pprint(result.rows)
227 | ```
228 | ### Results
229 | ```bash
230 | [[b'expense_category', b'FOOD', b'total_spend', b'11137.79'],
231 | [b'expense_category', b'MISC', b'total_spend', b'8551.65'],
232 | [b'expense_category', b'HEAL', b'total_spend', b'7449.49'],
233 | [b'expense_category', b'GIFT', b'total_spend', b'6354.79'],
234 | [b'expense_category', b'AUTO', b'total_spend', b'5981.9'],
235 | [b'expense_category', b'HOME', b'total_spend', b'4927.18'],
236 | [b'expense_category', b'GASS', b'total_spend', b'4528.07'],
237 | [b'expense_category', b'GROC', b'total_spend', b'4288.77'],
238 | [b'expense_category', b'PERS', b'total_spend', b'3896.34'],
239 | [b'expense_category', b'TRAV', b'total_spend', b'3600.05']]
240 | ```
--------------------------------------------------------------------------------