├── .gitignore
├── LICENSE
├── requirements.txt
├── cctxn.py
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
.venv/
__pycache__
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Redis, Inc. proprietary, subject to the Redis Enterprise Software and/or Cloud Services license
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
async-timeout==4.0.2
Faker==17.0.0
python-dateutil==2.8.2
redis==4.5.1
six==1.16.0

--------------------------------------------------------------------------------
/cctxn.py:
--------------------------------------------------------------------------------
# Author: Joey Whelan
# File overview: Generates random credit card transaction records, stores them in Redis as hashes, and then performs
# various searches and aggregations.

from faker import Faker
from faker.providers import DynamicProvider
from redis import from_url
from redis.commands.search.field import NumericField, TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from redis.commands.search.aggregation import AggregateRequest, Desc
from redis.commands.search import reducers
import random
import time
import datetime
from pprint import pprint
import re

REDIS_URL = 'redis://localhost:6379'
RECORDS = 5000
IDX_NAME = 'txnIdx'
PREFIX = 'txn:'

merchants_provider = DynamicProvider(
    provider_name='merchants',
    elements=['Walmart', 'Nordstrom', 'Amazon', 'Exxon', 'Kroger', 'Safeway', 'United Airlines', 'Office Depot', 'Ford', 'Taco Bell']
)
categories_provider = DynamicProvider(
    provider_name='categories',
    elements=['AUTO', 'FOOD', 'GASS', 'GIFT', 'TRAV', 'GROC', 'HOME', 'PERS', 'HEAL', 'MISC']
)

def build_index(client):
    try:
        client.ft(IDX_NAME).dropindex()
    except Exception:  # index may not exist yet
        pass
    idx_def = IndexDefinition(index_type=IndexType.HASH, prefix=[PREFIX])
    schema = [
        TagField('txn_id', sortable=True),
        TextField('txn_date'),
        NumericField('txn_timestamp', sortable=True),
        NumericField('txn_amt'),
        TagField('txn_currency'),
        TagField('expense_category'),
        TextField('merchant_name'),
        TextField('merchant_address')
    ]
    client.ft(IDX_NAME).create_index(schema, definition=idx_def)
    print(f'*** {IDX_NAME} index built ***')

def generate_data(client, count):
    Faker.seed(0)
    random.seed(0)
    fake = Faker()
    fake.add_provider(merchants_provider)
    fake.add_provider(categories_provider)

    for i in range(count):
        tdate = fake.date_time_between(start_date='-3y', end_date='now')
        txn_record = {
            'acct_id': int(fake.ean(length=13)),
            'txn_id': int(fake.ean(length=13)),
            'txn_date': re.escape(tdate.isoformat()),
            'txn_timestamp': time.mktime(tdate.timetuple()),
            'card_last_4': fake.credit_card_number()[-4:],
            'txn_amt': round(random.uniform(1, 1000), 2),
            'txn_currency': 'USD',
            'expense_category': fake.categories(),
            'merchant_name': fake.merchants(),
            'merchant_address': re.escape(fake.address())
        }
        client.hset(f'{PREFIX}{txn_record["txn_id"]}', mapping=txn_record)
        if i == 0:
            print(f'\n*** Sample Transaction Record ***')
            pprint(txn_record)
    print(f'\n*** {RECORDS} transactions inserted into Redis as hashes ***')

def search(client):

    print('\n*** Search Scenario 1: Range query on dates (6/1/2022 - 7/31/2022, first 3 records sorted by txn_id) ***')
    begin = time.mktime(datetime.date(2022,6,1).timetuple())
    end = time.mktime(datetime.date(2022,7,31).timetuple())
    query = Query(f'@txn_timestamp:[{begin} {end}]')\
        .sort_by('txn_id', 'ASC')\
        .return_fields('acct_id', 'txn_date', 'txn_amt')\
        .paging(0, 3)
    result = client.ft(IDX_NAME).search(query)
    pprint(result.docs)

    print('\n*** Search Scenario 2: Find the 5 earliest transactions by date where Merchant = Kroger, sorted by txn date ***')
    query = Query('@merchant_name:kroger')\
        .sort_by('txn_timestamp', 'ASC')\
        .return_fields('txn_date', 'card_last_4', 'txn_amt')\
        .paging(0,5)
    result = client.ft(IDX_NAME).search(query)
    pprint(result.docs)

    print('\n*** Search Scenario 3: Aggregate by expense category with count per category, sorted by count ***')
    request = AggregateRequest('*')\
        .group_by('@expense_category', reducers.count().alias('count'))\
        .sort_by(Desc('@count'))
    result = client.ft(IDX_NAME).aggregate(request)
    pprint(result.rows)

    print('\n*** Search Scenario 4: Aggregate on a query from a derived value (txn year). Return number of transactions per year ***')
    request = AggregateRequest('*')\
        .load('@txn_date')\
        .apply(year='substr(@txn_date,0,4)')\
        .group_by('@year', reducers.count().alias('num_transactions'))\
        .sort_by(Desc('@year'))
    result = client.ft(IDX_NAME).aggregate(request)
    pprint(result.rows)

    print('\n*** Search Scenario 5: For a merchant with a name like "walmart", aggregate on address and find the top 3 by txn count ***')
    request = AggregateRequest('@merchant_name:%walmrt%')\
        .group_by('@merchant_address', reducers.count().alias('txn_count'))\
        .sort_by(Desc('@txn_count'))\
        .limit(0,3)
    result = client.ft(IDX_NAME).aggregate(request)
    pprint(result.rows)

    print('\n*** Search Scenario 6: Aggregate total spend for categories that had individual transactions with value >$500 in Dec 2021 ***')
    request = AggregateRequest(r'(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])')\
        .group_by('@expense_category', reducers.sum('@txn_amt').alias('total_spend'))\
        .sort_by(Desc('@total_spend'))
    result = client.ft(IDX_NAME).aggregate(request)
    pprint(result.rows)

if __name__ == '__main__':
    client = from_url(REDIS_URL)
    build_index(client)
    generate_data(client, RECORDS)
    search(client)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Credit Card Transaction Search Examples

## Contents
1. [Summary](#summary)
2. [Features](#features)
3. [Prerequisites](#prerequisites)
4. [Installation](#installation)
5. [Usage](#usage)
6. [Index Build](#index-build)
7. [Data](#data)
8. [Search Scenario 1](#search-scenario-1)
9. [Search Scenario 2](#search-scenario-2)
10. [Search Scenario 3](#search-scenario-3)
11. [Search Scenario 4](#search-scenario-4)
12. [Search Scenario 5](#search-scenario-5)
13. [Search Scenario 6](#search-scenario-6)

## Summary
This is a collection of CLI and Python examples that first load Redis with credit card transaction records and then demonstrate various search and aggregation scenarios on that data.

## Features
- Loads synthetic transaction data into Redis as hashes
- Implements multiple search and aggregation operations against Redis

## Prerequisites
- Python
- Redis with the Search feature enabled (e.g., Redis Stack or Redis Enterprise), reachable at the URL configured in `cctxn.py` (default `redis://localhost:6379`)

## Installation
1. Clone this repo.

2. Install Python requirements
```bash
pip install -r requirements.txt
```

## Usage
```bash
python3 cctxn.py
```

## Index Build
### CLI
```bash
FT.CREATE txnIdx ON HASH PREFIX 1 "txn:" SCHEMA txn_id TAG SORTABLE txn_date TEXT txn_timestamp NUMERIC SORTABLE txn_amt NUMERIC txn_currency TAG expense_category TAG merchant_name TEXT merchant_address TEXT
```
### Python
```python
idx_def = IndexDefinition(index_type=IndexType.HASH, prefix=[PREFIX])
schema = [
    TagField('txn_id', sortable=True),
    TextField('txn_date'),
    NumericField('txn_timestamp', sortable=True),
    NumericField('txn_amt'),
    TagField('txn_currency'),
    TagField('expense_category'),
    TextField('merchant_name'),
    TextField('merchant_address')
]
client.ft(IDX_NAME).create_index(schema, definition=idx_def)
```

## Data
### Sample Transaction Record
```bash
{'acct_id': 6048764759387,
 'card_last_4': '8403',
 'expense_category': 'HEAL',
 'merchant_address': '097\\ Sanchez\\ Islands\\ Apt\\.\\ 393\\\n'
                     'Port\\ Tammy,\\ AS\\ 71671',
 'merchant_name': 'Walmart',
 'txn_amt': 844.58,
 'txn_currency': 'USD',
 'txn_date': '2021\\-10\\-12T01:10:51',
 'txn_id': 2421948924117,
 'txn_timestamp': 1634022651.0}
```

## Search Scenario 1
### Business Problem
Range query on dates (6/1/2022 - 7/31/2022, first 3 records sorted by txn_id)
### CLI
```bash
FT.SEARCH txnIdx '@txn_timestamp:[1654063200 1659247200]' SORTBY txn_id ASC RETURN 3 acct_id txn_date txn_amt LIMIT 0 3
```
### Python
```python
begin = time.mktime(datetime.date(2022,6,1).timetuple())
end = time.mktime(datetime.date(2022,7,31).timetuple())
query = Query(f'@txn_timestamp:[{begin} {end}]')\
    .sort_by('txn_id', 'ASC')\
    .return_fields('acct_id', 'txn_date', 'txn_amt')\
    .paging(0, 3)
result = client.ft(IDX_NAME).search(query)
pprint(result.docs)
```
### Results
```bash
[Document {'id': 'txn:104801452768', 'payload': None, 'acct_id': '3855580637385', 'txn_date': '2022\\-06\\-20T00:16:38', 'txn_amt': '527.3'},
 Document {'id': 'txn:1057562256603', 'payload': None, 'acct_id': '8440141859082', 'txn_date': '2022\\-07\\-06T19:39:08', 'txn_amt': '820.81'},
 Document {'id': 'txn:1108039921439', 'payload': None, 'acct_id': '1214588109355', 'txn_date': '2022\\-07\\-13T13:30:17', 'txn_amt': '485.79'}]
```

## Search Scenario 2
### Business Problem
Find the 5 earliest transactions by date where Merchant = Kroger, sorted by txn date
### CLI
```bash
FT.SEARCH txnIdx @merchant_name:kroger SORTBY txn_timestamp ASC RETURN 3 txn_date card_last_4 txn_amt LIMIT 0 5
```
### Python
```python
query = Query('@merchant_name:kroger')\
    .sort_by('txn_timestamp', 'ASC')\
    .return_fields('txn_date', 'card_last_4', 'txn_amt')\
    .paging(0,5)
result = client.ft(IDX_NAME).search(query)
pprint(result.docs)
```
### Results
```bash
[Document {'id': 'txn:3254125735126', 'payload': None, 'txn_date': '2020\\-02\\-23T23:36:22', 'card_last_4': '5185', 'txn_amt': '108.4'},
 Document {'id': 'txn:315330658921', 'payload': None, 'txn_date': '2020\\-02\\-25T01:41:21', 'card_last_4': '9303', 'txn_amt': '301.16'},
 Document {'id': 'txn:3309978830143', 'payload': None, 'txn_date': '2020\\-02\\-25T16:35:50', 'card_last_4': '1302', 'txn_amt': '612.1'},
 Document {'id': 'txn:5034622706076', 'payload': None, 'txn_date': '2020\\-03\\-02T20:03:45', 'card_last_4': '3967', 'txn_amt': '565.42'},
 Document {'id': 'txn:8477539870510', 'payload': None, 'txn_date': '2020\\-03\\-03T08:51:41', 'card_last_4': '5115', 'txn_amt': '384.68'}]
```

## Search Scenario 3
### Business Problem
Aggregate by expense category with count per category, sorted by count
### CLI
```bash
FT.AGGREGATE txnIdx * GROUPBY 1 @expense_category REDUCE COUNT 0 AS count SORTBY 2 @count DESC
```
### Python
```python
request = AggregateRequest('*')\
    .group_by('@expense_category', reducers.count().alias('count'))\
    .sort_by(Desc('@count'))
result = client.ft(IDX_NAME).aggregate(request)
pprint(result.rows)
```
### Results
```bash
[[b'expense_category', b'FOOD', b'count', b'515'],
 [b'expense_category', b'HOME', b'count', b'513'],
 [b'expense_category', b'GASS', b'count', b'511'],
 [b'expense_category', b'MISC', b'count', b'510'],
 [b'expense_category', b'AUTO', b'count', b'504'],
 [b'expense_category', b'HEAL', b'count', b'501'],
 [b'expense_category', b'PERS', b'count', b'499'],
 [b'expense_category', b'GIFT', b'count', b'495'],
 [b'expense_category', b'GROC', b'count', b'479'],
 [b'expense_category', b'TRAV', b'count', b'473']]
```

## Search Scenario 4
### Business Problem
Aggregate on a query from a derived value (txn year).
Return the number of transactions per year
### CLI
```bash
FT.AGGREGATE txnIdx * LOAD 1 @txn_date APPLY 'substr(@txn_date,0,4)' AS year GROUPBY 1 @year REDUCE COUNT 0 AS num_transactions SORTBY 2 @year DESC
```
### Python
```python
request = AggregateRequest('*')\
    .load('@txn_date')\
    .apply(year='substr(@txn_date,0,4)')\
    .group_by('@year', reducers.count().alias('num_transactions'))\
    .sort_by(Desc('@year'))
result = client.ft(IDX_NAME).aggregate(request)
pprint(result.rows)
```
### Results
```bash
[[b'year', b'2023', b'num_transactions', b'238'],
 [b'year', b'2022', b'num_transactions', b'1712'],
 [b'year', b'2021', b'num_transactions', b'1655'],
 [b'year', b'2020', b'num_transactions', b'1395']]
```

## Search Scenario 5
### Business Problem
For a merchant with a name like "walmart" (fuzzy match), aggregate on address and find the top 3 addresses by txn count
### CLI
```bash
FT.AGGREGATE txnIdx '@merchant_name:%walmrt%' GROUPBY 1 @merchant_address REDUCE COUNT 0 AS txn_count SORTBY 2 @txn_count DESC LIMIT 0 3
```
### Python
```python
request = AggregateRequest('@merchant_name:%walmrt%')\
    .group_by('@merchant_address', reducers.count().alias('txn_count'))\
    .sort_by(Desc('@txn_count'))\
    .limit(0,3)
result = client.ft(IDX_NAME).aggregate(request)
pprint(result.rows)
```
### Results
```bash
[[b'merchant_address',
  b'50840\\ Cook\\ View\\ Apt\\.\\ 055\\\nMillerbury,\\ PW\\ 64864',
  b'txn_count',
  b'1'],
 [b'merchant_address',
  b'13797\\ Franklin\\ Shores\\\nBrandonville,\\ IN\\ 46042',
  b'txn_count',
  b'1'],
 [b'merchant_address',
  b'Unit\\ 7722\\ Box\\ 2524\\\nDPO\\ AE\\ 36572',
  b'txn_count',
  b'1']]
```
## Search Scenario 6
### Business Problem
Aggregate total spend for categories that had individual transactions with value >$500 in Dec 2021
### CLI
```bash
FT.AGGREGATE txnIdx '(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])' GROUPBY 1 @expense_category REDUCE SUM 1 @txn_amt AS total_spend SORTBY 2 @total_spend DESC
```
### Python
```python
request = AggregateRequest(r'(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])')\
    .group_by('@expense_category', reducers.sum('@txn_amt').alias('total_spend'))\
    .sort_by(Desc('@total_spend'))
result = client.ft(IDX_NAME).aggregate(request)
pprint(result.rows)
```
### Results
```bash
[[b'expense_category', b'FOOD', b'total_spend', b'11137.79'],
 [b'expense_category', b'MISC', b'total_spend', b'8551.65'],
 [b'expense_category', b'HEAL', b'total_spend', b'7449.49'],
 [b'expense_category', b'GIFT', b'total_spend', b'6354.79'],
 [b'expense_category', b'AUTO', b'total_spend', b'5981.9'],
 [b'expense_category', b'HOME', b'total_spend', b'4927.18'],
 [b'expense_category', b'GASS', b'total_spend', b'4528.07'],
 [b'expense_category', b'GROC', b'total_spend', b'4288.77'],
 [b'expense_category', b'PERS', b'total_spend', b'3896.34'],
 [b'expense_category', b'TRAV', b'total_spend', b'3600.05']]
```
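
### Decoding the Results
The aggregate rows shown above (and in Scenarios 3-5) are flat `[field, value, field, value, ...]` lists of byte strings because `cctxn.py` creates its client without response decoding. The snippet below is a minimal sketch, not part of `cctxn.py`, of one way to turn those rows into dictionaries; the `rows_to_dicts` helper is hypothetical and it assumes the index and data loaded by `cctxn.py` already exist at the same `REDIS_URL`.
```python
# Sketch: post-process FT.AGGREGATE rows into plain Python dicts.
# Assumes cctxn.py has already built the txnIdx index and loaded the data.
from redis import from_url
from redis.commands.search.aggregation import AggregateRequest, Desc
from redis.commands.search import reducers

REDIS_URL = 'redis://localhost:6379'  # same default as cctxn.py
IDX_NAME = 'txnIdx'

def rows_to_dicts(rows):
    """Hypothetical helper: convert [b'field', b'value', ...] rows into dicts of strings."""
    decoded = []
    for row in rows:
        items = [v.decode() if isinstance(v, bytes) else str(v) for v in row]
        decoded.append(dict(zip(items[::2], items[1::2])))
    return decoded

if __name__ == '__main__':
    client = from_url(REDIS_URL)
    # Same aggregation as Search Scenario 6: total spend per category for Dec 2021 txns > $500
    request = AggregateRequest(r'(@txn_date:2021\-12* @txn_currency:{USD} @txn_amt:[(500, inf])')\
        .group_by('@expense_category', reducers.sum('@txn_amt').alias('total_spend'))\
        .sort_by(Desc('@total_spend'))
    result = client.ft(IDX_NAME).aggregate(request)
    for row in rows_to_dicts(result.rows):
        print(f"{row['expense_category']}: {row['total_spend']}")
```
Alternatively, creating the client with `from_url(REDIS_URL, decode_responses=True)` returns plain strings, which makes the `.decode()` handling unnecessary.
--------------------------------------------------------------------------------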