├── .gitignore ├── BankClassify.py ├── Classify.py ├── README.md ├── Statement_Example.txt ├── categories.txt ├── example.py ├── requirements.txt └── test ├── __init__.py ├── test_bak_classify.py └── transactions.csv /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | 
import os
import re
from datetime import datetime

# `import dateutil` alone does not guarantee the `parser` submodule is loaded;
# import it explicitly since _make_date_index calls dateutil.parser.parse.
import dateutil.parser
import pandas as pd
from textblob.classifiers import NaiveBayesClassifier
from colorama import init, Fore, Style
from tabulate import tabulate


class BankClassify():

    def __init__(self, data="AllData.csv"):
        """Load in the previous data (by default from `data`) and initialise the classifier."""
        # allows dynamic training data to be used (i.e. many accounts in a loop)
        self.trainingDataFile = data

        if os.path.exists(data):
            self.prev_data = pd.read_csv(self.trainingDataFile)
        else:
            # no saved data yet: start with an empty transaction table
            self.prev_data = pd.DataFrame(columns=['date', 'desc', 'amount', 'cat'])

        self.classifier = NaiveBayesClassifier(self._get_training(self.prev_data),
                                               self._extractor)

    def add_data(self, filename, bank="santander"):
        """Read new transactions from `filename`, interactively classify them,
        and append them to the saved training data.

        Arguments:
        - filename: filename of a statement file in the given bank's format
        - bank: one of 'santander', 'nationwide', 'lloyds', 'barclays',
          'mint', 'natwest' or 'amex'

        Raises:
        - ValueError: if `bank` is not a recognised bank name
        """
        # dispatch table instead of a long if/elif chain; messages unchanged
        readers = {
            "santander": ("Santander", self._read_santander_file),
            "nationwide": ("Nationwide", self._read_nationwide_file),
            "lloyds": ("Lloyds Bank", self._read_lloyds_csv),
            "barclays": ("Barclays Bank", self._read_barclays_csv),
            "mint": ("Mint", self._read_mint_csv),
            "natwest": ("Natwest Bank", self._read_natwest_csv),
            "amex": ("Amex Bank", self._read_amex_csv),
        }
        try:
            label, reader = readers[bank]
        except KeyError:
            raise ValueError('new_data appears empty! probably tried an unknown bank: ' + bank)

        print("adding %s data!" % label)
        self.new_data = reader(filename)

        self._ask_with_guess(self.new_data)

        self.prev_data = pd.concat([self.prev_data, self.new_data])
        # save data to the same file we loaded earlier
        self.prev_data.to_csv(self.trainingDataFile, index=False)

    def _prep_for_analysis(self):
        """Prepare data for analysis in pandas, setting index types and subsetting."""
        self.prev_data = self._make_date_index(self.prev_data)

        self.prev_data['cat'] = self.prev_data['cat'].str.strip()

        self.inc = self.prev_data[self.prev_data.amount > 0]
        # take an explicit copy so flipping the sign below mutates our own
        # frame rather than a view of prev_data (SettingWithCopyWarning)
        self.out = self.prev_data[self.prev_data.amount < 0].copy()
        self.out.amount = self.out.amount.abs()

        self.inc_noignore = self.inc[self.inc.cat != 'Ignore']
        self.inc_noexpignore = self.inc[(self.inc.cat != 'Ignore') & (self.inc.cat != 'Expenses')]

        self.out_noignore = self.out[self.out.cat != 'Ignore']
        self.out_noexpignore = self.out[(self.out.cat != 'Ignore') & (self.out.cat != 'Expenses')]

    def _read_categories(self):
        """Read the list of categories from categories.txt, as {id: name}."""
        categories = {}

        with open('categories.txt') as f:
            for i, line in enumerate(f):
                categories[i] = line.strip()

        return categories

    def _add_new_category(self, category):
        """Append a new category to categories.txt."""
        with open('categories.txt', 'a') as f:
            f.write('\n' + category)

    def _ask_with_guess(self, df):
        """Interactively guess categories for each transaction in df, asking each
        time whether the guess is correct, and update the classifier with the
        confirmed or corrected answers.

        Returns df with its 'cat' column filled in; entering 'q' stops early,
        leaving the remaining rows unclassified."""
        # Initialise colorama so the ANSI colour codes also work on Windows
        init()

        df['cat'] = ""

        categories = self._read_categories()

        for index, row in df.iterrows():

            # Generate the category numbers table from the list of categories
            cats_list = [[idnum, cat] for idnum, cat in categories.items()]
            cats_table = tabulate(cats_list)

            stripped_text = self._strip_numbers(row['desc'])

            # Guess a category using the classifier (only if it has training data)
            if len(self.classifier.train_set) > 1:
                guess = self.classifier.classify(stripped_text)
            else:
                guess = ""

            # Clear the screen and print the list of categories
            print(chr(27) + "[2J")
            print(cats_table)
            print("\n\n")
            # Print the transaction being classified
            print("On: %s\t %.2f\n%s" % (row['date'], row['amount'], row['desc']))
            print(Fore.RED + Style.BRIGHT + "My guess is: " + str(guess) + Fore.RESET)

            input_value = input("> ")

            if input_value.lower() == 'q':
                # If the input was 'q' then quit
                return df
            if input_value == "":
                # Blank input: our guess was right
                df.at[index, 'cat'] = guess
                # don't train the classifier on an empty label (no guess yet)
                if guess:
                    self.classifier.update([(stripped_text, guess)])
            else:
                # Otherwise, our guess was wrong
                try:
                    # Try converting the input to an integer category number;
                    # if that works then an existing category was chosen
                    category_number = int(input_value)
                    category = categories[category_number]
                except ValueError:
                    # Otherwise a new category was typed in, so add it to the
                    # list of categories
                    category = input_value
                    self._add_new_category(category)
                    categories = self._read_categories()

                # Write the correct answer and update the classifier with it
                df.at[index, 'cat'] = category
                self.classifier.update([(stripped_text, category)])

        return df

    def _make_date_index(self, df):
        """Make the index of df a DatetimeIndex parsed from its 'date' column."""
        df.index = pd.DatetimeIndex(df.date.apply(dateutil.parser.parse, dayfirst=True))

        return df

    def _read_nationwide_file(self, filename):
        """Read the CSV file that Nationwide provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""

        with open(filename) as f:
            lines = f.readlines()

        dates = []
        descs = []
        amounts = []

        # the first five lines are header/metadata
        for line in lines[5:]:

            # strip non-ASCII characters (e.g. the currency symbol)
            line = "".join(i for i in line if ord(i) < 128)
            if line.strip() == '':
                continue

            splits = line.split("\",\"")
            # Fields: 0=Date, 1=Transaction type, 2=Description,
            #         3=Paid Out, 4=Paid In, 5=Balance
            date = splits[0].replace("\"", "").strip()
            date = datetime.strptime(date, '%d %b %Y').strftime('%d/%m/%Y')
            dates.append(date)

            # get spend/pay-in amount; spends are stored as negative
            if splits[3] != "":  # paid out
                spend = float(re.sub(r"[^0-9.-]", "", splits[3])) * -1
            else:  # paid in
                spend = float(re.sub(r"[^0-9.-]", "", splits[4]))

            amounts.append(spend)

            # Description
            descs.append(splits[2])

        df = pd.DataFrame({'date': dates, 'desc': descs, 'amount': amounts})

        df['amount'] = df.amount.astype(float)
        df['desc'] = df.desc.astype(str)
        df['date'] = df.date.astype(str)

        return df

    def _read_santander_file(self, filename):
        """Read the plain text format that Santander provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""
        with open(filename, errors='replace') as f:
            lines = f.readlines()

        dates = []
        descs = []
        amounts = []

        # the first four lines are header/metadata
        for line in lines[4:]:

            line = "".join(i for i in line if ord(i) < 128)
            if line.strip() == '':
                continue

            # lines look like "Key: value"; values may themselves contain ':'
            splitted = line.split(":")
            category = splitted[0]
            data = ":".join(splitted[1:])

            if category == 'Date':
                dates.append(data.strip())
            elif category == 'Description':
                descs.append(data.strip())
            elif category == 'Amount':
                just_numbers = re.sub(r"[^0-9.-]", "", data)
                amounts.append(just_numbers.strip())

        df = pd.DataFrame({'date': dates, 'desc': descs, 'amount': amounts})

        df['amount'] = df.amount.astype(float)
        df['desc'] = df.desc.astype(str)
        df['date'] = df.date.astype(str)

        return df

    def _read_lloyds_csv(self, filename):
        """Read the CSV format that Lloyds Bank provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""

        df = pd.read_csv(filename, skiprows=0)

        df.rename(
            columns={
                "Transaction Date": 'date',
                "Transaction Description": 'desc',
                "Debit Amount": 'amount',
                "Credit Amount": 'creditAmount'
            },
            inplace=True
        )

        # Lloyds outputs separate debit and credit columns; combine them into a
        # single signed 'amount' column (debits negative, credits positive)
        for index, row in df.iterrows():
            if row['amount'] > 0:
                # it's a spend, so make it negative
                df.at[index, 'amount'] = -row['amount']
            elif row['creditAmount'] > 0:
                df.at[index, 'amount'] = row['creditAmount']

        # cast types to columns for math
        df = df.astype({"desc": str, "date": str, "amount": float})

        return df

    def _read_split_csv(self, filename, columns):
        """Shared helper: read a ragged CSV (some rows contain more commas than
        the header says) and rename the given integer columns.

        Reading with a separator that never occurs ('^') keeps each line as a
        single field, which is then split on commas; extra fields simply become
        extra unnamed columns instead of raising a parser error.
        https://stackoverflow.com/questions/20154303/pandas-read-csv-expects-wrong-number-of-columns-with-ragged-csv-file

        This avoids read_csv's `prefix` argument, which was removed in pandas 2.0.
        """
        raw = pd.read_csv(filename, sep='^', header=None, skiprows=1)
        df = raw[0].str.split(',', expand=True)
        df.rename(columns=columns, inplace=True)

        # cast types to columns for math
        df = df.astype({"desc": str, "date": str, "amount": float})

        return df

    def _read_barclays_csv(self, filename):
        """Read the CSV format that Barclays Bank provides downloads in.
        Edge case: foreign transactions sometimes cause more columns than expected.
        Returns a pd.DataFrame with 'date' (col 1), 'desc' (memo, col 5) and
        'amount' (col 3)."""
        return self._read_split_csv(filename, {1: 'date', 5: 'desc', 3: 'amount'})

    def _read_mint_csv(self, filename) -> pd.DataFrame:
        """Read the CSV format that mint.intuit.com provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""

        df = pd.read_csv(filename, skiprows=0)

        df.rename(
            columns={
                "Date": 'date',
                "Original Description": 'desc',
                "Amount": 'amount',
                "Transaction Type": 'type'
            },
            inplace=True
        )

        # Mint outputs an unsigned amount plus a type column; fold them into a
        # single signed 'amount' column (debits negative, credits positive)
        df.loc[df['type'] == 'debit', 'amount'] = -df['amount']

        # cast types to columns for math
        df = df.astype({"desc": str, "date": str, "amount": float})
        df = df[['date', 'desc', 'amount']]

        return df

    def _read_natwest_csv(self, filename):
        """Read the CSV format that Natwest Bank provides downloads in.
        Columns: Date, Type, Desc, Value (- or unsigned positive), Balance,
        Account Name, Account Number.
        Returns a pd.DataFrame with 'date' (col 0), 'desc' (col 2) and
        'amount' (col 3)."""
        return self._read_split_csv(filename, {0: 'date', 2: 'desc', 3: 'amount'})

    def _read_amex_csv(self, filename):
        """Read the CSV format that AMEX (American Express) provides downloads in.
        Columns: Date, Desc, Account Name, Account Number, Amount (- or
        unsigned positive).
        Returns a pd.DataFrame with 'date' (col 0), 'desc' (col 1) and
        'amount' (col 4)."""
        return self._read_split_csv(filename, {0: 'date', 1: 'desc', 4: 'amount'})

    def _get_training(self, df):
        """Get training data for the classifier, consisting of tuples of
        (description text, category)."""
        train = []
        # skip rows with no category: empty string, or NaN when read from CSV
        subset = df[df['cat'].notna() & (df['cat'] != '')]
        for i in subset.index:
            # BUGFIX: the original used subset.iloc[i], which treats the index
            # *label* i as a position and picks the wrong row (or raises) once
            # the filtered index has gaps; .loc looks the row up by label
            row = subset.loc[i]
            new_desc = self._strip_numbers(row['desc'])
            train.append((new_desc, row['cat']))

        return train

    def _extractor(self, doc):
        """Extract a {token: True} feature dict from a given string."""
        # TODO: Extend to extract words within words
        # For example, MUSICROOM should give MUSIC and ROOM
        tokens = self._split_by_multiple_delims(doc, [' ', '/'])

        return {token: True for token in tokens if token != ""}

    def _strip_numbers(self, s):
        """Strip everything except capital letters and spaces from the string."""
        return re.sub(r"[^A-Z ]", "", s)

    def _split_by_multiple_delims(self, string, delims):
        """Split the given string by the list of delimiters given."""
        regexp = "|".join(delims)

        return re.split(regexp, string)
"lloyds") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BankClassify - automatically classify your bank statement entries 2 | 3 | **Note:** This is not 'finished' software. I use it for dealing with my bank statements, but it is not 'production-ready' and may crash or do strange things. It is also set up for my particular usage, so may not work for you. However, I hope it will be a useful resource. 4 | 5 | This code will classify each entry in your bank statement into categories such as 'Supermarket', 'Petrol', 'Eating Out' etc. It learns from previously classified data, and corrections you make when it guesses a category incorrectly, and improves its performance over time. 6 | 7 | ## How to use 8 | 1. Install the required libraries: 9 | `pip install -r requirements.txt` 10 | 11 | 2. Run the code in `example.py` as a demonstration. This will interactively classify the example bank statement data in `Statement_Example.txt` and save the results in `AllData.csv`. In the interactive classification you will be presented with a list of categories (with ID numbers), the details of a transaction, and a guessed category. You have three choices: 12 | - To accept the guessed category, just press `Enter` 13 | - To correct the classifier to a category that is in the list shown, enter the ID number of the category and press `Enter` 14 | - To add a new category, type the name of the category and press `Enter` 15 | 16 | 3. Examine the output in `AllData.csv` manually, or run `bc._prep_for_analysis()` and look at `bc.in` and `bc.out` for incomings and outgoings respectively. You will see there is a `cat` column with the category in it. 17 | 18 | To use it with your own data: 19 | 20 | - *If you use Santander UK as your bank:* just run `bc.add_data(filename)` with the filename of your downloaded statement file. 
Delete `AllData.csv` first though, or the example data will be used as part of the training data. 21 | - *If you use another bank:* Write your own function to read in your statement data from your bank. It must return a pandas dataframe with columns of `date`, `desc` and `amount`. Add this to the `BankClassify` class and call it instead of `_read_santander_file`. 22 | 23 | ### Known issues 24 | For Barclays bank sometimes the CSV file contains multiple commas within the 'memo' (transaction description) column. You can either manually patch your data before you run the tool or be aware that due to the work-around implemented we could potentially be losing valuable information beyond the comma. -------------------------------------------------------------------------------- /Statement_Example.txt: -------------------------------------------------------------------------------- 1 | From:01/08/2017to31/08/2017 2 | 3 | Account:XXXX XXXX XXXX XXXX 4 | 5 | Date:31/08/2017 6 | Description:CARD PAYMENT TO SHELL TOTHILL,2.04 GBP, RATE 1.00/GBP ON 29-08-2013 7 | Amount:-2.04 8 | Balance:2301.62 9 | 10 | Date:31/08/2017 11 | Description:CARD PAYMENT TO ASDA SUPERSTORE ON 2013-08-29 12 | Amount:-23.46 13 | Balance:2303.66 14 | 15 | Date:30/08/2017 16 | Description:CHEQUE PAID IN AT SOUTHAMPTON UNIVERSITY 17 | Amount:55.10 18 | Balance:2327.12 19 | 20 | Date:30/08/2017 21 | Description:CARD PAYMENT TO TICKETOFFICESALE,6.20 GBP, RATE 1.00/GBP ON 28-08-2013 22 | Amount:-6.20 23 | Balance:2272.02 24 | 25 | Date:30/08/2017 26 | Description:CARD PAYMENT TO MARKS & SPENCER,8.99 GBP, RATE 1.00/GBP ON 28-08-2013 27 | Amount:-8.99 28 | Balance:2288.22 29 | 30 | Date:29/08/2017 31 | Description:CARD PAYMENT TO THE COWHERDS,2.55 GBP, RATE 1.00/GBP ON 27-08-2013 32 | Amount:-2.55 33 | Balance:1308.27 34 | 35 | Date:29/08/2017 36 | Description:CARD PAYMENT TO HOBBY CRAFT LTD,8.59 GBP, RATE 1.00/GBP ON 27-08-2013 37 | Amount:-8.59 38 | Balance:1310.82 39 | 40 | Date:29/08/2017 41 | 
Description:CARD PAYMENT TO WAITROSE 720,2.18 GBP, RATE 1.00/GBP ON 27-08-2013 42 | Amount:-2.18 43 | Balance:1319.41 44 | 45 | Date:29/08/2017 46 | Description:CARD PAYMENT TO ASDA STORE/PETROL/,56.60 GBP, RATE 1.00/GBP ON 27-08-2013 47 | Amount:-56.60 48 | Balance:1321.59 49 | 50 | Date:28/08/2017 51 | Description:CARD PAYMENT TO HOBBY CRAFT LTD,4.69 GBP, RATE 1.00/GBP ON 26-08-2013 52 | Amount:-4.69 53 | Balance:1398.19 54 | 55 | Date:28/08/2017 56 | Description:CARD PAYMENT TO ASDA SUPERSTORE ON 2013-08-27 57 | Amount:-43.21 58 | Balance:2303.66 59 | 60 | Date:28/08/2017 61 | Description:CARD PAYMENT TO SAINSBURYS,14.80 GBP, RATE 1.00/GBP ON 26-08-2013 62 | Amount:-14.80 63 | Balance:1402.88 64 | 65 | Date:26/08/2017 66 | Description:CARD PAYMENT TO ASDA STORE/PETROL/,62.60 GBP, RATE 1.00/GBP ON 25-08-2013 67 | Amount:-62.60 68 | Balance:1321.59 -------------------------------------------------------------------------------- /categories.txt: -------------------------------------------------------------------------------- 1 | Income 2 | Bill - General 3 | Bill - Utilities 4 | Bill - Communications 5 | Travel - General 6 | Travel - Car 7 | Travel - Petrol 8 | Supermarket 9 | Cash Out 10 | Eating Out 11 | House 12 | Books 13 | Craft 14 | Charity Shop 15 | Presents 16 | Toiletries 17 | Mortgage 18 | Paypal 19 | Unclassified -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | from BankClassify import BankClassify 2 | 3 | bc = BankClassify() 4 | 5 | bc.add_data("Statement_Example.txt") -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | textblob 3 | colorama 4 | tabulate -------------------------------------------------------------------------------- /test/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/robintw/BankClassify/477c32a64a9d5cf0564d47ca656572d7132ec180/test/__init__.py -------------------------------------------------------------------------------- /test/test_bak_classify.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from BankClassify import BankClassify 3 | 4 | 5 | def test_mintReader_returns_date_description_ammount(): 6 | bc = BankClassify() 7 | df = bc._read_mint_csv('transactions.csv') 8 | 9 | columns = df.columns.values.tolist() 10 | assert 'date' in columns 11 | assert 'desc' in columns 12 | assert 'amount' in columns 13 | 14 | 15 | def test_onlyTreeColumns(): 16 | bc = BankClassify() 17 | df = bc._read_mint_csv('transactions.csv') 18 | 19 | assert len(df.columns.values.tolist()) == 3 20 | 21 | 22 | def test_debitIsNegative_creditIsPositive(): 23 | df = pd.read_csv('transactions.csv', skiprows=0) 24 | 25 | """Rename columns """ 26 | # df.columns = ['date', 'desc', 'amount'] 27 | df.rename( 28 | columns={ 29 | "Date": 'date', 30 | "Original Description": 'desc', 31 | "Amount": 'amount', 32 | "Transaction Type": 'type' 33 | }, 34 | inplace=True 35 | ) 36 | 37 | bc = BankClassify() 38 | df_dut = bc._read_mint_csv('transactions.csv') 39 | 40 | baseline = df['type'] == 'debit' 41 | 42 | assert (df_dut.loc[baseline, 'amount'] < 0).all() 43 | assert (df_dut.loc[~baseline, 'amount'] >= 0).all() 44 | -------------------------------------------------------------------------------- /test/transactions.csv: -------------------------------------------------------------------------------- 1 | "Date","Description","Original Description","Amount","Transaction Type","Category","Account Name","Labels","Notes" 2 | "4/28/2020","Paycheck","CORPORATIO CO PAYCHECK","500.00","credit","Paycheck","Bank checking","","" 3 | "4/25/2020","Amazon","AMZN Mktp 
US","29.99","debit","Shopping","CC","","" 4 | --------------------------------------------------------------------------------