├── .gitignore ├── BankClassify.py ├── Classify.py ├── README.md ├── Statement_Example.txt ├── categories.txt ├── example.py ├── requirements.txt └── test ├── __init__.py ├── test_bak_classify.py └── transactions.csv /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | 
import os
import re
from datetime import datetime

# `import dateutil` alone does not guarantee the `parser` submodule is loaded;
# import it explicitly since _make_date_index calls dateutil.parser.parse.
import dateutil.parser
import pandas as pd
from textblob.classifiers import NaiveBayesClassifier
from colorama import init, Fore, Style
from tabulate import tabulate


class BankClassify():

    def __init__(self, data="AllData.csv"):
        """Load in the previous data (by default from `data`) and initialise the classifier."""
        # allows dynamic training data to be used (i.e. many accounts in a loop)
        self.trainingDataFile = data

        if os.path.exists(data):
            self.prev_data = pd.read_csv(self.trainingDataFile)
        else:
            # no saved data yet: start with an empty transaction table
            self.prev_data = pd.DataFrame(columns=['date', 'desc', 'amount', 'cat'])

        self.classifier = NaiveBayesClassifier(self._get_training(self.prev_data),
                                               self._extractor)

    def add_data(self, filename, bank="santander"):
        """Read new transactions from `filename`, interactively classify them,
        and append them to the saved training data.

        Arguments:
        - filename: filename of a statement file in the given bank's format
        - bank: one of 'santander', 'nationwide', 'lloyds', 'barclays',
          'mint', 'natwest' or 'amex'

        Raises:
        - ValueError: if `bank` is not a recognised bank name
        """
        # dispatch table instead of a long if/elif chain; messages unchanged
        readers = {
            "santander": ("Santander", self._read_santander_file),
            "nationwide": ("Nationwide", self._read_nationwide_file),
            "lloyds": ("Lloyds Bank", self._read_lloyds_csv),
            "barclays": ("Barclays Bank", self._read_barclays_csv),
            "mint": ("Mint", self._read_mint_csv),
            "natwest": ("Natwest Bank", self._read_natwest_csv),
            "amex": ("Amex Bank", self._read_amex_csv),
        }
        try:
            label, reader = readers[bank]
        except KeyError:
            raise ValueError('new_data appears empty! probably tried an unknown bank: ' + bank)

        print("adding %s data!" % label)
        self.new_data = reader(filename)

        self._ask_with_guess(self.new_data)

        self.prev_data = pd.concat([self.prev_data, self.new_data])
        # save data to the same file we loaded earlier
        self.prev_data.to_csv(self.trainingDataFile, index=False)

    def _prep_for_analysis(self):
        """Prepare data for analysis in pandas, setting index types and subsetting."""
        self.prev_data = self._make_date_index(self.prev_data)

        self.prev_data['cat'] = self.prev_data['cat'].str.strip()

        self.inc = self.prev_data[self.prev_data.amount > 0]
        # take an explicit copy so flipping the sign below mutates our own
        # frame rather than a view of prev_data (SettingWithCopyWarning)
        self.out = self.prev_data[self.prev_data.amount < 0].copy()
        self.out.amount = self.out.amount.abs()

        self.inc_noignore = self.inc[self.inc.cat != 'Ignore']
        self.inc_noexpignore = self.inc[(self.inc.cat != 'Ignore') & (self.inc.cat != 'Expenses')]

        self.out_noignore = self.out[self.out.cat != 'Ignore']
        self.out_noexpignore = self.out[(self.out.cat != 'Ignore') & (self.out.cat != 'Expenses')]

    def _read_categories(self):
        """Read the list of categories from categories.txt, as {id: name}."""
        categories = {}

        with open('categories.txt') as f:
            for i, line in enumerate(f):
                categories[i] = line.strip()

        return categories

    def _add_new_category(self, category):
        """Append a new category to categories.txt."""
        with open('categories.txt', 'a') as f:
            f.write('\n' + category)

    def _ask_with_guess(self, df):
        """Interactively guess categories for each transaction in df, asking each
        time whether the guess is correct, and update the classifier with the
        confirmed or corrected answers.

        Returns df with its 'cat' column filled in; entering 'q' stops early,
        leaving the remaining rows unclassified."""
        # Initialise colorama so the ANSI colour codes also work on Windows
        init()

        df['cat'] = ""

        categories = self._read_categories()

        for index, row in df.iterrows():

            # Generate the category numbers table from the list of categories
            cats_list = [[idnum, cat] for idnum, cat in categories.items()]
            cats_table = tabulate(cats_list)

            stripped_text = self._strip_numbers(row['desc'])

            # Guess a category using the classifier (only if it has training data)
            if len(self.classifier.train_set) > 1:
                guess = self.classifier.classify(stripped_text)
            else:
                guess = ""

            # Clear the screen and print the list of categories
            print(chr(27) + "[2J")
            print(cats_table)
            print("\n\n")
            # Print the transaction being classified
            print("On: %s\t %.2f\n%s" % (row['date'], row['amount'], row['desc']))
            print(Fore.RED + Style.BRIGHT + "My guess is: " + str(guess) + Fore.RESET)

            input_value = input("> ")

            if input_value.lower() == 'q':
                # If the input was 'q' then quit
                return df
            if input_value == "":
                # Blank input: our guess was right
                df.at[index, 'cat'] = guess
                # don't train the classifier on an empty label (no guess yet)
                if guess:
                    self.classifier.update([(stripped_text, guess)])
            else:
                # Otherwise, our guess was wrong
                try:
                    # Try converting the input to an integer category number;
                    # if that works then an existing category was chosen
                    category_number = int(input_value)
                    category = categories[category_number]
                except ValueError:
                    # Otherwise a new category was typed in, so add it to the
                    # list of categories
                    category = input_value
                    self._add_new_category(category)
                    categories = self._read_categories()

                # Write the correct answer and update the classifier with it
                df.at[index, 'cat'] = category
                self.classifier.update([(stripped_text, category)])

        return df

    def _make_date_index(self, df):
        """Make the index of df a DatetimeIndex parsed from its 'date' column."""
        df.index = pd.DatetimeIndex(df.date.apply(dateutil.parser.parse, dayfirst=True))

        return df

    def _read_nationwide_file(self, filename):
        """Read the CSV file that Nationwide provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""

        with open(filename) as f:
            lines = f.readlines()

        dates = []
        descs = []
        amounts = []

        # the first five lines are header/metadata
        for line in lines[5:]:

            # strip non-ASCII characters (e.g. the currency symbol)
            line = "".join(i for i in line if ord(i) < 128)
            if line.strip() == '':
                continue

            splits = line.split("\",\"")
            # Fields: 0=Date, 1=Transaction type, 2=Description,
            #         3=Paid Out, 4=Paid In, 5=Balance
            date = splits[0].replace("\"", "").strip()
            date = datetime.strptime(date, '%d %b %Y').strftime('%d/%m/%Y')
            dates.append(date)

            # get spend/pay-in amount; spends are stored as negative
            if splits[3] != "":  # paid out
                spend = float(re.sub(r"[^0-9.-]", "", splits[3])) * -1
            else:  # paid in
                spend = float(re.sub(r"[^0-9.-]", "", splits[4]))

            amounts.append(spend)

            # Description
            descs.append(splits[2])

        df = pd.DataFrame({'date': dates, 'desc': descs, 'amount': amounts})

        df['amount'] = df.amount.astype(float)
        df['desc'] = df.desc.astype(str)
        df['date'] = df.date.astype(str)

        return df

    def _read_santander_file(self, filename):
        """Read the plain text format that Santander provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""
        with open(filename, errors='replace') as f:
            lines = f.readlines()

        dates = []
        descs = []
        amounts = []

        # the first four lines are header/metadata
        for line in lines[4:]:

            line = "".join(i for i in line if ord(i) < 128)
            if line.strip() == '':
                continue

            # lines look like "Key: value"; values may themselves contain ':'
            splitted = line.split(":")
            category = splitted[0]
            data = ":".join(splitted[1:])

            if category == 'Date':
                dates.append(data.strip())
            elif category == 'Description':
                descs.append(data.strip())
            elif category == 'Amount':
                just_numbers = re.sub(r"[^0-9.-]", "", data)
                amounts.append(just_numbers.strip())

        df = pd.DataFrame({'date': dates, 'desc': descs, 'amount': amounts})

        df['amount'] = df.amount.astype(float)
        df['desc'] = df.desc.astype(str)
        df['date'] = df.date.astype(str)

        return df

    def _read_lloyds_csv(self, filename):
        """Read the CSV format that Lloyds Bank provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""

        df = pd.read_csv(filename, skiprows=0)

        df.rename(
            columns={
                "Transaction Date": 'date',
                "Transaction Description": 'desc',
                "Debit Amount": 'amount',
                "Credit Amount": 'creditAmount'
            },
            inplace=True
        )

        # Lloyds outputs separate debit and credit columns; combine them into a
        # single signed 'amount' column (debits negative, credits positive)
        for index, row in df.iterrows():
            if row['amount'] > 0:
                # it's a spend, so make it negative
                df.at[index, 'amount'] = -row['amount']
            elif row['creditAmount'] > 0:
                df.at[index, 'amount'] = row['creditAmount']

        # cast types to columns for math
        df = df.astype({"desc": str, "date": str, "amount": float})

        return df

    def _read_split_csv(self, filename, columns):
        """Shared helper: read a ragged CSV (some rows contain more commas than
        the header says) and rename the given integer columns.

        Reading with a separator that never occurs ('^') keeps each line as a
        single field, which is then split on commas; extra fields simply become
        extra unnamed columns instead of raising a parser error.
        https://stackoverflow.com/questions/20154303/pandas-read-csv-expects-wrong-number-of-columns-with-ragged-csv-file

        This avoids read_csv's `prefix` argument, which was removed in pandas 2.0.
        """
        raw = pd.read_csv(filename, sep='^', header=None, skiprows=1)
        df = raw[0].str.split(',', expand=True)
        df.rename(columns=columns, inplace=True)

        # cast types to columns for math
        df = df.astype({"desc": str, "date": str, "amount": float})

        return df

    def _read_barclays_csv(self, filename):
        """Read the CSV format that Barclays Bank provides downloads in.
        Edge case: foreign transactions sometimes cause more columns than expected.
        Returns a pd.DataFrame with 'date' (col 1), 'desc' (memo, col 5) and
        'amount' (col 3)."""
        return self._read_split_csv(filename, {1: 'date', 5: 'desc', 3: 'amount'})

    def _read_mint_csv(self, filename) -> pd.DataFrame:
        """Read the CSV format that mint.intuit.com provides downloads in.

        Returns a pd.DataFrame with columns of 'date', 'desc' and 'amount'."""

        df = pd.read_csv(filename, skiprows=0)

        df.rename(
            columns={
                "Date": 'date',
                "Original Description": 'desc',
                "Amount": 'amount',
                "Transaction Type": 'type'
            },
            inplace=True
        )

        # Mint outputs an unsigned amount plus a type column; fold them into a
        # single signed 'amount' column (debits negative, credits positive)
        df.loc[df['type'] == 'debit', 'amount'] = -df['amount']

        # cast types to columns for math
        df = df.astype({"desc": str, "date": str, "amount": float})
        df = df[['date', 'desc', 'amount']]

        return df

    def _read_natwest_csv(self, filename):
        """Read the CSV format that Natwest Bank provides downloads in.
        Columns: Date, Type, Desc, Value (- or unsigned positive), Balance,
        Account Name, Account Number.
        Returns a pd.DataFrame with 'date' (col 0), 'desc' (col 2) and
        'amount' (col 3)."""
        return self._read_split_csv(filename, {0: 'date', 2: 'desc', 3: 'amount'})

    def _read_amex_csv(self, filename):
        """Read the CSV format that AMEX (American Express) provides downloads in.
        Columns: Date, Desc, Account Name, Account Number, Amount (- or
        unsigned positive).
        Returns a pd.DataFrame with 'date' (col 0), 'desc' (col 1) and
        'amount' (col 4)."""
        return self._read_split_csv(filename, {0: 'date', 1: 'desc', 4: 'amount'})

    def _get_training(self, df):
        """Get training data for the classifier, consisting of tuples of
        (description text, category)."""
        train = []
        # skip rows with no category: empty string, or NaN when read from CSV
        subset = df[df['cat'].notna() & (df['cat'] != '')]
        for i in subset.index:
            # BUGFIX: the original used subset.iloc[i], which treats the index
            # *label* i as a position and picks the wrong row (or raises) once
            # the filtered index has gaps; .loc looks the row up by label
            row = subset.loc[i]
            new_desc = self._strip_numbers(row['desc'])
            train.append((new_desc, row['cat']))

        return train

    def _extractor(self, doc):
        """Extract a {token: True} feature dict from a given string."""
        # TODO: Extend to extract words within words
        # For example, MUSICROOM should give MUSIC and ROOM
        tokens = self._split_by_multiple_delims(doc, [' ', '/'])

        return {token: True for token in tokens if token != ""}

    def _strip_numbers(self, s):
        """Strip everything except capital letters and spaces from the string."""
        return re.sub(r"[^A-Z ]", "", s)

    def _split_by_multiple_delims(self, string, delims):
        """Split the given string by the list of delimiters given."""
        regexp = "|".join(delims)

        return re.split(regexp, string)
"lloyds") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BankClassify - automatically classify your bank statement entries 2 | 3 | **Note:** This is not 'finished' software. I use it for dealing with my bank statements, but it is not 'production-ready' and may crash or do strange things. It is also set up for my particular usage, so may not work for you. However, I hope it will be a useful resource. 4 | 5 | This code will classify each entry in your bank statement into categories such as 'Supermarket', 'Petrol', 'Eating Out' etc. It learns from previously classified data, and corrections you make when it guesses a category incorrectly, and improves its performance over time. 6 | 7 | ## How to use 8 | 1. Install the required libraries: 9 | `pip install -r requirements.txt` 10 | 11 | 2. Run the code in `example.py` as a demonstration. This will interactively classify the example bank statement data in `Statement_Example.txt` and save the results in `AllData.csv`. In the interactive classification you will be presented with a list of categories (with ID numbers), the details of a transaction, and a guessed category. You have three choices: 12 | - To accept the guessed category, just press `Enter` 13 | - To correct the classifier to a category that is in the list shown, enter the ID number of the category and press `Enter` 14 | - To add a new category, type the name of the category and press `Enter` 15 | 16 | 3. Examine the output in `AllData.csv` manually, or run `bc._prep_for_analysis()` and look at `bc.in` and `bc.out` for incomings and outgoings respectively. You will see there is a `cat` column with the category in it. 17 | 18 | To use it with your own data: 19 | 20 | - *If you use Santander UK as your bank:* just run `bc.add_data(filename)` with the filename of your downloaded statement file. 
Delete `AllData.csv` first though, or the example data will be used as part of the training data. 21 | - *If you use another bank:* Write your own function to read in your statement data from your bank. It must return a pandas dataframe with columns of `date`, `desc` and `amount`. Add this to the `BankClassify` class and call it instead of `_read_santander_file`. 22 | 23 | ### Known issues 24 | For Barclays bank sometimes the CSV file contains multiple commas within the 'memo' (transaction description) column. You can either manually patch your data before you run the tool or be aware that due to the work-around implemented we could potentially be losing valuable information beyond the comma. -------------------------------------------------------------------------------- /Statement_Example.txt: -------------------------------------------------------------------------------- 1 | From:01/08/2017to31/08/2017 2 | 3 | Account:XXXX XXXX XXXX XXXX 4 | 5 | Date:31/08/2017 6 | Description:CARD PAYMENT TO SHELL TOTHILL,2.04 GBP, RATE 1.00/GBP ON 29-08-2013 7 | Amount:-2.04 8 | Balance:2301.62 9 | 10 | Date:31/08/2017 11 | Description:CARD PAYMENT TO ASDA SUPERSTORE ON 2013-08-29 12 | Amount:-23.46 13 | Balance:2303.66 14 | 15 | Date:30/08/2017 16 | Description:CHEQUE PAID IN AT SOUTHAMPTON UNIVERSITY 17 | Amount:55.10 18 | Balance:2327.12 19 | 20 | Date:30/08/2017 21 | Description:CARD PAYMENT TO TICKETOFFICESALE,6.20 GBP, RATE 1.00/GBP ON 28-08-2013 22 | Amount:-6.20 23 | Balance:2272.02 24 | 25 | Date:30/08/2017 26 | Description:CARD PAYMENT TO MARKS & SPENCER,8.99 GBP, RATE 1.00/GBP ON 28-08-2013 27 | Amount:-8.99 28 | Balance:2288.22 29 | 30 | Date:29/08/2017 31 | Description:CARD PAYMENT TO THE COWHERDS,2.55 GBP, RATE 1.00/GBP ON 27-08-2013 32 | Amount:-2.55 33 | Balance:1308.27 34 | 35 | Date:29/08/2017 36 | Description:CARD PAYMENT TO HOBBY CRAFT LTD,8.59 GBP, RATE 1.00/GBP ON 27-08-2013 37 | Amount:-8.59 38 | Balance:1310.82 39 | 40 | Date:29/08/2017 41 | 
Description:CARD PAYMENT TO WAITROSE 720,2.18 GBP, RATE 1.00/GBP ON 27-08-2013 42 | Amount:-2.18 43 | Balance:1319.41 44 | 45 | Date:29/08/2017 46 | Description:CARD PAYMENT TO ASDA STORE/PETROL/,56.60 GBP, RATE 1.00/GBP ON 27-08-2013 47 | Amount:-56.60 48 | Balance:1321.59 49 | 50 | Date:28/08/2017 51 | Description:CARD PAYMENT TO HOBBY CRAFT LTD,4.69 GBP, RATE 1.00/GBP ON 26-08-2013 52 | Amount:-4.69 53 | Balance:1398.19 54 | 55 | Date:28/08/2017 56 | Description:CARD PAYMENT TO ASDA SUPERSTORE ON 2013-08-27 57 | Amount:-43.21 58 | Balance:2303.66 59 | 60 | Date:28/08/2017 61 | Description:CARD PAYMENT TO SAINSBURYS,14.80 GBP, RATE 1.00/GBP ON 26-08-2013 62 | Amount:-14.80 63 | Balance:1402.88 64 | 65 | Date:26/08/2017 66 | Description:CARD PAYMENT TO ASDA STORE/PETROL/,62.60 GBP, RATE 1.00/GBP ON 25-08-2013 67 | Amount:-62.60 68 | Balance:1321.59 -------------------------------------------------------------------------------- /categories.txt: -------------------------------------------------------------------------------- 1 | Income 2 | Bill - General 3 | Bill - Utilities 4 | Bill - Communications 5 | Travel - General 6 | Travel - Car 7 | Travel - Petrol 8 | Supermarket 9 | Cash Out 10 | Eating Out 11 | House 12 | Books 13 | Craft 14 | Charity Shop 15 | Presents 16 | Toiletries 17 | Mortgage 18 | Paypal 19 | Unclassified -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | from BankClassify import BankClassify 2 | 3 | bc = BankClassify() 4 | 5 | bc.add_data("Statement_Example.txt") -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | textblob 3 | colorama 4 | tabulate -------------------------------------------------------------------------------- /test/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/robintw/BankClassify/477c32a64a9d5cf0564d47ca656572d7132ec180/test/__init__.py -------------------------------------------------------------------------------- /test/test_bak_classify.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from BankClassify import BankClassify 3 | 4 | 5 | def test_mintReader_returns_date_description_ammount(): 6 | bc = BankClassify() 7 | df = bc._read_mint_csv('transactions.csv') 8 | 9 | columns = df.columns.values.tolist() 10 | assert 'date' in columns 11 | assert 'desc' in columns 12 | assert 'amount' in columns 13 | 14 | 15 | def test_onlyTreeColumns(): 16 | bc = BankClassify() 17 | df = bc._read_mint_csv('transactions.csv') 18 | 19 | assert len(df.columns.values.tolist()) == 3 20 | 21 | 22 | def test_debitIsNegative_creditIsPositive(): 23 | df = pd.read_csv('transactions.csv', skiprows=0) 24 | 25 | """Rename columns """ 26 | # df.columns = ['date', 'desc', 'amount'] 27 | df.rename( 28 | columns={ 29 | "Date": 'date', 30 | "Original Description": 'desc', 31 | "Amount": 'amount', 32 | "Transaction Type": 'type' 33 | }, 34 | inplace=True 35 | ) 36 | 37 | bc = BankClassify() 38 | df_dut = bc._read_mint_csv('transactions.csv') 39 | 40 | baseline = df['type'] == 'debit' 41 | 42 | assert (df_dut.loc[baseline, 'amount'] < 0).all() 43 | assert (df_dut.loc[~baseline, 'amount'] >= 0).all() 44 | -------------------------------------------------------------------------------- /test/transactions.csv: -------------------------------------------------------------------------------- 1 | "Date","Description","Original Description","Amount","Transaction Type","Category","Account Name","Labels","Notes" 2 | "4/28/2020","Paycheck","CORPORATIO CO PAYCHECK","500.00","credit","Paycheck","Bank checking","","" 3 | "4/25/2020","Amazon","AMZN Mktp 
US","29.99","debit","Shopping","CC","","" 4 | --------------------------------------------------------------------------------