├── .gitignore ├── LICENSE ├── README.md ├── build.gradle ├── demo_animation.gif ├── gradlew ├── gradlew.bat ├── inputs.txt └── src └── main └── kotlin ├── Dashboard.kt └── Model.kt /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff: 7 | .idea/**/workspace.xml 8 | .idea/**/tasks.xml 9 | .idea/dictionaries 10 | 11 | # Sensitive or high-churn files: 12 | .idea/**/dataSources/ 13 | .idea/**/dataSources.ids 14 | .idea/**/dataSources.xml 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | 20 | # Gradle: 21 | .idea/**/gradle.xml 22 | .idea/**/libraries 23 | 24 | # CMake 25 | cmake-build-debug/ 26 | cmake-build-release/ 27 | 28 | # Mongo Explorer plugin: 29 | .idea/**/mongoSettings.xml 30 | 31 | ## File-based project format: 32 | *.iws 33 | 34 | ## Plugin-specific files: 35 | 36 | # IntelliJ 37 | out/ 38 | 39 | # mpeltonen/sbt-idea plugin 40 | .idea_modules/ 41 | 42 | # JIRA plugin 43 | atlassian-ide-plugin.xml 44 | 45 | # Cursive Clojure plugin 46 | .idea/replstate.xml 47 | 48 | # Crashlytics plugin (for Android Studio and IntelliJ) 49 | com_crashlytics_export_strings.xml 50 | crashlytics.properties 51 | crashlytics-build.properties 52 | fabric.properties 53 | 54 | /bayes_user_input_prediction.iml 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Naive Bayes/Logistic Regression User Input Prediction 2 | 3 | This is a simple [Kotlin](http://kotlinlang.org/) application that leverages Naive Bayes or logistic regression to categorize bank transactions. 4 | 5 | ![](demo_animation.gif) 6 | 7 | 8 | ## YouTube Walkthrough 9 | 10 | [![](https://img.youtube.com/vi/JLSdW60t898/hqdefault.jpg)](https://www.youtube.com/watch?v=JLSdW60t898) 11 | 12 | ## Featured at KotlinConf 2018 in Amsterdam 13 | 14 | [![](https://img.youtube.com/vi/-zTqtEcnM7A/hqdefault.jpg)](https://youtu.be/-zTqtEcnM7A) 15 | 16 | 17 | 18 | 19 | ## Details 20 | 21 | 22 | NOTE: [Kotlin-Statistics](https://github.com/thomasnield/kotlin-statistics#naive-bayes-classifier) now has this feature in an easy-to-use API. 23 | 24 | Input and categorize a few transactions, and the simple AI will start to predict the categories once it picks up probabilities for given keywords. 
25 | 26 | Chapter 13 of [O'Reilly Data Science From Scratch](http://a.co/i6i5wEX ) by Joel Grus was a useful reference. 27 | 28 | Here is a recorded demo. The first three transactions did not have enough data to predict. But after the fourth transaction, the algorithm starts to make accurate predictions. 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | 2 | 3 | buildscript { 4 | ext.kotlin_version = '1.3.50' 5 | 6 | repositories { 7 | mavenCentral() 8 | } 9 | dependencies { 10 | classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version" 11 | } 12 | } 13 | 14 | apply plugin: 'kotlin' 15 | 16 | repositories { 17 | mavenCentral() 18 | } 19 | 20 | dependencies { 21 | compile "org.jetbrains.kotlin:kotlin-stdlib-jdk8" 22 | compile 'no.tornado:tornadofx:1.7.15' 23 | compile 'org.nield:kotlin-statistics:1.2.1' 24 | } 25 | 26 | compileKotlin { 27 | kotlinOptions.jvmTarget = "1.8" 28 | } 29 | compileTestKotlin { 30 | kotlinOptions.jvmTarget = "1.8" 31 | } 32 | 33 | 34 | task fatJar(type: Jar) { 35 | manifest { 36 | attributes 'Implementation-Title': 'Kotlin Linear Regression', 37 | 'Implementation-Version': 1.0, 38 | 'Main-Class': 'DashboardKt' 39 | } 40 | baseName = project.name 41 | from { configurations.compile.collect { it.isDirectory() ? 
it : zipTree(it) } } 42 | with jar 43 | } 44 | 45 | -------------------------------------------------------------------------------- /demo_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thomasnield/bayes_user_input_prediction/a348ffe21917cc12ce6e48d5d1aec264f28f3fc3/demo_animation.gif -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 
48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /inputs.txt: -------------------------------------------------------------------------------- 1 | 2018-03-13,12.69,WHOLEFDS HPK 10140 2 | 2018-03-13,4.64,BIGGBY COFFEE #370 3 | 2018-03-13,14.23,AMAZON SALE 4 | 2018-03-10,5.40,AMAZON VIDEO ON DEMAND 5 | 2018-03-12,5.19,STARBUCKS COFFEE #370 6 | 2018-03-10,61.27,WHOLEFDS PLN 10030 7 | 2018-03-07,2.29,REDBOX VIDEO RENTAL #271 8 | 9 | 2018-03-06,13.99,NETFLIX ON DEMAND #21 10 | 2018-03-07,2.29,FAMILY VIDEO 11 | 12 | 2018-03-06,17.21,FROGG COFFEE BAR AND CREPERIE 13 | 2018-03-06,17.92,OAK CLIFF CREPERIE 14 | 15 | 16 | 2018-03-06,120.91,HERTZ CAR RENTAL DALLAS LUV FIELD 17 | 2018-03-03,113.12,ALAMO CAR RENTAL SAN ANTONIO 18 | 2018-03-02,2.29,REDBOX VIDEO RENTAL #270 19 | 2018-03-07,120.14,ENTERPRISE CAR RENTAL NEW YORK 20 | -------------------------------------------------------------------------------- /src/main/kotlin/Dashboard.kt: --------------------------------------------------------------------------------
import javafx.application.Application
import javafx.beans.property.SimpleDoubleProperty
import javafx.beans.property.SimpleObjectProperty
import javafx.beans.property.SimpleStringProperty
import javafx.geometry.Orientation
import javafx.scene.control.ButtonType
import javafx.scene.control.Dialog
import javafx.scene.input.Clipboard
import tornadofx.*
import java.time.LocalDate


/** Launches the JavaFX application, forwarding the command-line arguments. */
fun main(args: Array<String>) = Application.launch(TransactionApp::class.java, *args)

/** TornadoFX application whose primary view is [TransactionView]. */
class TransactionApp : App(TransactionView::class)

/**
 * Main window: an editable table of the shared `transactions` list in the center and a
 * vertical toolbar on the left holding the "+" (add transaction) button and the
 * classifier selector bound to `selectedClassifier`.
 */
class TransactionView : View() {

    override val root = borderpane {

        title = "Bank Transaction Categorizer"

        style = "-fx-font-size: 16pt; "

        center = tableview(transactions) {
            readonlyColumn("DATE", BankTransaction::date)
            readonlyColumn("AMOUNT", BankTransaction::amount)
            readonlyColumn("MEMO", BankTransaction::memo)
            // The only editable column: the category can be corrected from a combo box.
            column("CATEGORY", BankTransaction::category).useComboBox(categories)

            isEditable = true

            items.onChange {
                resizeColumnsToFitContent()
            }
        }

        left = toolbar {
            orientation = Orientation.VERTICAL

            hbox {
                button("+") {
                    setOnAction {

                        // Backing properties for the dialog's input fields.
                        val date = SimpleObjectProperty<LocalDate>()
                        val amount = SimpleDoubleProperty()
                        val memo = SimpleStringProperty()

                        val result = Dialog<BankTransaction>().apply {
                            title = "Enter Transaction"
                            headerText = "Input a new transaction"

                            dialogPane.buttonTypes.addAll(ButtonType.OK, ButtonType.CANCEL)

                            dialogPane.content = form {
                                style = "-fx-font-size: 16pt; "

                                fieldset {
                                    field("DATE") {
                                        datepicker(date)
                                    }
                                    field("AMOUNT") {
                                        textfield(amount)
                                    }
                                    field("MEMO") {
                                        textfield(memo)
                                    }
                                    field {
                                        // Fills the form from a "date,amount,memo" line on the clipboard.
                                        button("CLIPBOARD") {
                                            setOnAction {
                                                try {
                                                    // The clipboard may hold no text at all (null string).
                                                    // limit = 3 keeps commas that belong to the memo.
                                                    val fields = Clipboard.getSystemClipboard().string
                                                        .orEmpty()
                                                        .trim()
                                                        .split(",", limit = 3)

                                                    // Parse everything before assigning anything, so a bad
                                                    // field cannot leave the form half-updated.
                                                    val parsedDate = LocalDate.parse(fields[0])
                                                    val parsedAmount = fields[1].toDouble()
                                                    val parsedMemo = fields[2]

                                                    date.value = parsedDate
                                                    amount.value = parsedAmount
                                                    memo.value = parsedMemo
                                                } catch (e: Exception) {
                                                    // Best effort: malformed clipboard content is reported, not fatal.
                                                    println("Invalid clipboard input")
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                            setResultConverter { buttonType ->
                                // Both properties start out null; BankTransaction takes non-null
                                // values, so an unset date yields no result and an unset memo
                                // becomes the empty string instead of throwing.
                                val enteredDate = date.value
                                if (buttonType == ButtonType.OK && enteredDate != null) {
                                    BankTransaction(enteredDate, amount.value, memo.value.orEmpty())
                                } else null
                            }

                            showAndWait()
                        }.result

                        if (result != null)
                            transactions += result
                    }
                }

                combobox(
                    property = selectedClassifier,
                    values = ClassifierImplementation.values().toList().observable()
                )
            }
        }
    }
}
-------------------------------------------------------------------------------- /src/main/kotlin/Model.kt: -------------------------------------------------------------------------------- 1 | import
javafx.beans.property.SimpleObjectProperty
import javafx.collections.FXCollections
import org.apache.commons.math3.distribution.NormalDistribution
import org.nield.kotlinstatistics.randomFirst
import org.nield.kotlinstatistics.toNaiveBayesClassifier
import java.time.LocalDate
import kotlin.math.exp
import kotlin.math.ln


/** Number of random coefficient perturbations tried per category by the logistic-regression fit. */
private const val HILL_CLIMB_ITERATIONS = 10_000

/** Starting value of the intercept and of every word coefficient. */
private const val INITIAL_COEFFICIENT = 0.01

// Compiled once: discretizeWords() runs for every memo on every prediction.
private val WHITESPACE = Regex("\\s")
private val NON_LETTERS = Regex("[^A-Za-z]")

/** Categories a transaction can be assigned to; also the candidates scored by LOGISTIC_REGRESSION. */
val categories = FXCollections.observableArrayList("Grocery", "Utility", "Electronics", "Entertainment", "Coffee", "Restaurants","Travel")

/** All transactions entered so far; this is the training data for both classifiers. */
val transactions = FXCollections.observableArrayList<BankTransaction>()

/** Classifier used to pre-fill the category of newly created transactions. */
val selectedClassifier = SimpleObjectProperty(ClassifierImplementation.NAIVE_BAYES)

/**
 * A single bank transaction.
 *
 * @param category explicit category; when null, the currently selected classifier predicts one
 * from the transactions recorded so far (the prediction itself may also be null).
 */
class BankTransaction(
    val date: LocalDate,
    val amount: Double,
    val memo: String,
    category: String? = null
) {
    // default category to a predicted category if it is not provided
    var category: String? = category ?: selectedClassifier.get().predict(this)
}


/** Strategies for predicting the category of a [BankTransaction] from the words in its memo. */
enum class ClassifierImplementation {
    NAIVE_BAYES {
        /** Trains a Naive Bayes classifier on all recorded transactions and classifies the memo words. */
        override fun predict(bankTransaction: BankTransaction): String? {
            val classifier = transactions.toNaiveBayesClassifier(
                featuresSelector = { it.memo.discretizeWords() },
                categorySelector = { it.category }
            )

            return classifier.predict(bankTransaction.memo.discretizeWords())
        }
    },
    LOGISTIC_REGRESSION {
        /**
         * Fits one one-vs-rest logistic model per category by random hill climbing and returns the
         * category whose model gives the memo the highest probability. Each (category, probability)
         * pair is printed to standard output, best first.
         */
        override fun predict(bankTransaction: BankTransaction): String? {
            val noise = NormalDistribution(0.0, 1.0)

            // Tokenize every memo once instead of on every hill-climbing iteration.
            val samples = transactions.map { it.category to it.memo.discretizeWords() }
            val targetWords = bankTransaction.memo.discretizeWords()

            return categories.asSequence()
                .map { category -> category to estimateCategoryProbability(category, samples, targetWords, noise) }
                .sortedByDescending { it.second }
                .toList()
                .onEach { println(it) }
                .firstOrNull()
                ?.first
        }
    };

    /** Returns the predicted category for [bankTransaction], or null if no prediction is available. */
    abstract fun predict(bankTransaction: BankTransaction): String?

    override fun toString() = name.replace("_", " ")
}

/**
 * Fits a logistic model "memo belongs to [category]" and returns its probability for [targetWords].
 *
 * The features are the distinct words seen in memos already labelled with [category]. The intercept
 * and one weight per word start at [INITIAL_COEFFICIENT]; each iteration nudges one randomly chosen
 * coefficient by a sample from [noise] and keeps the change only if the log-likelihood of [samples]
 * strictly improves.
 *
 * @param samples (category, memo words) for every recorded transaction
 */
private fun estimateCategoryProbability(
    category: String,
    samples: List<Pair<String?, Set<String>>>,
    targetWords: Set<String>,
    noise: NormalDistribution
): Double {
    val (members, nonMembers) = samples.partition { it.first == category }
    val memberWords = members.map { it.second }
    val nonMemberWords = nonMembers.map { it.second }

    val vocabulary = memberWords.asSequence().flatMap { it.asSequence() }.distinct().toList()

    var intercept = INITIAL_COEFFICIENT
    val weights = DoubleArray(vocabulary.size) { INITIAL_COEFFICIENT }

    // Logistic function over the weights of the vocabulary words present in `words`.
    fun probability(words: Set<String>): Double {
        var weightSum = 0.0
        for (i in vocabulary.indices) {
            if (vocabulary[i] in words) weightSum += weights[i]
        }
        return 1.0 / (1 + exp(-(intercept + weightSum)))
    }

    // Compared in log space: exp() of a large negative log-likelihood underflows to 0.0,
    // after which no candidate could ever register as an improvement.
    var bestLogLikelihood = Double.NEGATIVE_INFINITY

    repeat(HILL_CLIMB_ITERATIONS) {
        // Index 0 selects the intercept, 1..vocabulary.size select a word weight.
        val selected = (0..vocabulary.size).asSequence().randomFirst()
        val adjust = noise.sample()

        // make a random adjustment to the selected coefficient
        if (selected == 0) intercept += adjust else weights[selected - 1] += adjust

        // log-likelihood of the labelled data under the adjusted coefficients
        val logLikelihood =
            memberWords.asSequence().map { words -> ln(probability(words)) }.sum() +
                nonMemberWords.asSequence().map { words -> ln(1 - probability(words)) }.sum()

        if (logLikelihood > bestLogLikelihood) {
            bestLogLikelihood = logLikelihood
        } else {
            // revert if no improvement happens
            if (selected == 0) intercept -= adjust else weights[selected - 1] -= adjust
        }
    }

    return probability(targetWords)
}

/**
 * Splits the receiver on whitespace, strips every non-ASCII-letter character from each token,
 * lower-cases it, and returns the distinct non-empty results.
 */
fun String.discretizeWords(): Set<String> = split(WHITESPACE).asSequence()
    .map { it.replace(NON_LETTERS, "").toLowerCase() }
    .filter { it.isNotEmpty() }
    .toSet()
--------------------------------------------------------------------------------