├── .gitignore
├── Code Examples
├── KNN
│ ├── .idea
│ │ ├── .name
│ │ ├── compiler.xml
│ │ ├── copyright
│ │ │ └── profiles_settings.xml
│ │ ├── encodings.xml
│ │ ├── libraries
│ │ │ ├── Maven__com_github_haifengl_smile_core_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_data_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_graph_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_math_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_plot_1_0_2.xml
│ │ │ └── Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ ├── scala_compiler.xml
│ │ ├── scopes
│ │ │ └── scope_settings.xml
│ │ ├── vcs.xml
│ │ └── workspace.xml
│ ├── KNN.iml
│ ├── KNN_Example_1.csv
│ ├── pom.xml
│ ├── projectFilesBackup
│ │ └── KNN.iml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── KNNExample.scala
├── LinearRegression
│ ├── .idea
│ │ ├── .name
│ │ ├── compiler.xml
│ │ ├── copyright
│ │ │ └── profiles_settings.xml
│ │ ├── encodings.xml
│ │ ├── libraries
│ │ │ ├── Maven__com_github_haifengl_smile_core_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_data_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_graph_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_math_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_plot_1_0_2.xml
│ │ │ ├── Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml
│ │ │ └── smile_1_0.xml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ ├── scala_compiler.xml
│ │ ├── scopes
│ │ │ └── scope_settings.xml
│ │ ├── uiDesigner.xml
│ │ ├── vcs.xml
│ │ └── workspace.xml
│ ├── LinearRegression.iml
│ ├── data
│ │ └── OLS_Regression_Example_3.csv
│ ├── pom.xml
│ ├── projectFilesBackup
│ │ └── LinearRegression.iml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── LinearRegression.scala
├── NaiveBayes
│ ├── .idea
│ │ ├── .name
│ │ ├── compiler.xml
│ │ ├── copyright
│ │ │ └── profiles_settings.xml
│ │ ├── encodings.xml
│ │ ├── libraries
│ │ │ ├── Maven__com_github_haifengl_smile_core_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_data_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_graph_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_math_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_plot_1_0_2.xml
│ │ │ ├── Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml
│ │ │ ├── scala_compiler.xml
│ │ │ └── smile_1_0.xml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ ├── scala_compiler.xml
│ │ ├── scopes
│ │ │ └── scope_settings.xml
│ │ ├── vcs.xml
│ │ └── workspace.xml
│ ├── NaiveBayes.iml
│ ├── data
│ │ └── stopwords.txt
│ ├── pom.xml
│ ├── projectFilesBackup
│ │ └── NaiveBayes.iml
│ └── src
│ │ └── main
│ │ └── java
│ │ ├── NaiveBayesExample.scala
│ │ └── TDM.scala
├── PCA
│ ├── .idea
│ │ ├── .name
│ │ ├── compiler.xml
│ │ ├── copyright
│ │ │ └── profiles_settings.xml
│ │ ├── encodings.xml
│ │ ├── highlighting.xml
│ │ ├── libraries
│ │ │ ├── Maven__com_github_haifengl_smile_core_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_data_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_graph_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_math_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_plot_1_0_2.xml
│ │ │ ├── Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml
│ │ │ └── smile_1_0.xml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ ├── scala_compiler.xml
│ │ ├── scopes
│ │ │ └── scope_settings.xml
│ │ ├── vcs.xml
│ │ └── workspace.xml
│ ├── PCA.iml
│ ├── data
│ │ ├── PCA_Example_1.csv
│ │ └── PCA_Example_2.csv
│ ├── pom.xml
│ ├── projectFilesBackup
│ │ └── PCA.iml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── PCA.scala
├── RecommendationSystem
│ ├── .idea
│ │ ├── .name
│ │ ├── compiler.xml
│ │ ├── copyright
│ │ │ └── profiles_settings.xml
│ │ ├── encodings.xml
│ │ ├── libraries
│ │ │ ├── Maven__com_github_haifengl_smile_core_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_data_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_graph_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_math_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_plot_1_0_2.xml
│ │ │ ├── Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml
│ │ │ ├── joda_time_joda_time_2_2.xml
│ │ │ └── smile_1_0.xml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ ├── scala_compiler.xml
│ │ ├── scopes
│ │ │ └── scope_settings.xml
│ │ ├── uiDesigner.xml
│ │ └── vcs.xml
│ ├── RecommendationSystem.iml
│ ├── data
│ │ └── stopwords.txt
│ ├── pom.xml
│ ├── projectFilesBackup
│ │ └── RecommendationSystem.iml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── RecommendationSystem.scala
├── SVM
│ ├── .idea
│ │ ├── .name
│ │ ├── compiler.xml
│ │ ├── copyright
│ │ │ └── profiles_settings.xml
│ │ ├── encodings.xml
│ │ ├── libraries
│ │ │ ├── Maven__com_github_haifengl_smile_core_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_data_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_graph_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_math_1_0_3.xml
│ │ │ ├── Maven__com_github_haifengl_smile_plot_1_0_2.xml
│ │ │ └── Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ ├── scala_compiler.xml
│ │ ├── scopes
│ │ │ └── scope_settings.xml
│ │ ├── uiDesigner.xml
│ │ ├── vcs.xml
│ │ └── workspace.xml
│ ├── SVM.iml
│ ├── data
│ │ ├── SVM_Example_1.csv
│ │ ├── SVM_Example_2.csv
│ │ └── SVM_Example_2_Test_data.csv
│ ├── pom.xml
│ ├── projectFilesBackup
│ │ └── SVM.iml
│ └── src
│ │ └── main
│ │ └── java
│ │ ├── SVM_Example_2.scala
│ │ └── SupportVectorMachine.scala
└── TextRegression
│ ├── .idea
│ ├── .name
│ ├── compiler.xml
│ ├── copyright
│ │ └── profiles_settings.xml
│ ├── encodings.xml
│ ├── libraries
│ │ ├── Maven__com_github_haifengl_smile_core_1_0_2.xml
│ │ ├── Maven__com_github_haifengl_smile_data_1_0_1.xml
│ │ ├── Maven__com_github_haifengl_smile_graph_1_0_1.xml
│ │ ├── Maven__com_github_haifengl_smile_math_1_0_2.xml
│ │ ├── Maven__com_github_haifengl_smile_plot_1_0_2.xml
│ │ ├── Maven__com_github_tototoshi_scala_csv_2_11_1_2_1.xml
│ │ ├── Maven__org_scala_lang_scala_library_2_11_6.xml
│ │ ├── Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml
│ │ ├── com_github_tototoshi_scala_csv_2_11_1_2_0.xml
│ │ └── smile_1_0.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── scala_compiler.xml
│ ├── scopes
│ │ └── scope_settings.xml
│ ├── uiDesigner.xml
│ ├── vcs.xml
│ └── workspace.xml
│ ├── TextRegression.iml
│ ├── data
│ ├── TextRegression_Example_1.csv
│ └── stopwords.txt
│ ├── pom.xml
│ ├── projectFilesBackup
│ └── TextRegression.iml
│ └── src
│ └── main
│ └── java
│ ├── DTM.scala
│ └── TextRegression.scala
├── Example Data
├── KNN_Example_1.csv
├── OLS_Regression_Example_3.csv
├── PCA_Example_1.csv
├── PCA_Example_2.csv
├── Recommendation_Example_1.zip
├── SVM_Example_1.csv
├── SVM_Example_2.csv
├── SVM_Example_2_Test_data.csv
├── TextRegression_Example_1.csv
└── stopwords.txt
├── Explanatory Graphs
└── ML Blog.graffle
├── Images
├── DynamicMachineLearning.png
├── Formula1.png
├── Formula2.png
├── Formula3.png
├── Formula4.png
├── Good_Fit.png
├── Ham_No_Stopwords.png
├── Ham_Stopwords.png
├── HumanDataPoints.png
├── KNNPlot.png
├── Mail_per_Sender_Distribution.png
├── Mail_per_Sender_log_Distribution.png
├── Mail_per_Subject_Distribution.png
├── Mail_per_Subject_log_Distribution.png
├── MaleFemalePlot.png
├── OverFitting.png
├── PCA_Explanatory_Data.png
├── PCA_Normalised.png
├── PCA_Reduced_Dimension.png
├── Precision.png
├── PrecisionFull.png
├── PrecisionHalf.png
├── Recall.png
├── RecallFull.png
├── RecallHalf.png
├── SVM_Datapoints.png
├── SVM_TestData.png
├── SVM_TrainData.png
├── Spam_No_Stopwords.png
├── Spam_Stopwords.png
├── Under-fitting.png
├── Unscaled_DJI_PCA_Index.png
├── Unscaled_PCA_Index.png
└── Weighted_Subject_Distribution.png
└── Readme.md
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.class
3 |
4 | Code/Scala/BinaryClassification/.idea/workspace.xml
5 |
6 | Code Examples/KNN/.idea/workspace.xml
7 |
8 | Code Examples/RecommendationSystem/.idea/workspace.xml
9 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/.name:
--------------------------------------------------------------------------------
1 | KNN
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/libraries/Maven__com_github_haifengl_smile_core_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/libraries/Maven__com_github_haifengl_smile_data_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/libraries/Maven__com_github_haifengl_smile_graph_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/libraries/Maven__com_github_haifengl_smile_math_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/libraries/Maven__com_github_haifengl_smile_plot_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/libraries/Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Code Examples/KNN/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/KNN/KNN.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/Code Examples/KNN/KNN_Example_1.csv:
--------------------------------------------------------------------------------
1 | "X","Y","Label"
2 | 2.37354618925767,5.39810588036707,0
3 | 3.18364332422208,4.38797360674923,0
4 | 2.16437138758995,5.34111969142442,0
5 | 4.59528080213779,3.87063690391921,0
6 | 3.32950777181536,6.43302370170104,0
7 | 2.17953161588198,6.98039989850586,0
8 | 3.48742905242849,4.63277852353349,0
9 | 3.73832470512922,3.95586537368347,0
10 | 3.57578135165349,5.56971962744241,0
11 | 2.69461161284364,4.86494539611918,0
12 | 4.51178116845085,7.40161776050478,0
13 | 3.38984323641143,4.96075999726683,0
14 | 2.3787594194582,5.68973936245078,0
15 | 0.7853001128225,5.02800215878067,0
16 | 4.12493091814311,4.25672679111759,0
17 | 2.95506639098477,5.18879229951434,0
18 | 2.98380973690105,3.19504137110896,0
19 | 3.9438362106853,6.46555486156289,0
20 | 3.82122119509809,5.1532533382119,0
21 | 3.59390132121751,7.17261167036215,0
22 | 3.91897737160822,5.47550952889966,0
23 | 3.78213630073107,4.29005356907819,0
24 | 3.07456498336519,5.61072635348905,0
25 | 1.01064830413663,4.06590236835575,0
26 | 3.61982574789471,3.7463665997609,0
27 | 2.943871260471,5.29144623551746,0
28 | 2.84420449329467,4.55670812678157,0
29 | 1.52924761610073,5.00110535163162,0
30 | 2.52184994489138,5.07434132415166,0
31 | 3.4179415601997,4.41047905381193,0
32 | 4.35867955152904,4.4313312671815,0
33 | 2.897212272657,4.86482138487617,0
34 | 3.38767161155937,6.1780869965732,0
35 | 2.94619495941709,3.47643319957024,0
36 | 1.62294044317139,5.59394618762842,0
37 | 2.58500543670032,5.33295037121352,0
38 | 2.60571004628965,6.06309983727636,0
39 | 2.94068660328881,4.6958160763657,0
40 | 4.10002537198388,5.37001880991629,0
41 | 3.76317574845754,5.26709879077223,0
42 | 2.83547640374641,4.45747996900835,0
43 | 2.74663831986349,6.20786780598317,0
44 | 3.69696337540474,6.16040261569495,0
45 | 3.55666319867366,5.700213649515,0
46 | 2.31124430545048,6.58683345454085,0
47 | 2.29250484303788,5.5584864255653,0
48 | 3.36458196213683,3.72340779154196,0
49 | 3.76853292451542,4.42673458576311,0
50 | 2.88765378784977,3.77538738510164,0
51 | 3.88110772645421,4.52659936356069,0
52 | 4.37963332277588,7.45018710127266,1
53 | 5.04211587314424,6.98144016728536,1
54 | 4.08907835144755,6.68193162545616,1
55 | 5.15802877240407,6.0706378525463,1
56 | 4.34541535608118,5.51253968985852,1
57 | 6.76728726937265,5.92480770338432,1
58 | 5.71670747601721,8.00002880371391,1
59 | 5.91017422949523,6.37873330520318,1
60 | 5.38418535782634,5.61557315261551,1
61 | 6.68217608051942,8.86929062242358,1
62 | 4.36426354605102,7.42510037737245,1
63 | 4.53835526963943,6.76135289908697,1
64 | 6.43228223854166,8.05848304870902,1
65 | 4.34930364668963,7.88642265137494,1
66 | 4.79261925639803,6.38075695176885,1
67 | 4.60719207055802,9.20610246454047,1
68 | 4.68000713145149,6.74497296985898,1
69 | 4.72088669702344,5.57550534978719,1
70 | 5.49418833126783,6.85560039804578,1
71 | 4.82266951773039,7.20753833923234,1
72 | 4.49404253788574,9.30797839905936,1
73 | 6.34303882517041,7.10580236789371,1
74 | 4.78542059145313,7.45699880542341,1
75 | 4.82044346995661,6.92284706464347,1
76 | 4.89980925878644,6.66599915763346,1
77 | 5.71266630705141,6.96527397168872,1
78 | 4.92643559587367,7.78763960563016,1
79 | 4.96236582853295,9.07524500865228,1
80 | 4.31833952124434,8.02739243876377,1
81 | 4.67572972775368,8.2079083983867,1
82 | 5.06016044043452,5.76867657844196,1
83 | 4.41110551374034,7.98389557005338,1
84 | 5.53149619263257,7.21992480366065,1
85 | 3.48160591821321,5.53274997090776,1
86 | 5.30655786078977,7.52102274264814,1
87 | 3.46355017646241,6.84124539528398,1
88 | 4.69902387316339,8.4645873119698,1
89 | 4.47172009555499,6.23391800039534,1
90 | 4.347905219319,6.56978824607145,1
91 | 4.94310322215261,6.07389050262256,1
92 | 3.08564057431999,6.82289603856346,1
93 | 6.17658331201856,7.40201177948634,1
94 | 3.335027563788,6.26825182688039,1
95 | 4.53646959852761,7.83037316798167,1
96 | 3.88407989495715,5.79191721369553,1
97 | 4.24918099880655,5.95201558719226,1
98 | 7.08716654562835,8.44115770684428,1
99 | 5.01739561969325,5.98415253469535,1
100 | 3.71369946956567,7.41197471231752,1
101 | 3.35939446558142,6.61892394889108,1
102 |
--------------------------------------------------------------------------------
/Code Examples/KNN/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | Xyclade.ML
8 | KNN
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 |
14 | com.github.haifengl
15 | smile-core
16 | 1.0.3
17 |
18 |
19 | com.github.haifengl
20 | smile-plot
21 | 1.0.2
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/KNN/projectFilesBackup/KNN.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/Code Examples/KNN/src/main/java/KNNExample.scala:
--------------------------------------------------------------------------------
1 | import java.awt.Color
2 | import java.io.File
3 | import smile.classification.KNN
4 | import smile.plot._
5 | import smile.validation._
6 | import scala.swing._
7 |
/**
 * K-nearest-neighbours example.
 *
 * Loads labelled 2D points from a CSV file, shows them in a scatter plot,
 * runs a 2-fold cross validation of a KNN classifier (k = 3) and prints the
 * false prediction rate of every fold, followed by the prediction for one
 * hard-coded unknown data point.
 */
object KNNExample extends SimpleSwingApplication {

  /**
   * Builds the main window. Everything that can fail (reading the CSV,
   * plotting, training) runs inside a single try block so that a missing or
   * malformed data file is reported on the console instead of aborting the
   * application.
   */
  def top = new MainFrame {
    title = "KNN Example from http://xyclade.ml"
    val basePath = "KNN_Example_1.csv"

    try {
      val (dataPoints, labels) = GetDataFromCSV(new File(basePath))

      val plot = ScatterPlot.plot(dataPoints, labels, '@', Array(Color.red, Color.blue))
      peer.setContentPane(plot)
      size = new Dimension(400, 400)

      //Define the amount of rounds, in our case 2 and initialise the cross validation
      val validationRounds = 2
      val cv = new CrossValidation(labels.length, validationRounds)

      //cv.train / cv.test hold, per round, the row indices of that round's
      //training and testing set. Resolve them by direct array indexing; a
      //linear search per index would make this step quadratic for no benefit.
      val validationRoundRecords = cv.train.zip(cv.test).map { case (trainIndices, testIndices) =>
        (trainIndices.map(i => dataPoints(i)),
          trainIndices.map(i => labels(i)),
          testIndices.map(i => dataPoints(i)),
          testIndices.map(i => labels(i)))
      }

      validationRoundRecords.foreach { case (trainingPoints, trainingLabels, testingPoints, testingLabels) =>

        val knn = KNN.learn(trainingPoints, trainingLabels, 3)

        //And for each test data point make a prediction with the model
        val predictions = testingPoints.map(x => knn.predict(x))

        //Finally count the amount of wrongly classified data points.
        val error: Double = predictions.zip(testingLabels).count { case (predicted, actual) => predicted != actual }.toDouble

        println("False prediction rate: " + error / predictions.length * 100 + "%")

        val unknownDataPoint = Array(5.3, 4.3)

        knn.predict(unknownDataPoint) match {
          case 0 => println("Internet Service Provider Alpha")
          case 1 => println("Internet Service Provider Beta")
          case _ => println("Unexpected prediction")
        }
      }
    }
    catch {
      case e: Exception => println("You probably are missing the KNN sample file, or did not set the path correctly. Check the exception for more details: " + e);
    }
  }

  /**
   * Reads the data points and their labels from a CSV file.
   *
   * The first line is treated as a header and skipped; blank lines are
   * ignored. The file handle is always released, also when a row fails to
   * parse.
   *
   * @param file CSV file with the columns x, y, label
   * @return the data points (one `Array(x, y)` per row) and, at matching
   *         positions, their integer labels
   */
  def GetDataFromCSV(file: File): (Array[Array[Double]], Array[Int]) = {
    val source = scala.io.Source.fromFile(file)
    val data =
      try source.getLines().drop(1).filter(_.trim.nonEmpty).map(x => GetDataFromString(x)).toArray
      finally source.close()

    (data.map(_._1), data.map(_._2))
  }

  /**
   * Parses a single CSV row of the form `x,y,label`.
   *
   * @param dataString one comma separated line of the data file
   * @return the coordinates as `Array(x, y)` together with the label
   * @throws NumberFormatException          if a field is not numeric
   * @throws ArrayIndexOutOfBoundsException if the row has fewer than three fields
   */
  def GetDataFromString(dataString: String): (Array[Double], Int) = {

    //Split the comma separated value string into an array of strings
    val dataArray: Array[String] = dataString.split(',')

    //Extract the values from the strings. toInt does not tolerate surrounding
    //whitespace, hence the trim.
    val xCoordinate: Double = dataArray(0).trim.toDouble
    val yCoordinate: Double = dataArray(1).trim.toDouble
    val classifier: Int = dataArray(2).trim.toInt

    //And return the result in a format that can later easily be used to feed to Smile
    (Array(xCoordinate, yCoordinate), classifier)
  }
}
110 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/.name:
--------------------------------------------------------------------------------
1 | LinearRegressionExample
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/libraries/Maven__com_github_haifengl_smile_core_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/libraries/Maven__com_github_haifengl_smile_data_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/libraries/Maven__com_github_haifengl_smile_graph_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/libraries/Maven__com_github_haifengl_smile_math_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/libraries/Maven__com_github_haifengl_smile_plot_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/libraries/Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/libraries/smile_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/LinearRegression.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | Xyclade.ml
8 | LinearRegression
9 | 1.0-SNAPSHOT
10 |
11 |
12 | com.github.haifengl
13 | smile-core
14 | 1.0.3
15 |
16 |
17 | com.github.haifengl
18 | smile-plot
19 | 1.0.2
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/projectFilesBackup/LinearRegression.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Code Examples/LinearRegression/src/main/java/LinearRegression.scala:
--------------------------------------------------------------------------------
1 | import java.awt.Color
2 | import java.io.File
3 |
4 | import smile.plot._
5 | import smile.regression._
6 | import scala.swing._
7 |
8 |
/**
 * Ordinary-least-squares regression example.
 *
 * Loads gender / height / weight records from a CSV file, plots height
 * against weight coloured by gender, fits an OLS model on (gender, height)
 * and prints two sample predictions plus the model's error and R-squared.
 */
object LinearRegression extends SimpleSwingApplication {

  /** Builds the main window, fits the model and prints the results. */
  def top = new MainFrame {
    title = "Linear regression Example from http://Xyclade.ml"
    val basePath = "data/OLS_Regression_Example_3.csv"

    val test_data = GetDataFromCSV(new File(basePath))

    //Plot height (second feature) against weight; the first feature is the
    //0/1 gender flag and is used as the class label for colouring.
    val plotData = (test_data._1 zip test_data._2).map { case (features, weight) => Array(features(1), weight) }
    val maleFemaleLabels = test_data._1.map(features => features(0).toInt)
    val plot = ScatterPlot.plot(plotData, maleFemaleLabels, '@', Array(Color.blue, Color.green))
    plot.setTitle("Weight and heights for males and females")
    plot.setAxisLabel(0, "Heights")
    plot.setAxisLabel(1, "Weights")

    peer.setContentPane(plot)
    size = new Dimension(400, 400)

    val olsModel = new OLS(test_data._1, test_data._2)

    println("Prediction for Male of 1.7M: " + olsModel.predict(Array(0.0, 170.0)))
    println("Prediction for Female of 1.7M:" + olsModel.predict(Array(1.0, 170.0)))

    println("Model Error:" + olsModel.error())
    println("Accuracy of the model: " + olsModel.RSquared() * 100 + "%")
  }

  /**
   * Reads the feature rows and the target values from a CSV file.
   *
   * The first line is treated as a header and skipped; blank lines are
   * ignored. The file handle is always released, also when a row fails to
   * parse.
   *
   * @param file CSV file with the columns gender, height (inch), weight (pound)
   * @return the feature rows `Array(gender, heightInCm)` and, at matching
   *         positions, the weights in kilograms
   */
  def GetDataFromCSV(file: File): (Array[Array[Double]], Array[Double]) = {
    val source = scala.io.Source.fromFile(file)
    val data =
      try source.getLines().drop(1).filter(_.trim.nonEmpty).map(x => GetDataFromString(x)).toArray
      finally source.close()

    val inputData = data.map(_._1)
    val resultData = data.map(_._2)

    (inputData, resultData)
  }

  /**
   * Parses a single CSV row of the form `"Male",height,weight`.
   *
   * @param dataString one comma separated line of the data file
   * @return `Array(gender, heightInCm)` and the weight in kilograms, where
   *         gender is 0.0 for the literal field `"Male"` (quotes included)
   *         and 1.0 for anything else
   * @throws NumberFormatException          if height or weight is not numeric
   * @throws ArrayIndexOutOfBoundsException if the row has fewer than three fields
   */
  def GetDataFromString(dataString: String): (Array[Double], Double) = {

    //Split the comma separated value string into an array of strings
    val dataArray: Array[String] = dataString.split(',')

    val person = if (dataArray(0) == "\"Male\"") 0.0 else 1.0

    //Extract the values from the strings
    //Since the data is in US units (inches and pounds), convert it to centimetres and kilograms
    val data: Array[Double] = Array(person, dataArray(1).toDouble * 2.54)
    val weight: Double = dataArray(2).toDouble * 0.45359237

    //And return the result in a format that can later easily be used to feed to Smile
    (data, weight)
  }
}
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/.name:
--------------------------------------------------------------------------------
1 | NaiveBayes
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/Maven__com_github_haifengl_smile_core_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/Maven__com_github_haifengl_smile_data_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/Maven__com_github_haifengl_smile_graph_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/Maven__com_github_haifengl_smile_math_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/Maven__com_github_haifengl_smile_plot_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/libraries/smile_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/NaiveBayes.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/data/stopwords.txt:
--------------------------------------------------------------------------------
1 | i
2 | me
3 | my
4 | myself
5 | we
6 | our
7 | ours
8 | ourselves
9 | you
10 | your
11 | yours
12 | yourself
13 | yourselves
14 | he
15 | him
16 | his
17 | himself
18 | she
19 | her
20 | hers
21 | herself
22 | it
23 | its
24 | itself
25 | they
26 | them
27 | their
28 | theirs
29 | themselves
30 | what
31 | which
32 | who
33 | whom
34 | this
35 | that
36 | these
37 | those
38 | am
39 | is
40 | are
41 | was
42 | were
43 | be
44 | been
45 | being
46 | have
47 | has
48 | had
49 | having
50 | do
51 | does
52 | did
53 | doing
54 | would
55 | should
56 | could
57 | ought
58 | im
59 | youre
60 | hes
61 | shes
62 | were
63 | theyre
64 | ive
65 | youve
66 | weve
67 | theyve
68 | id
69 | youd
70 | hed
71 | shed
72 | wed
73 | theyd
74 | ill
75 | youll
76 | hell
77 | shell
78 | well
79 | theyll
80 | isnt
81 | arent
82 | wasnt
83 | werent
84 | hasnt
85 | havent
86 | hadnt
87 | doesnt
88 | dont
89 | didnt
90 | wont
91 | wouldnt
92 | shant
93 | shouldnt
94 | cant
95 | cannot
96 | couldnt
97 | mustnt
98 | lets
99 | thats
100 | whos
101 | whats
102 | heres
103 | theres
104 | whens
105 | wheres
106 | whys
107 | hows
108 | a
109 | an
110 | the
111 | and
112 | but
113 | if
114 | or
115 | because
116 | as
117 | until
118 | while
119 | of
120 | at
121 | by
122 | for
123 | with
124 | about
125 | against
126 | between
127 | into
128 | through
129 | during
130 | before
131 | after
132 | above
133 | below
134 | to
135 | from
136 | up
137 | down
138 | in
139 | out
140 | on
141 | off
142 | over
143 | under
144 | again
145 | further
146 | then
147 | once
148 | here
149 | there
150 | when
151 | where
152 | why
153 | how
154 | all
155 | any
156 | both
157 | each
158 | few
159 | more
160 | most
161 | other
162 | some
163 | such
164 | no
165 | nor
166 | not
167 | only
168 | own
169 | same
170 | so
171 | than
172 | too
173 | very
174 | tr
175 | td
176 |
177 |
178 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | groupId
8 | NaiveBayes
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | com.github.haifengl
14 | smile-core
15 | 1.0.3
16 |
17 |
18 | com.github.haifengl
19 | smile-plot
20 | 1.0.2
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/projectFilesBackup/NaiveBayes.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/src/main/java/NaiveBayesExample.scala:
--------------------------------------------------------------------------------
1 | import java.io.File
2 | import smile.classification.NaiveBayes
3 | import smile.feature.Bag
4 |
/**
 * Spam classification example: selects the most frequently occurring words of a
 * spam and a ham sample set as features, trains a multinomial Naive Bayes model
 * from Smile on them and reports how the mails of a third set are classified.
 */
object NaiveBayesExample {

  /**
   * Runs the complete example: feature selection, training and evaluation.
   *
   * Any exception raised while doing so is caught and reported on standard
   * output together with a hint about the missing sample data.
   *
   * @param args command line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val basePath = "data/"
    val spamPath = basePath + "/spam"
    val easyHamPath = basePath + "/easy_ham"
    val easyHam2Path = basePath + "/easy_ham_2"

    val amountOfSamplesPerSet = 500
    val amountOfFeaturesToTake = 400

    try {
      //A set gives constant time lookups while filtering the words of every mail
      val stopWords = getStopWords.toSet

      //Get a subset of the spam files together with the message body of each file
      val spamMails = getFilesFromDir(spamPath).take(amountOfSamplesPerSet).map(x => (x, getMessage(x)))
      //Get a subset of the ham files together with the message body of each file
      val hamMails = getFilesFromDir(easyHamPath).take(amountOfSamplesPerSet).map(x => (x, getMessage(x)))

      //Fail early (and end up in the catch below) when a training set could not be loaded
      if (spamMails.isEmpty || hamMails.isEmpty) {
        throw new java.io.FileNotFoundException("No training mails found in " + spamPath + " and/or " + easyHamPath)
      }

      //Feature selection, done separately for spam and ham
      val spamFeatures = topFeatures(spamMails, stopWords, amountOfFeaturesToTake)
      val hamFeatures = topFeatures(hamMails, stopWords, amountOfFeaturesToTake)

      //Words that are a top feature of both categories do not discriminate, so they are removed as noise
      val sharedFeatures = hamFeatures.toSet.intersect(spamFeatures.toSet)
      val data = (hamFeatures ++ spamFeatures).toSet -- sharedFeatures

      //Initialize a bag of words with the remaining features
      val bag = new Bag[String](data.toArray)

      //One label per training row: 0 (spam) for every spam mail followed by 1 (ham) for every ham mail.
      //The sizes follow the mails that were actually loaded, so labels and rows always line up.
      val classifiers = Array.fill[Int](spamMails.length)(0) ++ Array.fill[Int](hamMails.length)(1)

      //Turn the mails into feature vectors, spam first so the order matches the labels
      val spamData = spamMails.map(x => bag.feature(x._2.split(" "))).toArray
      val hamData = hamMails.map(x => bag.feature(x._2.split(" "))).toArray
      val trainingData = spamData ++ hamData

      //Create the bayes model as a multinomial with 2 classification groups and one dimension per feature
      val bayes = new NaiveBayes(NaiveBayes.Model.MULTINOMIAL, 2, data.size)
      bayes.learn(trainingData, classifiers)

      //Evaluation on the testing set
      val listOfTestFiles = getFilesFromDir(easyHam2Path)
      val testMails = listOfTestFiles.map(x => (x, getMessage(x)))
      val testFeatureVectors = testMails.map(x => bag.feature(x._2.split(" ")))
      val classificationResults = testFeatureVectors.map(x => bayes.predict(x))

      printShare(classificationResults.count(x => x == 0), listOfTestFiles.length, "classified as spam")
      printShare(classificationResults.count(x => x == 1), listOfTestFiles.length, "classified as ham")
      printShare(classificationResults.count(x => x == -1), listOfTestFiles.length, "unknowingly classified")
    }
    catch {
      case e: Exception => println("You probably are missing the sample data. You can download these from the spamassasin corpus (mentioned in the example on http://xyclade.ml) and place them in the directory 'data' in this project. Check the exception for more details: " + e)
    }
  }

  /**
   * Selects the words that occur in the largest share of the given mails.
   *
   * @param mails            pairs of mail file and message body
   * @param stopWords        words that are never considered a feature
   * @param amountOfFeatures maximum amount of words to return
   * @return the words ordered by occurrence rate descending (the share of mails that contain the word)
   */
  private def topFeatures(mails: List[(File, String)], stopWords: Set[String], amountOfFeatures: Int): List[String] = {
    //For every word the amount of distinct mails (identified by file name) it occurs in
    val documentsPerWord = mails
      .flatMap { case (file, body) =>
        body.split(" ")
          .filter(word => word.nonEmpty && !stopWords.contains(word))
          .map(word => (file.getName, word))
      }
      .groupBy(x => x._2)
      .map { case (word, hits) => (word, hits.map(x => x._1).distinct.length) }
      .toList

    //The rate is relative to the size of this very set of mails
    documentsPerWord
      .sortBy { case (_, documentCount) => -(documentCount.toDouble / mails.length) }
      .take(amountOfFeatures)
      .map(x => x._1)
  }

  /**
   * Prints the absolute amount and the percentage of mails that had the given outcome.
   *
   * @param count   amount of mails with the outcome
   * @param total   amount of mails that were classified
   * @param outcome description of the outcome that ends both printed lines
   */
  private def printShare(count: Int, total: Int, outcome: String): Unit = {
    println(s"$count of $total were $outcome")
    println(s"${(count.toDouble / total) * 100}% was $outcome")
  }

  /**
   * Lists the regular files that are located directly in a directory.
   *
   * @param path path of the directory
   * @return the files, without the ".DS_Store" and "cmds" bookkeeping files;
   *         empty when the path is not a readable directory
   */
  def getFilesFromDir(path: String): List[File] = {
    val directory = new File(path)
    if (directory.exists && directory.isDirectory) {
      //listFiles returns null on an I/O error, which is treated as an empty directory
      val entries = Option(directory.listFiles).map(x => x.toList).getOrElse(List[File]())
      //Match on the file name only, so a directory name in the path can never exclude every file
      entries.filter(x => x.isFile && !x.getName.contains(".DS_Store") && !x.getName.contains("cmds"))
    } else {
      List[File]()
    }
  }

  /**
   * Reads the stop words from "data/stopwords.txt", one word per line.
   *
   * @return the non-empty lines of the file without surrounding whitespace
   */
  def getStopWords: List[String] = {
    val source = scala.io.Source.fromFile(new File("data/stopwords.txt"))("latin1")
    try {
      //getLines copes with both \n and \r\n line endings
      source.getLines().map(x => x.trim).filter(x => x.nonEmpty).toList
    } finally {
      source.close()
    }
  }

  /**
   * Extracts the normalized message body from an email file.
   *
   * @param file the email file, read as latin1
   * @return the text after the first empty line, in lower case and reduced to the
   *         characters a-z and space; the whole text is used when no empty line exists
   */
  def getMessage(file: File): String = {
    //Note that the encoding of the example files is latin1, thus this should be passed to the from file method.
    val source = scala.io.Source.fromFile(file)("latin1")
    val lines = try {
      source.getLines mkString "\n"
    } finally {
      source.close()
    }
    //The first empty line separates the headers from the message body. indexOf yields -1
    //when there is none, which must not be passed on to substring.
    val bodyStart = math.max(lines.indexOf("\n\n"), 0)
    //Return the message body filtered by only text from a-z and to lower case
    lines.substring(bodyStart).replace("\n", " ").replaceAll("[^a-zA-Z ]", "").toLowerCase
  }
}
138 |
--------------------------------------------------------------------------------
/Code Examples/NaiveBayes/src/main/java/TDM.scala:
--------------------------------------------------------------------------------
1 | import scala.collection.mutable
2 |
/**
 * A term document matrix: for every term it keeps a record of how often the
 * term occurs in each document.
 */
class TDM {

  //One record per term; a new record is prepended when a term is seen for the first time
  var records: List[TDMRecord] = List[TDMRecord]()

  /**
   * Registers one occurrence of a term in a document.
   *
   * @param term         the term that occurred
   * @param documentName name of the document the term occurred in
   */
  def addTermToRecord(term: String, documentName: String): Unit = {
    records.find(x => x.term == term) match {
      case Some(termRecord) =>
        //Keyed lookup in the hash map; a document that is not present yet counts as 0
        val occurrencesSoFar = termRecord.occurrences.getOrElse(documentName, 0)
        termRecord.occurrences(documentName) = occurrencesSoFar + 1
      case None =>
        //No record yet exists for this term
        val newRecord = new TDMRecord(term, mutable.HashMap[String, Int](documentName -> 1))
        records = newRecord :: records
    }
  }

  /** Orders the records by the total frequency of their term, descending. */
  def SortByTotalFrequency(): Unit = records = records.sortBy(x => -x.totalFrequency)

  /**
   * Orders the records by the occurrence rate of their term, descending.
   *
   * @param rate value passed to TDMRecord.occurrenceRate as the total amount of documents
   */
  def SortByOccurrenceRate(rate: Int): Unit = records = records.sortBy(x => -x.occurrenceRate(rate))
}
34 |
/**
 * The occurrences of a single term.
 *
 * @param term        the term this record belongs to
 * @param occurrences amount of occurrences of the term, keyed by document name
 */
class TDMRecord(val term: String, var occurrences: mutable.HashMap[String, Int]) {

  /** Amount of occurrences of the term summed over all documents. */
  def totalFrequency = occurrences.values.sum

  /** Amount of documents that contain the term, divided by totalDocuments. */
  def occurrenceRate(totalDocuments: Int): Double = {
    val documentsWithTerm = occurrences.size
    documentsWithTerm.toDouble / totalDocuments
  }

  /** Total frequency of the term, divided by totalTerms. */
  def densityRate(totalTerms: Int): Double = {
    val frequency = totalFrequency
    frequency.toDouble / totalTerms
  }
}
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/.name:
--------------------------------------------------------------------------------
1 | PCA
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/highlighting.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/libraries/Maven__com_github_haifengl_smile_core_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/libraries/Maven__com_github_haifengl_smile_data_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/libraries/Maven__com_github_haifengl_smile_graph_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/libraries/Maven__com_github_haifengl_smile_math_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/libraries/Maven__com_github_haifengl_smile_plot_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/libraries/Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/libraries/smile_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Code Examples/PCA/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/PCA/PCA.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/PCA/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | Xyclade.ml
8 | PCA
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | com.github.haifengl
14 | smile-core
15 | 1.0.3
16 |
17 |
18 | com.github.haifengl
19 | smile-plot
20 | 1.0.2
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Code Examples/PCA/projectFilesBackup/PCA.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/PCA/src/main/java/PCA.scala:
--------------------------------------------------------------------------------
1 | import java.awt.{Dimension, Color}
2 | import java.io.{PrintWriter, File}
3 | import java.text.{DecimalFormat, DateFormat, SimpleDateFormat}
4 | import java.util.{Locale, Date}
5 |
6 | import smile.math.distance.CorrelationDistance
7 | import smile.plot.{PlotCanvas, LinePlot, ScatterPlot, Line}
8 | import smile.projection.PCA
9 |
10 | import scala.swing.{MainFrame, SimpleSwingApplication}
11 | import scala.util.Random
12 |
13 |
/**
 * PCA example: projects the closing prices of a set of stocks onto a single
 * principal component and plots the result next to the Dow Jones Index.
 */
object PCA extends SimpleSwingApplication {

  /** Builds the main window that contains the plot of both lines. */
  def top = new MainFrame {

    title = "PCA Example from http://xyclade.ml"
    //Get the example data
    val basePath = "data/"
    val exampleDataPath = basePath + "PCA_Example_1.csv"
    val trainData = GetStockDataFromCSV(new File(exampleDataPath))

    val pca = new PCA(trainData._2)

    //We want to merge into 1 feature
    pca.setProjection(1)
    val points = pca.project(trainData._2)

    //Scale the merged feature by its own value range and plot it against the record index.
    //NOTE(review): the value is negated; presumably to give the line the same direction as the
    //Dow Jones line - confirm. When all points are equal the range is 0 and the division
    //yields NaN or Infinity.
    val maxDataValue = points.maxBy(x => x(0))
    val minDataValue = points.minBy(x => x(0))
    val rangeValue = maxDataValue(0) - minDataValue(0)
    val plotData = points.zipWithIndex.map(x => Array(x._2.toDouble, -x._1(0) / rangeValue))
    val canvas: PlotCanvas = LinePlot.plot("Merged Features Index", plotData, Line.Style.DASH, Color.RED)

    //Verification against DJI (the file is read and parsed only once)
    val verificationDataPath = basePath + "PCA_Example_2.csv"
    val DJIIndex = GetDJIFromFile(new File(verificationDataPath))
    canvas.line("Dow Jones Index", DJIIndex._2, Line.Style.DOT_DASH, Color.BLUE)

    peer.setContentPane(canvas)
    size = new Dimension(700, 400)
  }

  /**
   * Parses every non-blank line of a CSV file except the header line.
   *
   * The file is closed again even when parsing a line fails.
   *
   * @param file  the CSV file
   * @param parse converts one line into a record
   * @return the records in file order
   */
  private def readRecords[T](file: File)(parse: String => T): List[T] = {
    val source = scala.io.Source.fromFile(file)
    try {
      //Drop the header and skip blank lines (such as a trailing one), which can not be parsed
      source.getLines().drop(1).filter(x => x.trim.nonEmpty).map(parse).toList
    } finally {
      source.close()
    }
  }

  /** Parses a date in the format yyyy-MM-dd. A new formatter is created on every call. */
  private def parseDate(text: String): Date = new SimpleDateFormat("yyyy-MM-dd").parse(text)

  /**
   * Reads the stock records from a CSV file and groups them per date.
   *
   * @param file the CSV file, with a header line followed by "date,stock,close" records
   * @return the dates in ascending order and, per date, the closing values ordered by stock name
   */
  def GetStockDataFromCSV(file: File): (Array[Date], Array[Array[Double]]) = {
    //Get all the records (minus the header)
    val data = readRecords(file)(x => GetStockDataFromString(x)).toArray
    //group all records by date, and sort the groups on date ascending
    val groupedByDate = data.groupBy(x => x._1).toArray.sortBy(x => x._1)
    //extract the values from the 3-tuple and turn them into an array of tuples: Array[(Date, Array[Double])]
    val dateArrayTuples = groupedByDate.map(x => (x._1, x._2.sortBy(y => y._2).map(y => y._3)))

    //turn the tuples into two separate arrays for easier use later on
    val dateArray = dateArrayTuples.map(x => x._1)
    val doubleArray = dateArrayTuples.map(x => x._2)

    (dateArray, doubleArray)
  }

  /**
   * Parses one stock record.
   *
   * @param dataString a comma separated line: date (yyyy-MM-dd), stock name, closing value
   * @return the date, the stock name and the closing value
   */
  def GetStockDataFromString(dataString: String): (Date, String, Double) = {
    //Split the comma separated value string into an array of strings
    val dataArray: Array[String] = dataString.split(',')

    //Extract the values from the strings
    val date = parseDate(dataArray(0))
    val stock: String = dataArray(1)
    val close: Double = dataArray(2).toDouble

    //And return the result in a format that can later easily be used to feed to Smile
    (date, stock, close)
  }

  /**
   * Parses one Dow Jones Index record.
   *
   * @param dataString a comma separated line with the date (yyyy-MM-dd) in the first
   *                   column and the closing value in the fifth column
   * @return the date and the closing value
   */
  def GetDJIRecordFromString(dataString: String): (Date, Double) = {
    //Split the comma separated value string into an array of strings
    val dataArray: Array[String] = dataString.split(',')

    //Extract the values from the strings
    val date = parseDate(dataArray(0))
    val close: Double = dataArray(4).toDouble

    //And return the result in a format that can later easily be used to feed to Smile
    (date, close)
  }

  /**
   * Reads the Dow Jones Index records from a CSV file.
   *
   * @param file the CSV file, with a header line
   * @return the dates in ascending order and, in the same order, the closing values
   *         divided by the difference between the highest and the lowest closing value
   */
  def GetDJIFromFile(file: File): (Array[Date], Array[Double]) = {
    //Get all the records (minus the header)
    val data = readRecords(file)(x => GetDJIRecordFromString(x)).toArray

    //turn the tuples into two separate arrays for easier use later on
    val sortedData = data.sortBy(x => x._1)
    val dates = sortedData.map(x => x._1)
    val closes = sortedData.map(x => x._2)
    //max and min throw on a file without records, like maxBy and minBy did before
    val rangeValue = closes.max - closes.min
    val doubles = closes.map(x => x / rangeValue)

    (dates, doubles)
  }
}
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/.name:
--------------------------------------------------------------------------------
1 | RecommendationSystem
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/Maven__com_github_haifengl_smile_core_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/Maven__com_github_haifengl_smile_data_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/Maven__com_github_haifengl_smile_graph_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/Maven__com_github_haifengl_smile_math_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/Maven__com_github_haifengl_smile_plot_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/joda_time_joda_time_2_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/libraries/smile_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/RecommendationSystem.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/data/stopwords.txt:
--------------------------------------------------------------------------------
1 | i
2 | me
3 | my
4 | myself
5 | we
6 | our
7 | ours
8 | ourselves
9 | you
10 | your
11 | yours
12 | yourself
13 | yourselves
14 | he
15 | him
16 | his
17 | himself
18 | she
19 | her
20 | hers
21 | herself
22 | it
23 | its
24 | itself
25 | they
26 | them
27 | their
28 | theirs
29 | themselves
30 | what
31 | which
32 | who
33 | whom
34 | this
35 | that
36 | these
37 | those
38 | am
39 | is
40 | are
41 | was
42 | were
43 | be
44 | been
45 | being
46 | have
47 | has
48 | had
49 | having
50 | do
51 | does
52 | did
53 | doing
54 | would
55 | should
56 | could
57 | ought
58 | im
59 | youre
60 | hes
61 | shes
62 | were
63 | theyre
64 | ive
65 | youve
66 | weve
67 | theyve
68 | id
69 | youd
70 | hed
71 | shed
72 | wed
73 | theyd
74 | ill
75 | youll
76 | hell
77 | shell
78 | well
79 | theyll
80 | isnt
81 | arent
82 | wasnt
83 | werent
84 | hasnt
85 | havent
86 | hadnt
87 | doesnt
88 | dont
89 | didnt
90 | wont
91 | wouldnt
92 | shant
93 | shouldnt
94 | cant
95 | cannot
96 | couldnt
97 | mustnt
98 | lets
99 | thats
100 | whos
101 | whats
102 | heres
103 | theres
104 | whens
105 | wheres
106 | whys
107 | hows
108 | a
109 | an
110 | the
111 | and
112 | but
113 | if
114 | or
115 | because
116 | as
117 | until
118 | while
119 | of
120 | at
121 | by
122 | for
123 | with
124 | about
125 | against
126 | between
127 | into
128 | through
129 | during
130 | before
131 | after
132 | above
133 | below
134 | to
135 | from
136 | up
137 | down
138 | in
139 | out
140 | on
141 | off
142 | over
143 | under
144 | again
145 | further
146 | then
147 | once
148 | here
149 | there
150 | when
151 | where
152 | why
153 | how
154 | all
155 | any
156 | both
157 | each
158 | few
159 | more
160 | most
161 | other
162 | some
163 | such
164 | no
165 | nor
166 | not
167 | only
168 | own
169 | same
170 | so
171 | than
172 | too
173 | very
174 | tr
175 | td
176 |
177 |
178 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | Xyclade.ml
8 | RecommendationSystem
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | com.github.haifengl
14 | smile-core
15 | 1.0.3
16 |
17 |
18 | com.github.haifengl
19 | smile-plot
20 | 1.0.2
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/projectFilesBackup/RecommendationSystem.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/RecommendationSystem/src/main/java/RecommendationSystem.scala:
--------------------------------------------------------------------------------
1 | import java.awt.{Rectangle}
2 | import java.io.File
3 | import java.text.SimpleDateFormat
4 | import java.util.Date
5 | import smile.plot.BarPlot
6 |
7 | import scala.swing.{MainFrame, SimpleSwingApplication}
8 | import scala.util.Try
9 |
10 | object RecommendationSystem extends SimpleSwingApplication {
11 |
12 |
13 | case class EmailData(emailDate : Date, sender : String, subject : String, body : String)
14 |
/**
 * Builds the main window of the example.
 *
 * Loads the e-mails from `data/easy_ham`, sorts them by date and splits them
 * in a training half and a testing half. The training half is used to derive
 * sender weights, thread weights and term weights; these are combined into a
 * rank for every mail. A bar plot with the (log scaled) amount of mails per
 * sender is shown in the frame, and the ten highest ranked test mails are
 * printed to standard out as a markdown table.
 *
 * Any exception raised while loading or ranking is caught and reported on
 * standard out with a hint about the missing sample data.
 */
def top = new MainFrame {
  title = "Recommendation System Example from http://xyclade.ml"

  val basePath = "data"
  val easyHamPath = basePath + "/easy_ham"

  try {
    val mails = getFilesFromDir(easyHamPath).map(x => getFullEmail(x))
    val timeSortedMails = mails
      .map(x => EmailData(getDateFromEmail(x), getSenderFromEmail(x), getSubjectFromEmail(x), getMessageBodyFromEmail(x)))
      .sortBy(x => x.emailDate)

    //The oldest half is used for training, the newest half for testing
    val (trainingData, testingData) = timeSortedMails
      .splitAt(timeSortedMails.length / 2)

    //First we group the emails by sender, then we keep only the sender address and the
    //log scaled amount of emails, and finally we sort them on that amount ascending
    val mailsGroupedBySender = trainingData
      .groupBy(x => x.sender)
      .map(x => (x._1, Math.log1p(x._2.length)))
      .toArray
      .sortBy(x => x._2)

    //In order to plot the data we split the values from the addresses as this is how the plotting library accepts the data.
    val senderDescriptions = mailsGroupedBySender.map(x => x._1)
    val senderValues = mailsGroupedBySender.map(x => x._2.toDouble)

    val barPlot = BarPlot.plot("", senderValues, senderDescriptions)

    //Rotate the email addresses by -80 degrees (in radians) such that we can read them
    barPlot.getAxis(0).setRotation(-1.3962634)
    barPlot.setAxisLabel(0, "")
    barPlot.setAxisLabel(1, "Amount of emails received on log Scale ")
    peer.setContentPane(barPlot)

    bounds = new Rectangle(800, 600)

    val mailsGroupedByThread = trainingData
      .groupBy(x => x.subject)

    //Create a list of tuples with (subject, log scaled amount of emails).
    //NOTE(review): this data (and the descriptions/values below) is prepared for a plot but not used further in this method.
    val threadBarPlotData = mailsGroupedByThread
      .map(x => (x._1, Math.log1p(x._2.length)))
      .toArray
      .sortBy(x => x._2)

    val threadDescriptions = threadBarPlotData
      .map(x => x._1)
    val threadValues = threadBarPlotData
      .map(x => x._2.toDouble)

    //(subject, emails in the thread, seconds between the first and the last email of the thread)
    val mailGroupsWithMinMaxDates = mailsGroupedByThread
      .map(x => (x._1, x._2, (x._2
        .maxBy(x => x.emailDate)
        .emailDate.getTime - x._2.minBy(x => x.emailDate).emailDate.getTime
        ) / 1000))

    //turn into a list of tuples with (topic, list of emails, time difference, and weight) filtered that only threads occur
    val threadGroupsWithWeights = mailGroupsWithMinMaxDates
      .filter(x => x._3 != 0)
      .map(x => (x._1, x._2, x._3, 10 + Math.log10(x._2.length.toDouble / x._3)))

    //A set gives constant time membership checks; the stop words are consulted for every term of every mail
    val stopWords = getStopWords.toSet

    val threadTermWeights = threadGroupsWithWeights
      .toArray
      .sortBy(x => x._4)
      .flatMap(x => x._1
        .replaceAll("[^a-zA-Z ]", "")
        .toLowerCase.split(" ")
        .filter(_.nonEmpty)
        .map(y => (y, x._4)))

    //NOTE(review): computed but not used in the ranking below; confirm whether the subject weight was meant to use it.
    val filteredThreadTermWeights = threadTermWeights
      .groupBy(x => x._1)
      .map(x => (x._1, x._2.maxBy(y => y._2)._2))
      .toArray.sortBy(x => x._1)
      .filter(x => !stopWords.contains(x._1))

    //Term -> log10(occurrences + 1) over all training mail bodies
    val tdm = trainingData
      .flatMap(x => x.body.split(" "))
      .filter(x => x.nonEmpty && !stopWords.contains(x))
      .groupBy(x => x)
      .map(x => (x._1, Math.log10(x._2.length + 1)))
      .filter(x => x._2 != 0)

    //Lookup tables so ranking a mail does not scan every sender/thread again (keys are unique after groupBy)
    val senderWeights: Map[String, Double] = mailsGroupedBySender.toMap
    val threadWeights: Map[String, Double] = threadGroupsWithWeights.map(x => (x._1, x._4)).toMap

    //Splits a text in lower case terms, without non alphabetic characters and stop words
    def termsOf(text: String): Array[String] = text
      .replaceAll("[^a-zA-Z ]", "")
      .toLowerCase.split(" ")
      .filter(x => x.nonEmpty && !stopWords.contains(x))

    //Average of (tdm weight + 1) over the terms; unknown terms count as 1.0, no terms at all gives 1.0
    def averageTermWeight(terms: Array[String]): Double =
      if (terms.size > 0) terms
        .map(x => tdm.get(x).map(z => z + 1).getOrElse(1.0))
        .sum / terms.length
      else 1.0

    //Returns (termWeight, threadGroupWeight, commonTermsWeight, senderWeight) for a mail
    def weightsFor(mail: EmailData): (Double, Double, Double, Double) = {
      //Determine the weight of the sender
      val senderWeight = senderWeights.get(mail.sender).map(x => x + 1).getOrElse(1.0)

      //Determine the weight of the subject
      val termWeight = averageTermWeight(termsOf(mail.subject))

      //Determine if the email is from a thread, and if it is the weight from this thread:
      val threadGroupWeight: Double = threadWeights.getOrElse(mail.subject, 1.0)

      //Determine the commonly used terms in the email and the weight belonging to it:
      val commonTermsWeight = averageTermWeight(termsOf(mail.body))

      (termWeight, threadGroupWeight, commonTermsWeight, senderWeight)
    }

    val trainingRanks = trainingData.map(mail => {
      val (termWeight, threadGroupWeight, commonTermsWeight, senderWeight) = weightsFor(mail)
      val rank = termWeight * threadGroupWeight * commonTermsWeight * senderWeight

      (mail, rank)
    })

    val sortedTrainingRanks = trainingRanks.sortBy(x => x._2)

    val median = sortedTrainingRanks(sortedTrainingRanks.length / 2)._2
    val mean = sortedTrainingRanks.map(x => x._2).sum / sortedTrainingRanks.length
    println("Median:" + median + " Mean:" + mean)

    val testingRanks = testingData.map(mail => {
      val (termWeight, threadGroupWeight, commonTermsWeight, senderWeight) = weightsFor(mail)
      val rank = termWeight * threadGroupWeight * commonTermsWeight * senderWeight

      (mail, rank, termWeight, threadGroupWeight, commonTermsWeight, senderWeight)
    })

    //A test mail is a priority mail when its rank is at least half the mean training rank
    val priorityEmails = testingRanks.filter(x => x._2 >= mean/2).sortBy(x => -x._2)
    val df = new java.text.DecimalFormat("#.##")

    println("|Date | Sender | Subject | Rank | thread term | thread time | common terms | sender |")
    println("| :--- | : -- | :-- | :-- | :-- | :-- | :-- | :-- | ")
    priorityEmails.take(10).foreach(x => println("| " + x._1.emailDate + " | " + x._1.sender + " | " + x._1.subject + " | " + df.format(x._2) + " |"+ df.format(x._3) + " |"+ df.format(x._4) + " |"+ df.format(x._5) + " |"+ df.format(x._6) + " |"))

    println(priorityEmails.length + " ranked as priority")

  }
  catch {
    case e: Exception => println("You probably are missing the sample data. You can download these from the spamassasin corpus (mentioned in the example on http://xyclade.ml) and place them in the directory 'data' in this project. Check the exception for more details: " + e)
  }
}
220 |
/**
 * Lists the regular files directly inside a directory.
 *
 * Files whose name contains ".DS_Store" (mac os storage file) or "cmds"
 * (present in the corpus on unix systems) are left out. Only the file name is
 * checked, so a parent directory that happens to contain one of these words
 * does not exclude every file.
 *
 * @param path path of the directory to list
 * @return the matching files, or an empty list when the path does not exist,
 *         is not a directory, or cannot be listed
 */
def getFilesFromDir(path: String): List[File] = {
  val d = new File(path)
  //listFiles returns null when the directory cannot be read, hence the Option
  val entries = if (d.exists && d.isDirectory) Option(d.listFiles) else None
  entries
    .map(files => files
      .filter(x => x.isFile && !x.getName.contains(".DS_Store") && !x.getName.contains("cmds"))
      .toList)
    .getOrElse(List[File]())
}
230 |
231 |
/**
 * Reads a complete e-mail file into a single string.
 *
 * The example files are encoded in latin1, so that encoding is passed to the
 * reader. Lines are joined with "\n".
 *
 * @param file the e-mail file to read
 * @return the content of the file with "\n" between the lines
 */
def getFullEmail(file: File): String = {
  val source = scala.io.Source.fromFile(file)("latin1")
  //Close the file even when reading fails half way
  try {
    source.getLines().mkString("\n")
  } finally {
    source.close()
  }
}
240 |
241 |
/**
 * Extracts the subject of an e-mail, trimmed and in lower case.
 *
 * The text after the first "Subject:" up to the end of that line is used.
 * Every "re: " tag is removed to make grouping on topic easier; the tag is
 * only removed when it is a word on its own, so a subject like
 * "software: update" is left intact.
 *
 * @param email the full e-mail (headers and body)
 * @return the normalised subject, or an empty string when there is no
 *         "Subject:" in the e-mail
 */
def getSubjectFromEmail(email: String): String = {
  val header = "Subject:"
  val subjectIndex = email.indexOf(header)

  if (subjectIndex < 0) {
    ""
  } else {
    //The subject runs until the end of the line, or the end of the text when it is the last line
    val newlineIndex = email.indexOf('\n', subjectIndex)
    val endOfSubjectIndex = if (newlineIndex < 0) email.length else newlineIndex

    val subject = email
      .substring(subjectIndex + header.length, endOfSubjectIndex)
      .trim
      .toLowerCase

    //\b makes sure we only drop the reply tag and not the tail of another word
    subject.replaceAll("\\bre: ", "")
  }
}
257 |
/**
 * Extracts the message body of an e-mail.
 *
 * The body is everything from the first empty line onwards. Line breaks are
 * replaced by spaces, every character other than a-z, A-Z and space is
 * removed, and the result is converted to lower case.
 *
 * @param email the full e-mail (headers and body)
 * @return the cleaned body, or an empty string when the e-mail has no empty
 *         line separating headers from body
 */
def getMessageBodyFromEmail(email: String): String = {
  val firstLineBreak = email.indexOf("\n\n")

  if (firstLineBreak < 0) {
    //No header/body separator, so there is no body to return
    ""
  } else {
    email.substring(firstLineBreak)
      .replace("\n", " ")
      .replaceAll("[^a-zA-Z ]", "")
      .toLowerCase
  }
}
267 |
268 |
/**
 * Extracts the sender address of an e-mail, trimmed and in lower case.
 *
 * The first line containing "From:" is used. When the line holds an address
 * between &lt; and &gt; that address is returned. Otherwise the remainder of
 * the line is returned, without a possible name between parentheses at the
 * end (for example "test@test.com (tester)" gives "test@test.com").
 *
 * @param email the full e-mail (headers and body)
 * @return the sender address, or an empty string when there is no "From:"
 *         in the e-mail
 */
def getSenderFromEmail(email: String): String = {
  val fromLineIndex = email.indexOf("From:")

  if (fromLineIndex < 0) {
    ""
  } else {
    //The line runs until the next line break, or the end of the text when it is the last line
    val newlineIndex = email.indexOf('\n', fromLineIndex)
    val endOfLine = if (newlineIndex < 0) email.length else newlineIndex
    val fromLine = email.substring(fromLineIndex, endOfLine)

    //Search for the <> tags in this line, as if they are there, the email address is contained inside these tags
    val tagStart = fromLine.indexOf('<')
    val tagEnd = fromLine.indexOf('>')

    if (tagStart >= 0 && tagEnd > tagStart) {
      //If these <> tags are there, there is no () with a name in the From: string in our data
      fromLine
        .substring(tagStart + 1, tagEnd)
        .trim
        .toLowerCase
    } else {
      //The email address was not embedded in <> tags: take everything after "From:" (5 characters)
      val emailString = fromLine
        .substring(5)
        .trim
        .toLowerCase

      //Remove a possible name embedded in () at the end of the line
      val additionalNameStartIndex = emailString.indexOf('(')
      if (additionalNameStartIndex == -1) emailString
      else emailString.substring(0, additionalNameStartIndex).trim
    }
  }
}
311 |
/**
 * Extracts the date of an e-mail.
 *
 * The text after the first "Date:" up to the end of that line is matched
 * against the date patterns that occur in the example data, in order; the
 * first pattern that parses wins. Day and month names are always parsed as
 * English, independent of the default locale of the machine. The date is
 * interpreted in the default time zone.
 *
 * @param email the full e-mail (headers and body)
 * @return the parsed date, or null when there is no "Date:" in the e-mail or
 *         none of the patterns match
 */
def getDateFromEmail(email: String): Date = {
  //Find the index of the Date: line in the complete email
  val dateLineIndex = email.indexOf("Date:")

  if (dateLineIndex < 0) {
    null
  } else {
    val newlineIndex = email.indexOf('\n', dateLineIndex)
    val endOfDateLine = if (newlineIndex < 0) email.length else newlineIndex
    val dateText = email.substring(dateLineIndex + 5, endOfDateLine).trim

    //All possible date patterns in the emails.
    val datePatterns = Array("EEE MMM dd HH:mm:ss yyyy", "EEE, dd MMM yyyy HH:mm", "dd MMM yyyy HH:mm:ss", "EEE MMM dd yyyy HH:mm")

    //The iterator is lazy, so parsing stops at the first pattern that works.
    //Only the first pattern-length characters are parsed; a text that is too short simply fails that pattern.
    datePatterns.iterator
      .map(x => Try(new SimpleDateFormat(x, java.util.Locale.ENGLISH).parse(dateText.substring(0, x.length))))
      .find(_.isSuccess)
      .map(_.get)
      .orNull
  }
}
327 |
/**
 * Reads the stop words from "data/stopwords.txt" (latin1 encoded, one word
 * per line).
 *
 * Surrounding whitespace (including a carriage return from windows line
 * endings) is removed and empty lines are skipped.
 *
 * @return the stop words in file order
 */
def getStopWords: List[String] = {
  val source = scala.io.Source.fromFile(new File("data/stopwords.txt"))("latin1")
  //toList reads everything before the file is closed in the finally block
  try {
    source.getLines().map(_.trim).filter(_.nonEmpty).toList
  } finally {
    source.close()
  }
}
334 | }
335 |
336 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/.name:
--------------------------------------------------------------------------------
1 | SVM
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/libraries/Maven__com_github_haifengl_smile_core_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/libraries/Maven__com_github_haifengl_smile_data_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/libraries/Maven__com_github_haifengl_smile_graph_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/libraries/Maven__com_github_haifengl_smile_math_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/libraries/Maven__com_github_haifengl_smile_plot_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/libraries/Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/Code Examples/SVM/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/SVM/SVM.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/Code Examples/SVM/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | Xyclade.ml
8 | SVM
9 | 1.0-SNAPSHOT
10 |
11 |
12 | com.github.haifengl
13 | smile-core
14 | 1.0.3
15 |
16 |
17 | com.github.haifengl
18 | smile-plot
19 | 1.0.2
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Code Examples/SVM/projectFilesBackup/SVM.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/SVM/src/main/java/SVM_Example_2.scala:
--------------------------------------------------------------------------------
1 | import java.awt.{Color, Dimension}
2 | import java.io.{ File}
3 | import java.text.DecimalFormat
4 |
5 | import smile.math.kernel.{PolynomialKernel, GaussianKernel}
6 | import smile.plot.ScatterPlot
7 | import smile.classification.SVM
8 | import scala.collection.mutable
9 | import scala.swing.{SimpleSwingApplication, MainFrame}
10 |
11 | object SVM_Example_2 extends SimpleSwingApplication {
12 |
13 |
/**
 * Builds the main window of the second SVM example.
 *
 * Shows a scatter plot of the training data and, for every combination of
 * polynomial degree and margin penalty, trains an SVM on the training data
 * and measures the percentage of wrong predictions on the testing data. The
 * results are printed to standard out, first per combination and then as a
 * markdown table with one column per degree and one row per margin penalty.
 */
def top = new MainFrame {
  title = "SVM Example 2"
  //File paths (these change per example)
  val trainingPath = "data/SVM_Example_2.csv"
  val testingPath = "data/SVM_Example_2_Test_data.csv"
  val df = new DecimalFormat("#.#")
  //Loading of the data and plot generation stays the same
  val trainingData = GetDataFromCSV(new File(trainingPath))
  val testingData = GetDataFromCSV(new File(testingPath))

  val plot = ScatterPlot.plot(trainingData._1, trainingData._2, '@', Array(Color.blue, Color.green))
  peer.setContentPane(plot)

  //Collected results as (degree, margin penalty, error percentage)
  val printlist = mutable.MutableList[(Int,Double,Double)]()

  //These values are passed to the polynomial kernel, so they are degrees rather than gaussian sigmas
  val sigmas = Array(2,3,4,5)
  val marginPenalties = Array(0.001,0.01,0.1,0.2,0.5,1.0,2.0,3.0,10.0,100)

  sigmas.foreach( sigma =>
    marginPenalties.foreach(marginPenalty => {
      val svm = new SVM[Array[Double]](new PolynomialKernel(sigma), marginPenalty, 2)
      svm.learn(trainingData._1, trainingData._2)
      svm.finish()

      //Calculate how well the SVM predicts on the testing set
      val predictions = testingData._1.map(x => svm.predict(x)).zip(testingData._2)
      val falsePredictions = predictions.map(x => if (x._1 == x._2) 0 else 1)

      val result = (sigma ,marginPenalty , (falsePredictions.sum.toDouble / predictions.length * 100))
      printlist += result
      println("degree: " + sigma + " margin: " + marginPenalty + " error: " + result._3)
    }
    )
  )

  //Markdown table: header row, then a separator row with exactly one cell per column
  print("| |")
  sigmas.foreach(x => print(" s: " + x + " |"))
  println("")
  println("| :-- |" + sigmas.map(_ => " :--: |").mkString)
  marginPenalties.foreach(x => {
    val sigmaValues = sigmas.map(y => printlist.filter(z => z._1 == y && z._2 == x)(0)._3)
    println("")
    print("| **c: " + x + "** |")
    sigmaValues.foreach(s => print(" " + df.format(s) + "% |"))
  })

  size = new Dimension(400, 400)
}
70 |
71 |
/**
 * Reads the data points and their classes from a CSV file.
 *
 * The first line is treated as a header and skipped, empty lines are ignored,
 * and every other line is parsed with GetDataFromString.
 *
 * @param file the CSV file to read
 * @return a tuple with the coordinates of every data point and, at the same
 *         index, the class of that data point
 */
def GetDataFromCSV(file: File): (Array[Array[Double]], Array[Int]) = {
  val source = scala.io.Source.fromFile(file)
  //toArray reads everything before the file is closed; finally closes it even when a line fails to parse
  try {
    val data = source.getLines()
      .drop(1)
      .filter(x => x.trim.nonEmpty)
      .map(x => GetDataFromString(x))
      .toArray
    val dataPoints = data.map(x => x._1)
    val classifierArray = data.map(x => x._2)
    (dataPoints, classifierArray)
  } finally {
    source.close()
  }
}
80 |
/**
 * Parses one CSV line of the form "x,y,class".
 *
 * Whitespace around the fields is ignored, so "1.0, 2.0, 1" is accepted.
 *
 * @param dataString the comma separated line
 * @return the two coordinates and the class, in a format that can easily be
 *         fed to Smile
 */
def GetDataFromString(dataString: String): (Array[Double], Int) = {
  //Split the comma separated value string into an array of trimmed strings
  //(toInt does not accept surrounding whitespace, unlike toDouble)
  val dataArray: Array[String] = dataString.split(',').map(x => x.trim)

  //Extract the values from the strings
  val coordinates = Array(dataArray(0).toDouble, dataArray(1).toDouble)
  val classifier: Int = dataArray(2).toInt

  (coordinates, classifier)
}
92 | }
93 |
--------------------------------------------------------------------------------
/Code Examples/SVM/src/main/java/SupportVectorMachine.scala:
--------------------------------------------------------------------------------
1 | import java.awt.{Color, Dimension}
2 | import java.io.{ File}
3 | import java.text.DecimalFormat
4 |
5 | import smile.math.kernel.{ GaussianKernel}
6 | import smile.plot.ScatterPlot
7 | import smile.classification.SVM
8 | import scala.collection.mutable
9 | import scala.swing.{SimpleSwingApplication, MainFrame}
10 |
11 | object SupportVectorMachine extends SimpleSwingApplication {
12 |
13 |
/**
 * Builds the main window of the first SVM example.
 *
 * Shows a scatter plot of the data and, for every combination of gaussian
 * kernel sigma and margin penalty, trains an SVM on the data and measures the
 * percentage of wrong predictions on that same data. The results are printed
 * to standard out, first per combination and then as a markdown table with
 * one column per sigma and one row per margin penalty.
 */
def top = new MainFrame {
  title = "SVM Example 1"
  //File path (this changes per example)
  val path = "data/SVM_Example_1.csv"
  val df = new DecimalFormat("#.#")
  //Loading of the test data and plot generation stays the same
  val testData = GetDataFromCSV(new File(path))
  val plot = ScatterPlot.plot(testData._1, testData._2, '@', Array(Color.blue, Color.green))
  peer.setContentPane(plot)

  //Here we do our SVM fine tuning with possibly different kernels
  //val svm = new SVM[Array[Double]](new GaussianKernel(0.01), 10.0,2)
  //Collected results as (sigma, margin penalty, error percentage)
  val printlist = mutable.MutableList[(Double,Double,Double)]()

  val sigmas = Array(0.001,0.01,0.1,0.2,0.5,1.0,2.0,3.0,10.0,100)
  val marginPenalties = Array(0.001,0.01,0.1,0.2,0.5,1.0,2.0,3.0,10.0,100)

  sigmas.foreach( sigma =>
    marginPenalties.foreach(marginPenalty => {
      val svm = new SVM[Array[Double]](new GaussianKernel(sigma), marginPenalty, 2)
      svm.learn(testData._1, testData._2)
      svm.finish()

      //Calculate how well the SVM predicts on the training set
      val predictions = testData._1.map(x => svm.predict(x)).zip(testData._2)
      val falsePredictions = predictions.map(x => if (x._1 == x._2) 0 else 1)

      val result = (sigma ,marginPenalty , (falsePredictions.sum.toDouble / predictions.length * 100))
      printlist += result
      println("sigma: " + sigma + " margin: " + marginPenalty + " error: " + result._3)
    }
    )
  )

  //Markdown table: header row, then a separator row with exactly one cell per column
  print("| |")
  sigmas.foreach(x => print(" s: " + x + " |"))
  println("")
  println("| :-- |" + sigmas.map(_ => " :--: |").mkString)
  marginPenalties.foreach(x => {
    val sigmaValues = sigmas.map(y => printlist.filter(z => z._1 == y && z._2 == x)(0)._3)
    println("")
    print("| **c: " + x + "** |")
    sigmaValues.foreach(s => print(" " + df.format(s) + "% |"))
  })

  size = new Dimension(400, 400)
}
68 |
69 |
/**
 * Loads a CSV file with one data point per line.
 *
 * The header line is dropped, blank lines are skipped and each remaining
 * line is converted with GetDataFromString.
 *
 * @param file the CSV file to load
 * @return the coordinates of all data points together with, at matching
 *         indices, their classes
 */
def GetDataFromCSV(file: File): (Array[Array[Double]], Array[Int]) = {
  val source = scala.io.Source.fromFile(file)
  try {
    //Materialise the lines while the file is still open
    val data = source.getLines()
      .drop(1)
      .filter(line => line.trim.nonEmpty)
      .map(line => GetDataFromString(line))
      .toArray
    (data.map(x => x._1), data.map(x => x._2))
  } finally {
    //Also release the file when one of the lines could not be parsed
    source.close()
  }
}
78 |
/**
 * Converts one comma separated line ("x,y,class") into a data point.
 *
 * Fields may be padded with whitespace; the padding is removed before the
 * numbers are parsed.
 *
 * @param dataString the comma separated line
 * @return the coordinates (x, y) and the class of the data point, ready to
 *         be fed to Smile
 */
def GetDataFromString(dataString: String): (Array[Double], Int) = {
  //Split the comma separated value string and strip padding, which toInt would reject
  val dataArray: Array[String] = dataString.split(',').map(field => field.trim)

  //Extract the values from the strings
  val coordinates = Array(dataArray(0).toDouble, dataArray(1).toDouble)
  val classifier: Int = dataArray(2).toInt

  (coordinates, classifier)
}
90 | }
91 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/.name:
--------------------------------------------------------------------------------
1 | TextRegression
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__com_github_haifengl_smile_core_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__com_github_haifengl_smile_data_1_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__com_github_haifengl_smile_graph_1_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__com_github_haifengl_smile_math_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__com_github_haifengl_smile_plot_1_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__com_github_tototoshi_scala_csv_2_11_1_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__org_scala_lang_scala_library_2_11_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/Maven__org_swinglabs_swingx_swingx_all_1_6_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/com_github_tototoshi_scala_csv_2_11_1_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/libraries/smile_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/TextRegression.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/data/TextRegression_Example_1.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Code Examples/TextRegression/data/TextRegression_Example_1.csv
--------------------------------------------------------------------------------
/Code Examples/TextRegression/data/stopwords.txt:
--------------------------------------------------------------------------------
1 | i
2 | me
3 | my
4 | myself
5 | we
6 | our
7 | ours
8 | ourselves
9 | you
10 | your
11 | yours
12 | yourself
13 | yourselves
14 | he
15 | him
16 | his
17 | himself
18 | she
19 | her
20 | hers
21 | herself
22 | it
23 | its
24 | itself
25 | they
26 | them
27 | their
28 | theirs
29 | themselves
30 | what
31 | which
32 | who
33 | whom
34 | this
35 | that
36 | these
37 | those
38 | am
39 | is
40 | are
41 | was
42 | were
43 | be
44 | been
45 | being
46 | have
47 | has
48 | had
49 | having
50 | do
51 | does
52 | did
53 | doing
54 | would
55 | should
56 | could
57 | ought
58 | im
59 | youre
60 | hes
61 | shes
62 | were
63 | theyre
64 | ive
65 | youve
66 | weve
67 | theyve
68 | id
69 | youd
70 | hed
71 | shed
72 | wed
73 | theyd
74 | ill
75 | youll
76 | hell
77 | shell
78 | well
79 | theyll
80 | isnt
81 | arent
82 | wasnt
83 | werent
84 | hasnt
85 | havent
86 | hadnt
87 | doesnt
88 | dont
89 | didnt
90 | wont
91 | wouldnt
92 | shant
93 | shouldnt
94 | cant
95 | cannot
96 | couldnt
97 | mustnt
98 | lets
99 | thats
100 | whos
101 | whats
102 | heres
103 | theres
104 | whens
105 | wheres
106 | whys
107 | hows
108 | a
109 | an
110 | the
111 | and
112 | but
113 | if
114 | or
115 | because
116 | as
117 | until
118 | while
119 | of
120 | at
121 | by
122 | for
123 | with
124 | about
125 | against
126 | between
127 | into
128 | through
129 | during
130 | before
131 | after
132 | above
133 | below
134 | to
135 | from
136 | up
137 | down
138 | in
139 | out
140 | on
141 | off
142 | over
143 | under
144 | again
145 | further
146 | then
147 | once
148 | here
149 | there
150 | when
151 | where
152 | why
153 | how
154 | all
155 | any
156 | both
157 | each
158 | few
159 | more
160 | most
161 | other
162 | some
163 | such
164 | no
165 | nor
166 | not
167 | only
168 | own
169 | same
170 | so
171 | than
172 | too
173 | very
174 | tr
175 | td
176 |
177 |
178 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | xyclade.ml
8 | TextRegression
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 | com.github.haifengl
14 | smile-core
15 | 1.0.2
16 |
17 |
18 | com.github.haifengl
19 | smile-plot
20 | 1.0.2
21 |
22 |
23 | com.github.tototoshi
24 | scala-csv_2.11
25 | 1.2.1
26 |
27 |
28 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/projectFilesBackup/TextRegression.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Code Examples/TextRegression/src/main/java/DTM.scala:
--------------------------------------------------------------------------------
1 | import java.io.File
2 | import scala.collection.mutable
3 |
/**
 * A minimal document-term matrix (DTM).
 *
 * Documents are added one at a time with `addDocumentToRecords`; the numeric
 * matrix (one row per document, one column per vocabulary word) is produced by
 * `getNumericRepresentationForRecords`.
 */
class DTM {

  /** One record per added document. New documents are prepended, so the head is the most recently added. */
  var records: List[DTMRecord] = List[DTMRecord]()

  /** The vocabulary: every distinct word seen so far (most recently discovered first). */
  var wordList: List[String] = List[String]()

  /**
   * Tokenises a document on single spaces (case-insensitively), counts the
   * occurrences of each word and stores the result as a new record.
   *
   * @param documentName    unique name of the document
   * @param rank            the rank (target value) associated with the document
   * @param documentContent the raw text of the document
   * @throws Exception if a document with the same name was already added
   */
  def addDocumentToRecords(documentName: String, rank: Int, documentContent: String): Unit = {
    if (records.exists(_.document == documentName)) {
      throw new Exception("Document already exists in the records")
    }

    // Snapshot of the current vocabulary so membership checks are O(1).
    // Built from wordList on every call so it can never drift from the public var.
    val knownWords = mutable.HashSet[String]() ++= wordList
    val wordRecords = mutable.HashMap[String, Int]()

    documentContent.toLowerCase
      .split(" ")
      .filter(_.nonEmpty) // consecutive spaces / empty content yield "" tokens, which are not words
      .foreach { word =>
        wordRecords += word -> (wordRecords.getOrElse(word, 0) + 1)
        // Only extend the vocabulary when the word is new across ALL documents;
        // otherwise the matrix would get one duplicate column per document containing it.
        if (knownWords.add(word)) {
          wordList = word :: wordList
        }
      }

    records = new DTMRecord(documentName, rank, wordRecords) :: records
  }

  /**
   * Reads the stop words from `data/stopwords.txt` (latin1 encoded, one word per line).
   *
   * Lines are trimmed (so CRLF line endings do not leave a trailing `\r` on
   * each word) and blank lines are skipped.
   *
   * @return the stop words in file order
   */
  def getStopWords(): List[String] = {
    val source = scala.io.Source.fromFile(new File("data/stopwords.txt"))("latin1")
    try {
      // toList forces the lazy line iterator before the source is closed
      source.getLines().map(_.trim).filter(_.nonEmpty).toList
    } finally {
      source.close()
    }
  }

  /**
   * Converts the stored records into a numeric matrix plus the matching ranks.
   *
   * Side effect: stop words are permanently removed from `wordList`.
   *
   * @return a pair of (matrix, ranks). Row `i` of the matrix holds, for the
   *         i-th element of `records`, the occurrence count of every word in
   *         `wordList` (0.0 when the document does not contain the word);
   *         `ranks(i)` is the rank of that same record.
   */
  def getNumericRepresentationForRecords(): (Array[Array[Double]], Array[Double]) = {
    // First filter out all stop words (Set gives O(1) membership)
    val stopWords = getStopWords().toSet
    wordList = wordList.filterNot(stopWords.contains)

    val ranks = records.map(_.rank.toDouble).toArray

    val dtmNumeric = records.map { record =>
      wordList.map(word => record.occurrences.getOrElse(word, 0).toDouble).toArray
    }.toArray

    (dtmNumeric, ranks)
  }
}
70 |
/**
 * The entry of a single document in the document-term matrix.
 *
 * @param document    name of the document
 * @param rank        rank associated with the document
 * @param occurrences number of occurrences per word in the document
 */
class DTMRecord(
  val document: String,
  val rank: Int,
  var occurrences: mutable.HashMap[String, Int]
)
--------------------------------------------------------------------------------
/Code Examples/TextRegression/src/main/java/TextRegression.scala:
--------------------------------------------------------------------------------
1 | import java.awt.Color
2 | import java.io.File
3 | import java.util.Calendar
4 | import com.github.tototoshi.csv._
5 | import smile.plot._
6 | import smile.regression.{LASSO, RidgeRegression}
7 | import smile.validation.CrossValidation
/**
 * Text regression example: builds a document-term matrix from a CSV of
 * documents and evaluates LASSO models for several lambda values using
 * 2-fold cross validation, printing the RMSE of every model.
 */
object TextRegression {

  def main(args: Array[String]): Unit = {

    //Get the example data
    val basePath = "data/TextRegression_Example_1.csv"
    val testData = GetDataFromCSV(new File(basePath))

    //Create a document term matrix for the data
    val documentTermMatrix = new DTM()
    testData.foreach(x => documentTermMatrix.addDocumentToRecords(x._1, x._2, x._3))

    //Get the cross validation data
    val cv = new CrossValidation(testData.length, 2)
    val (dataPoints, ranks) = documentTermMatrix.getNumericRepresentationForRecords()

    //These are the lambda values we will verify against
    val lambdas: Array[Double] = Array(0.1, 0.25, 0.5, 1.0, 2.0, 5.0)

    for (i <- 0 until cv.k) {
      //Split the row indices into the held-out test fold and the remaining training rows.
      //The fold is converted to a Set once instead of once per row.
      val testIndices = cv.test(i).toSet
      val (testRows, trainRows) = dataPoints.indices.partition(testIndices.contains)

      //Train on everything that is NOT in the test fold ...
      val dpForTraining = trainRows.map(dataPoints(_)).toArray
      val classifiersForTraining = trainRows.map(ranks(_)).toArray

      //... and evaluate on the held-out fold
      val dpForTesting = testRows.map(dataPoints(_)).toArray
      val classifiersForTesting = testRows.map(ranks(_)).toArray

      lambdas.foreach { lambda =>
        //Define a new model based on the training data and one of the lambda's
        val model = new LASSO(dpForTraining, classifiersForTraining, lambda)

        //Compute the RMSE for this model with this lambda
        val squaredErrors = dpForTesting.zip(classifiersForTesting).map { case (row, actual) =>
          val error = model.predict(row) - actual
          error * error
        }
        val RMSE = Math.sqrt(squaredErrors.sum / squaredErrors.length)
        println(s"${Calendar.getInstance().getTime} Lambda: $lambda RMSE: $RMSE")
      }
    }
  }

  /**
   * Reads the documents from the given CSV file, skipping the header row.
   *
   * @param file the CSV file to read
   * @return one tuple per data row: (column 1, column 3 parsed as Int, column 4),
   *         which `main` passes to the DTM as (document name, rank, document content)
   */
  def GetDataFromCSV(file: File): List[(String, Int, String)] = {
    val reader = CSVReader.open(file)
    try {
      reader.all().drop(1).map(x => (x(1), x(3).toInt, x(4)))
    } finally {
      //Release the file handle even when a row fails to parse
      reader.close()
    }
  }

}
63 |
--------------------------------------------------------------------------------
/Example Data/KNN_Example_1.csv:
--------------------------------------------------------------------------------
1 | "X","Y","Label"
2 | 2.37354618925767,5.39810588036707,0
3 | 3.18364332422208,4.38797360674923,0
4 | 2.16437138758995,5.34111969142442,0
5 | 4.59528080213779,3.87063690391921,0
6 | 3.32950777181536,6.43302370170104,0
7 | 2.17953161588198,6.98039989850586,0
8 | 3.48742905242849,4.63277852353349,0
9 | 3.73832470512922,3.95586537368347,0
10 | 3.57578135165349,5.56971962744241,0
11 | 2.69461161284364,4.86494539611918,0
12 | 4.51178116845085,7.40161776050478,0
13 | 3.38984323641143,4.96075999726683,0
14 | 2.3787594194582,5.68973936245078,0
15 | 0.7853001128225,5.02800215878067,0
16 | 4.12493091814311,4.25672679111759,0
17 | 2.95506639098477,5.18879229951434,0
18 | 2.98380973690105,3.19504137110896,0
19 | 3.9438362106853,6.46555486156289,0
20 | 3.82122119509809,5.1532533382119,0
21 | 3.59390132121751,7.17261167036215,0
22 | 3.91897737160822,5.47550952889966,0
23 | 3.78213630073107,4.29005356907819,0
24 | 3.07456498336519,5.61072635348905,0
25 | 1.01064830413663,4.06590236835575,0
26 | 3.61982574789471,3.7463665997609,0
27 | 2.943871260471,5.29144623551746,0
28 | 2.84420449329467,4.55670812678157,0
29 | 1.52924761610073,5.00110535163162,0
30 | 2.52184994489138,5.07434132415166,0
31 | 3.4179415601997,4.41047905381193,0
32 | 4.35867955152904,4.4313312671815,0
33 | 2.897212272657,4.86482138487617,0
34 | 3.38767161155937,6.1780869965732,0
35 | 2.94619495941709,3.47643319957024,0
36 | 1.62294044317139,5.59394618762842,0
37 | 2.58500543670032,5.33295037121352,0
38 | 2.60571004628965,6.06309983727636,0
39 | 2.94068660328881,4.6958160763657,0
40 | 4.10002537198388,5.37001880991629,0
41 | 3.76317574845754,5.26709879077223,0
42 | 2.83547640374641,4.45747996900835,0
43 | 2.74663831986349,6.20786780598317,0
44 | 3.69696337540474,6.16040261569495,0
45 | 3.55666319867366,5.700213649515,0
46 | 2.31124430545048,6.58683345454085,0
47 | 2.29250484303788,5.5584864255653,0
48 | 3.36458196213683,3.72340779154196,0
49 | 3.76853292451542,4.42673458576311,0
50 | 2.88765378784977,3.77538738510164,0
51 | 3.88110772645421,4.52659936356069,0
52 | 4.37963332277588,7.45018710127266,1
53 | 5.04211587314424,6.98144016728536,1
54 | 4.08907835144755,6.68193162545616,1
55 | 5.15802877240407,6.0706378525463,1
56 | 4.34541535608118,5.51253968985852,1
57 | 6.76728726937265,5.92480770338432,1
58 | 5.71670747601721,8.00002880371391,1
59 | 5.91017422949523,6.37873330520318,1
60 | 5.38418535782634,5.61557315261551,1
61 | 6.68217608051942,8.86929062242358,1
62 | 4.36426354605102,7.42510037737245,1
63 | 4.53835526963943,6.76135289908697,1
64 | 6.43228223854166,8.05848304870902,1
65 | 4.34930364668963,7.88642265137494,1
66 | 4.79261925639803,6.38075695176885,1
67 | 4.60719207055802,9.20610246454047,1
68 | 4.68000713145149,6.74497296985898,1
69 | 4.72088669702344,5.57550534978719,1
70 | 5.49418833126783,6.85560039804578,1
71 | 4.82266951773039,7.20753833923234,1
72 | 4.49404253788574,9.30797839905936,1
73 | 6.34303882517041,7.10580236789371,1
74 | 4.78542059145313,7.45699880542341,1
75 | 4.82044346995661,6.92284706464347,1
76 | 4.89980925878644,6.66599915763346,1
77 | 5.71266630705141,6.96527397168872,1
78 | 4.92643559587367,7.78763960563016,1
79 | 4.96236582853295,9.07524500865228,1
80 | 4.31833952124434,8.02739243876377,1
81 | 4.67572972775368,8.2079083983867,1
82 | 5.06016044043452,5.76867657844196,1
83 | 4.41110551374034,7.98389557005338,1
84 | 5.53149619263257,7.21992480366065,1
85 | 3.48160591821321,5.53274997090776,1
86 | 5.30655786078977,7.52102274264814,1
87 | 3.46355017646241,6.84124539528398,1
88 | 4.69902387316339,8.4645873119698,1
89 | 4.47172009555499,6.23391800039534,1
90 | 4.347905219319,6.56978824607145,1
91 | 4.94310322215261,6.07389050262256,1
92 | 3.08564057431999,6.82289603856346,1
93 | 6.17658331201856,7.40201177948634,1
94 | 3.335027563788,6.26825182688039,1
95 | 4.53646959852761,7.83037316798167,1
96 | 3.88407989495715,5.79191721369553,1
97 | 4.24918099880655,5.95201558719226,1
98 | 7.08716654562835,8.44115770684428,1
99 | 5.01739561969325,5.98415253469535,1
100 | 3.71369946956567,7.41197471231752,1
101 | 3.35939446558142,6.61892394889108,1
102 |
--------------------------------------------------------------------------------
/Example Data/Recommendation_Example_1.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Example Data/Recommendation_Example_1.zip
--------------------------------------------------------------------------------
/Example Data/TextRegression_Example_1.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Example Data/TextRegression_Example_1.csv
--------------------------------------------------------------------------------
/Example Data/stopwords.txt:
--------------------------------------------------------------------------------
1 | i
2 | me
3 | my
4 | myself
5 | we
6 | our
7 | ours
8 | ourselves
9 | you
10 | your
11 | yours
12 | yourself
13 | yourselves
14 | he
15 | him
16 | his
17 | himself
18 | she
19 | her
20 | hers
21 | herself
22 | it
23 | its
24 | itself
25 | they
26 | them
27 | their
28 | theirs
29 | themselves
30 | what
31 | which
32 | who
33 | whom
34 | this
35 | that
36 | these
37 | those
38 | am
39 | is
40 | are
41 | was
42 | were
43 | be
44 | been
45 | being
46 | have
47 | has
48 | had
49 | having
50 | do
51 | does
52 | did
53 | doing
54 | would
55 | should
56 | could
57 | ought
58 | im
59 | youre
60 | hes
61 | shes
62 | were
63 | theyre
64 | ive
65 | youve
66 | weve
67 | theyve
68 | id
69 | youd
70 | hed
71 | shed
72 | wed
73 | theyd
74 | ill
75 | youll
76 | hell
77 | shell
78 | well
79 | theyll
80 | isnt
81 | arent
82 | wasnt
83 | werent
84 | hasnt
85 | havent
86 | hadnt
87 | doesnt
88 | dont
89 | didnt
90 | wont
91 | wouldnt
92 | shant
93 | shouldnt
94 | cant
95 | cannot
96 | couldnt
97 | mustnt
98 | lets
99 | thats
100 | whos
101 | whats
102 | heres
103 | theres
104 | whens
105 | wheres
106 | whys
107 | hows
108 | a
109 | an
110 | the
111 | and
112 | but
113 | if
114 | or
115 | because
116 | as
117 | until
118 | while
119 | of
120 | at
121 | by
122 | for
123 | with
124 | about
125 | against
126 | between
127 | into
128 | through
129 | during
130 | before
131 | after
132 | above
133 | below
134 | to
135 | from
136 | up
137 | down
138 | in
139 | out
140 | on
141 | off
142 | over
143 | under
144 | again
145 | further
146 | then
147 | once
148 | here
149 | there
150 | when
151 | where
152 | why
153 | how
154 | all
155 | any
156 | both
157 | each
158 | few
159 | more
160 | most
161 | other
162 | some
163 | such
164 | no
165 | nor
166 | not
167 | only
168 | own
169 | same
170 | so
171 | than
172 | too
173 | very
174 | tr
175 | td
176 |
177 |
178 |
--------------------------------------------------------------------------------
/Images/DynamicMachineLearning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/DynamicMachineLearning.png
--------------------------------------------------------------------------------
/Images/Formula1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Formula1.png
--------------------------------------------------------------------------------
/Images/Formula2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Formula2.png
--------------------------------------------------------------------------------
/Images/Formula3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Formula3.png
--------------------------------------------------------------------------------
/Images/Formula4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Formula4.png
--------------------------------------------------------------------------------
/Images/Good_Fit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Good_Fit.png
--------------------------------------------------------------------------------
/Images/Ham_No_Stopwords.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Ham_No_Stopwords.png
--------------------------------------------------------------------------------
/Images/Ham_Stopwords.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Ham_Stopwords.png
--------------------------------------------------------------------------------
/Images/HumanDataPoints.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/HumanDataPoints.png
--------------------------------------------------------------------------------
/Images/KNNPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/KNNPlot.png
--------------------------------------------------------------------------------
/Images/Mail_per_Sender_Distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Mail_per_Sender_Distribution.png
--------------------------------------------------------------------------------
/Images/Mail_per_Sender_log_Distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Mail_per_Sender_log_Distribution.png
--------------------------------------------------------------------------------
/Images/Mail_per_Subject_Distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Mail_per_Subject_Distribution.png
--------------------------------------------------------------------------------
/Images/Mail_per_Subject_log_Distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Mail_per_Subject_log_Distribution.png
--------------------------------------------------------------------------------
/Images/MaleFemalePlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/MaleFemalePlot.png
--------------------------------------------------------------------------------
/Images/OverFitting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/OverFitting.png
--------------------------------------------------------------------------------
/Images/PCA_Explanatory_Data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/PCA_Explanatory_Data.png
--------------------------------------------------------------------------------
/Images/PCA_Normalised.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/PCA_Normalised.png
--------------------------------------------------------------------------------
/Images/PCA_Reduced_Dimension.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/PCA_Reduced_Dimension.png
--------------------------------------------------------------------------------
/Images/Precision.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Precision.png
--------------------------------------------------------------------------------
/Images/PrecisionFull.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/PrecisionFull.png
--------------------------------------------------------------------------------
/Images/PrecisionHalf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/PrecisionHalf.png
--------------------------------------------------------------------------------
/Images/Recall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Recall.png
--------------------------------------------------------------------------------
/Images/RecallFull.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/RecallFull.png
--------------------------------------------------------------------------------
/Images/RecallHalf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/RecallHalf.png
--------------------------------------------------------------------------------
/Images/SVM_Datapoints.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/SVM_Datapoints.png
--------------------------------------------------------------------------------
/Images/SVM_TestData.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/SVM_TestData.png
--------------------------------------------------------------------------------
/Images/SVM_TrainData.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/SVM_TrainData.png
--------------------------------------------------------------------------------
/Images/Spam_No_Stopwords.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Spam_No_Stopwords.png
--------------------------------------------------------------------------------
/Images/Spam_Stopwords.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Spam_Stopwords.png
--------------------------------------------------------------------------------
/Images/Under-fitting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Under-fitting.png
--------------------------------------------------------------------------------
/Images/Unscaled_DJI_PCA_Index.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Unscaled_DJI_PCA_Index.png
--------------------------------------------------------------------------------
/Images/Unscaled_PCA_Index.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Unscaled_PCA_Index.png
--------------------------------------------------------------------------------
/Images/Weighted_Subject_Distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyclade/MachineLearning/a01a1c3acc8fa6f534ab4765f201e6dab78aae04/Images/Weighted_Subject_Distribution.png
--------------------------------------------------------------------------------