result = new ArrayList<>();
24 |
25 | for (int i = 0; i < k; i++) {
26 | int[] testIdx = folds[i];
27 | int[] trainIdx = combineTrainFolds(folds, indexes.length, i);
28 | result.add(Split.fromIndexes(dataset, trainIdx, testIdx));
29 | }
30 |
31 | return result;
32 | }
33 |
34 | public static Split trainTestSplit(Dataset dataset, double testRatio, boolean shuffle, long seed) {
35 | Validate.isTrue(testRatio > 0.0 && testRatio < 1.0, "testRatio must be in (0, 1) interval");
36 |
37 | int[] indexes = IntStream.range(0, dataset.length()).toArray();
38 | if (shuffle) {
39 | shuffle(indexes, seed);
40 | }
41 |
42 | int trainSize = (int) (indexes.length * (1 - testRatio));
43 |
44 | int[] trainIndex = Arrays.copyOfRange(indexes, 0, trainSize);
45 | int[] testIndex = Arrays.copyOfRange(indexes, trainSize, indexes.length);
46 |
47 | return Split.fromIndexes(dataset, trainIndex, testIndex);
48 | }
49 |
50 | public static void shuffle(int[] indexes, long seed) {
51 | Random rnd = new Random(seed);
52 |
53 | for (int i = indexes.length - 1; i > 0; i--) {
54 | int index = rnd.nextInt(i + 1);
55 |
56 | int tmp = indexes[index];
57 | indexes[index] = indexes[i];
58 | indexes[i] = tmp;
59 | }
60 | }
61 |
62 | private static int[][] prepareFolds(int[] indexes, int k) {
63 | int[][] foldIndexes = new int[k][];
64 |
65 | int step = indexes.length / k;
66 | int beginIndex = 0;
67 |
68 | for (int i = 0; i < k - 1; i++) {
69 | foldIndexes[i] = Arrays.copyOfRange(indexes, beginIndex, beginIndex + step);
70 | beginIndex = beginIndex + step;
71 | }
72 |
73 | foldIndexes[k - 1] = Arrays.copyOfRange(indexes, beginIndex, indexes.length);
74 | return foldIndexes;
75 | }
76 |
77 | private static int[] combineTrainFolds(int[][] folds, int totalSize, int excludeIndex) {
78 | int size = totalSize - folds[excludeIndex].length;
79 | int result[] = new int[size];
80 |
81 | int start = 0;
82 | for (int i = 0; i < folds.length; i++) {
83 | if (i == excludeIndex) {
84 | continue;
85 | }
86 | int[] fold = folds[i];
87 | System.arraycopy(fold, 0, result, start, fold.length);
88 | start = start + fold.length;
89 | }
90 |
91 | return result;
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/cv/Split.java:
--------------------------------------------------------------------------------
1 | package chapter07.cv;
2 |
3 | import java.util.Objects;
4 |
5 | public class Split {
6 |
7 | private final Dataset train;
8 | private final Dataset test;
9 |
10 | public Split(Dataset train, Dataset test) {
11 | this.train = train;
12 | this.test = test;
13 | }
14 |
15 | public static Split fromIndexes(Dataset dataset, int[] trainIndex, int[] testIndex) {
16 | double[][] X = dataset.getX();
17 | double[] y = dataset.getY();
18 |
19 | int trainSize = trainIndex.length;
20 |
21 | double[][] trainXres = new double[trainSize][];
22 | double[] trainYres = new double[trainSize];
23 | for (int i = 0; i < trainSize; i++) {
24 | int idx = trainIndex[i];
25 | trainXres[i] = X[idx];
26 | trainYres[i] = y[idx];
27 | }
28 |
29 | int testSize = testIndex.length;
30 |
31 | double[][] testXres = new double[testSize][];
32 | double[] testYres = new double[testSize];
33 | for (int i = 0; i < testSize; i++) {
34 | int idx = testIndex[i];
35 | testXres[i] = X[idx];
36 | testYres[i] = y[idx];
37 | }
38 |
39 | Dataset train = new Dataset(trainXres, trainYres, dataset.getFeatureNames());
40 | Dataset test = new Dataset(testXres, testYres, dataset.getFeatureNames());
41 | return new Split(train, test);
42 | }
43 |
44 | public Dataset getTrain() {
45 | return train;
46 | }
47 |
48 | public Dataset getTest() {
49 | return test;
50 | }
51 |
52 | @Override
53 | public boolean equals(Object obj) {
54 | if (obj instanceof Split) {
55 | Split other = (Split) obj;
56 | return train.equals(other.train) && test.equals(test);
57 | }
58 |
59 | return false;
60 | }
61 |
62 | @Override
63 | public int hashCode() {
64 | return Objects.hash(train, test);
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 |
7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/Chapter08/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | com.alexeygrigorev.javads
6 | chapter-08-dl4j
7 | 0.0.1-SNAPSHOT
8 |
9 |
10 | 1.7.21
11 | 1.1.7
12 |
13 |
14 |
15 |
16 | central
17 | http://repo1.maven.org/maven2
18 |
19 |
20 | bintray
21 | http://jcenter.bintray.com
22 |
23 |
24 |
25 |
26 |
27 |
28 | org.slf4j
29 | slf4j-api
30 | ${slf4j.version}
31 |
32 |
33 | ch.qos.logback
34 | logback-classic
35 | ${logback.version}
36 |
37 |
38 | ch.qos.logback
39 | logback-core
40 | ${logback.version}
41 |
42 |
43 |
44 |
45 | org.apache.commons
46 | commons-lang3
47 | 3.4
48 |
49 |
50 | commons-io
51 | commons-io
52 | 2.5
53 |
54 |
55 | org.apache.commons
56 | commons-math3
57 | 3.6.1
58 |
59 |
60 | com.google.guava
61 | guava
62 | 19.0
63 |
64 |
65 |
66 | org.deeplearning4j
67 | deeplearning4j-core
68 | 0.7.1
69 |
70 |
71 | org.deeplearning4j
72 | deeplearning4j-ui_2.10
73 | 0.7.1
74 |
75 |
76 | org.nd4j
77 | nd4j-native-platform
78 | 0.7.1
79 |
80 |
87 |
88 | joinery
89 | joinery-dataframe
90 | 1.7
91 |
92 |
93 | org.apache.poi
94 | poi
95 | 3.14
96 |
97 |
98 |
99 | org.imgscalr
100 | imgscalr-lib
101 | 4.2
102 |
103 |
104 |
105 |
106 | junit
107 | junit-dep
108 | 4.8.1
109 | test
110 |
111 |
112 |
113 |
114 |
115 |
116 | org.apache.maven.plugins
117 | maven-compiler-plugin
118 | 3.5.1
119 |
120 | 1.8
121 | 1.8
122 |
123 |
124 |
125 | org.apache.maven.plugins
126 | maven-surefire-plugin
127 | 2.19.1
128 |
129 |
130 | org.apache.maven.plugins
131 | maven-dependency-plugin
132 |
133 |
134 | copy-dependencies
135 | prepare-package
136 |
137 | copy-dependencies
138 |
139 |
140 | libs
141 | false
142 | false
143 | true
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 | org.eclipse.m2e
155 | lifecycle-mapping
156 | 1.0.0
157 |
158 |
159 |
160 |
161 |
162 |
163 | org.apache.maven.plugins
164 | maven-dependency-plugin
165 | [1.0.0,)
166 |
167 | copy-dependencies
168 | unpack
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/Chapter08/src/main/java/chapter08/Metrics.java:
--------------------------------------------------------------------------------
1 | package chapter08;
2 |
3 | import org.apache.commons.lang3.Validate;
4 |
/**
 * Evaluation metrics for binary classifiers.
 */
public class Metrics {

    /**
     * Computes the logarithmic loss with a clipping threshold of {@code 1e-15}.
     *
     * @param actual    true labels, each either {@code 0.0} or {@code 1.0}
     * @param predicted predicted probabilities of class 1, same length as {@code actual}
     * @return the mean negative log-likelihood
     * @throws IllegalArgumentException if the lengths differ or a label is neither 0 nor 1
     */
    public static double logLoss(double[] actual, double[] predicted) {
        return logLoss(actual, predicted, 1e-15);
    }

    /**
     * Computes the logarithmic loss of predicted probabilities against binary labels.
     *
     * <p>Each probability is clipped to {@code [eps, 1 - eps]} before taking the logarithm,
     * so a fully confident wrong prediction (probability 1 for class 0, or 0 for class 1)
     * contributes a large finite penalty instead of an infinite one.
     *
     * <p>For empty input the result is {@code NaN} (zero divided by zero).
     *
     * @param actual    true labels, each either {@code 0.0} or {@code 1.0}
     * @param predicted predicted probabilities of class 1, same length as {@code actual}
     * @param eps       clipping threshold applied at both ends of the probability range
     * @return the mean negative log-likelihood
     * @throws IllegalArgumentException if the lengths differ or a label is neither 0 nor 1
     */
    public static double logLoss(double[] actual, double[] predicted, double eps) {
        // Plain check; throws the same exception type with the same message as before.
        if (actual.length != predicted.length) {
            throw new IllegalArgumentException("the lengths don't match");
        }

        int n = actual.length;
        double total = 0.0;

        for (int i = 0; i < n; i++) {
            double yi = actual[i];
            // Clip on both sides so neither log(pi) nor log(1 - pi) can see zero.
            double pi = Math.min(Math.max(predicted[i], eps), 1 - eps);

            if (yi == 0.0) {
                total = total + Math.log(1 - pi);
            } else if (yi == 1.0) {
                total = total + Math.log(pi);
            } else {
                throw new IllegalArgumentException("unrecognized class " + yi);
            }
        }

        return -total / n;
    }

}
35 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/Chapter08/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 |
7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/Chapter09/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 |
7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/Chapter10/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 |
7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/ReadMe.txt:
--------------------------------------------------------------------------------
1 | Chapter 1 does not contain code files
--------------------------------------------------------------------------------
/Module 2/MasteringJavaforDataScience_Code/SoftwareHardwareList.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 2/MasteringJavaforDataScience_Code/SoftwareHardwareList.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ## $5 Tech Unlocked 2021!
5 | [Buy and download this Course for only $5 on PacktPub.com](https://www.packtpub.com/product/java-data-science-made-easy/9781788475655)
6 | -----
7 | *The $5 campaign runs from __December 15th 2020__ to __January 13th 2021.__*
8 |
9 | # Java-Data-Science-Made-Easy
10 | Code Repository for Java: Data Science Made Easy
11 | ### Download a free PDF
12 |
13 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
14 | https://packt.link/free-ebook/9781788475655
--------------------------------------------------------------------------------