├── LICENSE
├── Module 1
│   └── Java for Data Science
│       ├── B04295_SoftwareHardware list.pdf
│       ├── Chapter 10-Visual and Audio analysis
│       │   ├── CMUSphinxExamples.java
│       │   ├── DetectFaceDemo.java
│       │   ├── TSSExamples.java
│       │   └── TessrJExample.java
│       ├── Chapter 11-Mathematical
│       │   ├── AparapiExamples.java
│       │   ├── AveragePageCount.java
│       │   ├── Book.java
│       │   ├── Java8Examples.java
│       │   ├── Java8MapReduceExample2.java
│       │   ├── MathExamples.java
│       │   ├── MatrixMultiplicationKernel.java
│       │   └── ScalarMultiplicationKernel.java
│       ├── Chapter 12-Bringing it together
│       │   ├── ApplicationDriver.java
│       │   ├── SentimentAnalysisTrainingData.java
│       │   ├── TweetHandler.java
│       │   └── TwitterStream.java
│       ├── Chapter 4_Data Visualization
│       │   ├── BarCharts - MainApp.java
│       │   ├── BubblePlotExample.java
│       │   ├── DonutPlotExample.java
│       │   ├── HistogramExample.java
│       │   ├── IndexChart - MainApp.java
│       │   ├── PieChart - MainApp.java
│       │   └── ScatterChart - MainApp.java
│       ├── Chapter 5-SDAT
│       │   ├── Main.java
│       │   ├── MainApp - Simple Regression.java
│       │   └── MainApp. - Multiple Regressionjava.txt
│       ├── Chapter 7-Neural networks
│       │   ├── KNNExample.java
│       │   ├── MultilayerPerceptronExample.java
│       │   ├── NeuralNetworkTraining.java
│       │   ├── SOMExample.java
│       │   └── SampleNeuralNetwork.java
│       ├── Chapter 9-Text Analysis
│       │   ├── ClassifyBySimilarity.java
│       │   ├── DL4JSentimentAnalysisExample.java
│       │   ├── NLPExamples.java
│       │   └── ParagraphVectorsClassifierExample.java
│       ├── Readme.txt
│       ├── chapter 2-data acquisition
│       │   ├── BlikiExample.java
│       │   ├── CrawlerController.java
│       │   ├── DatabaseExample.java
│       │   ├── FindPicture.java
│       │   ├── HttpURLConnectionExample.java
│       │   ├── JSoupExamples.java
│       │   ├── SampleCrawler.java
│       │   ├── SampleStreamExample.java
│       │   ├── Search.java
│       │   └── SimpleWebCrawler.java
│       ├── chapter 3
│       │   ├── App (2).java
│       │   ├── App.java
│       │   ├── CSVwithScanner.java
│       │   ├── Cat.jpg
│       │   ├── DataImputation.java
│       │   ├── Dogs.java
│       │   ├── GrayScaleParrot.png
│       │   ├── JSONExamples.java
│       │   ├── OCRExample.png
│       │   ├── OpenCVNonMavenExamples.java
│       │   ├── PDF File.docx
│       │   ├── PDF File.pdf
│       │   ├── PDFExtractionExample.java
│       │   ├── Person.json
│       │   ├── Persons.json
│       │   ├── ReadExcelExample.java
│       │   ├── Sample.xlsx
│       │   ├── SimpleSearching.java
│       │   ├── SimpleSort.java
│       │   ├── SimpleStringCleaning.java
│       │   ├── SimpleSubsetting.java
│       │   ├── TokenizerExamples.java
│       │   └── ValidatingData.java
│       ├── chapter 6-Machine Learning
│       │   ├── BookDecisionTree.java
│       │   ├── FXMLController.java
│       │   ├── JBayesTest.java
│       │   ├── Main-ARL.java
│       │   ├── Main-SVG.java
│       │   └── MainApp-Camping.java
│       └── chapter 8-Deep learning
│           ├── ConvolutionalNetworkExample.java
│           ├── DeepAutoEncoderExample.java
│           └── RegressionExample.java
├── Module 2
│   └── MasteringJavaforDataScience_Code
│       ├── Chapter02
│       │   ├── data
│       │   │   ├── keywords.txt
│       │   │   ├── search-results.txt
│       │   │   ├── text.txt
│       │   │   └── words.txt
│       │   ├── pom.xml
│       │   └── ranked-pages.json
│       ├── Chapter03
│       │   ├── data
│       │   │   └── ranked-pages.json
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           └── java
│       │               └── chapter03
│       │                   └── Data.java
│       ├── Chapter04
│       │   ├── data
│       │   │   ├── default.csv
│       │   │   ├── performance
│       │   │   │   └── y_train.csv
│       │   │   └── ranked-pages.json
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           └── resources
│       │               └── logback.xml
│       ├── Chapter05
│       │   ├── data
│       │   │   └── performance
│       │   │       └── y_train.csv
│       │   └── pom.xml
│       ├── Chapter06
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           ├── java
│       │           │   └── chapter06
│       │           │       └── cv
│       │           │           └── CV.java
│       │           └── resources
│       │               └── logback.xml
│       ├── Chapter07
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           ├── java
│       │           │   └── chapter07
│       │           │       ├── Metrics.java
│       │           │       ├── TextUtils.java
│       │           │       └── cv
│       │           │           ├── CV.java
│       │           │           └── Split.java
│       │           └── resources
│       │               └── logback.xml
│       ├── Chapter08
├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── chapter08 │ │ │ └── Metrics.java │ │ └── resources │ │ └── logback.xml │ ├── Chapter09 │ ├── pom.xml │ └── src │ │ └── main │ │ └── resources │ │ └── logback.xml │ ├── Chapter10 │ ├── pom.xml │ └── src │ │ └── main │ │ └── resources │ │ └── logback.xml │ ├── ReadMe.txt │ └── SoftwareHardwareList.pdf └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/B04295_SoftwareHardware list.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/B04295_SoftwareHardware list.pdf -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/CMUSphinxExamples.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import edu.cmu.sphinx.api.Configuration; 4 | import edu.cmu.sphinx.api.SpeechResult; 5 | import edu.cmu.sphinx.api.StreamSpeechRecognizer; 6 | import edu.cmu.sphinx.result.WordResult; 7 | import java.io.File; 8 | import java.io.FileInputStream; 9 | import java.io.IOException; 10 | import java.io.InputStream; 11 | import static java.lang.System.out; 12 | import java.util.Collection; 13 | import java.util.List; 14 | 15 | /* 16 | 17 | 18 | edu.cmu.sphinx 19 | sphinx4-core 20 | 5prealpha-SNAPSHOT 21 | 22 | 23 | edu.cmu.sphinx 24 | sphinx4-data 25 | 5prealpha-SNAPSHOT 26 | 27 | 28 | */ 29 | 30 | public class CMUSphinxExamples { 31 | 32 | public CMUSphinxExamples() { 33 | simpleSpeechExample(); 34 | } 35 | 36 | public static void main(String[] args) { 37 | new CMUSphinxExamples(); 38 | } 39 | 40 | public void simpleSpeechExample() { 41 | try { 42 | Configuration configuration = new Configuration(); 43 | 44 | String prefix = "resource:/edu/cmu/sphinx/models/en-us/"; 45 | configuration 46 | .setAcousticModelPath(prefix + "en-us"); 47 | configuration 48 | .setDictionaryPath(prefix + "cmudict-en-us.dict"); 49 | configuration 50 | .setLanguageModelPath(prefix + "en-us.lm.bin"); 51 | 52 | StreamSpeechRecognizer recognizer = new 
StreamSpeechRecognizer( 53 | configuration); 54 | InputStream stream = new FileInputStream(new File("Original5.wav")); 55 | 56 | recognizer.startRecognition(stream); 57 | SpeechResult result; 58 | while ((result = recognizer.getResult()) != null) { 59 | out.println("Hypothesis: " + result.getHypothesis()); 60 | 61 | out.println(); 62 | Collection results = result.getNbest(3); 63 | for (String sentence : results) { 64 | out.println(sentence); 65 | } 66 | out.println("-----"); 67 | List words = result.getWords(); 68 | for (WordResult wordResult : words) { 69 | out.print(wordResult.getWord() + " "); 70 | } 71 | out.println(); 72 | out.println("-----"); 73 | for (WordResult wordResult : words) { 74 | out.printf("%s\n\tConfidence: %.3f\n\tTime Frame: %s\n", 75 | wordResult.getWord(), result 76 | .getResult() 77 | .getLogMath() 78 | .logToLinear((float)wordResult 79 | .getConfidence()), 80 | wordResult.getTimeFrame()); 81 | } 82 | out.println(); 83 | } 84 | 85 | recognizer.stopRecognition(); 86 | } catch (IOException ex) { 87 | ex.printStackTrace(); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/DetectFaceDemo.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import static java.lang.System.out; 4 | import org.opencv.core.Core; 5 | import org.opencv.core.Mat; 6 | import org.opencv.core.MatOfRect; 7 | import org.opencv.core.Point; 8 | import org.opencv.core.Rect; 9 | import org.opencv.core.Scalar; 10 | import org.opencv.imgcodecs.Imgcodecs; 11 | import org.opencv.imgproc.Imgproc; 12 | import org.opencv.objdetect.CascadeClassifier; 13 | 14 | // Adapted from http://docs.opencv.org/2.4/doc/tutorials/introduction/desktop_java/java_dev_intro.html 15 | 16 | public class DetectFaceDemo { 17 | 18 | public void run() { 19 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 20 | String base = "C:/Books in Progress/Java for Data Science/Chapter 10/OpenCVExamples/src/resources"; 21 | CascadeClassifier faceDetector = 22 | new CascadeClassifier(base + "/lbpcascade_frontalface.xml"); 23 | 24 | Mat image = Imgcodecs.imread(base + "/images.jpg"); 25 | 26 | MatOfRect faceVectors = new MatOfRect(); 27 | faceDetector.detectMultiScale(image, faceVectors); 28 | 29 | out.println(faceVectors.toArray().length + " faces found"); 30 | 31 | for (Rect rect : faceVectors.toArray()) { 32 | Imgproc.rectangle(image, new Point(rect.x, rect.y), 33 | new Point(rect.x + rect.width, rect.y + rect.height), 34 | new Scalar(0, 255, 0)); 35 | } 36 | Imgcodecs.imwrite("faceDetection.png", image); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/TSSExamples.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import com.sun.speech.freetts.Voice; 4 | import com.sun.speech.freetts.VoiceManager; 5 | import static java.lang.System.out; 6 | 7 | public class TSSExamples { 8 | 9 | public TSSExamples() { 10 | System.setProperty("mbrola.base", "C:\\Books in Progress\\Java for Data Science\\Chapter 10\\Downloads\\MBROLA"); 11 | demonstrateFreeTTS(); 12 | demonstrateVoice(); 13 | } 14 | 15 | public static void main(String[] args) { 16 | new TSSExamples(); 17 | } 18 | 19 | public void demonstrateFreeTTS() { 20 | VoiceManager vm = VoiceManager.getInstance(); 21 | Voice voice = 
vm.getVoice("kevin16"); 22 | voice.allocate(); 23 | voice.speak("Hello World"); 24 | 25 | // Voices 26 | out.println("------Voices-------"); 27 | Voice[] voices = vm.getVoices(); 28 | for (Voice v : voices) { 29 | out.println(v); 30 | } 31 | 32 | // Voice information 33 | out.println(); 34 | out.println("------Voice Information-------"); 35 | out.println(vm.getVoiceInfo()); 36 | 37 | out.println(); 38 | out.println("------Alan Voice Information-------"); 39 | Voice v = vm.getVoice("alan"); 40 | out.println(v); 41 | } 42 | 43 | public void demonstrateVoice() { 44 | out.println(); 45 | out.println("------Voice Demonstration-------"); 46 | 47 | VoiceManager vm = VoiceManager.getInstance(); 48 | Voice voice = vm.getVoice("kevin16"); 49 | voice.allocate(); 50 | 51 | out.println("Name: " + voice.getName()); 52 | out.println("Description: " + voice.getDescription()); 53 | out.println("Organization: " + voice.getOrganization()); 54 | out.println("Age: " + voice.getAge()); 55 | out.println("Gender: " + voice.getGender()); 56 | out.println("Rate: " + voice.getRate()); 57 | out.println("Pitch: " + voice.getPitch()); 58 | out.println("Style: " + voice.getStyle()); 59 | out.println(); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/TessrJExample.java: -------------------------------------------------------------------------------- 1 | package tessrj.example; 2 | 3 | import java.io.File; 4 | import net.sourceforge.tess4j.ITesseract; 5 | import net.sourceforge.tess4j.Tesseract; 6 | import net.sourceforge.tess4j.TesseractException; 7 | 8 | public class TessrJExample { 9 | 10 | public static void main(String[] args) { 11 | ITesseract instance = new Tesseract(); 12 | instance.setLanguage("eng"); 13 | try { 14 | String result; 15 | result = instance.doOCR(new File("OCRExample.png")); 16 | System.out.println(result); 17 | } catch (TesseractException e) { 18 | System.err.println(e.getMessage()); 19 | } 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/AparapiExamples.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import com.amd.aparapi.Kernel; 4 | import com.amd.aparapi.internal.exception.ClassParseException; 5 | import static java.lang.System.out; 6 | import java.util.Arrays; 7 | import java.util.Random; 8 | import java.util.stream.IntStream; 9 | 10 | public class AparapiExamples { 11 | int n = 4; 12 | int m = 2; 13 | int p = 3; 14 | 15 | double A[][] = { 16 | {0.1950, 0.0311}, 17 | {0.3588, 0.2203}, 18 | {0.1716, 0.5931}, 19 | {0.2105, 0.3242}}; 20 | double B[][] = { 21 | {0.0502, 0.9823, 0.9472}, 22 | {0.5732, 0.2694, 0.916}}; 23 | double C[][] = new double[n][p]; 24 | 25 | public AparapiExamples() { 26 | simpleMatrixMultiplication(); 27 | matrixMulitplication(); 28 | scalarMatrixMultiplication(); 29 | } 30 | 31 | public void scalarMatrixMultiplication() { 32 | float inputMatrix[] = {3, 4, 5, 6, 7, 8, 9}; 33 | int size = inputMatrix.length; 34 | 35 | ScalarMultiplicationKernel kernel 36 | = new ScalarMultiplicationKernel(inputMatrix); 37 | kernel.setExecutionMode(Kernel.EXECUTION_MODE.NONE); 38 | kernel.execute(size); 39 | kernel.displayResult(); 40 | kernel.dispose(); 41 | } 42 | 43 | public void matrixMulitplication() { 44 | MatrixMultiplicationKernel kernel 45 | = new MatrixMultiplicationKernel(n, m, p); 46 | 
kernel.execute(6, 3); 47 | kernel.displayResults(); 48 | kernel.dispose(); 49 | } 50 | 51 | public void simpleMatrixMultiplication() { 52 | System.out.println(); 53 | System.out.println("Simple Matrix Multiplication"); 54 | for (int i = 0; i < n; i++) { 55 | for (int k = 0; k < m; k++) { 56 | for (int j = 0; j < p; j++) { 57 | C[i][j] += A[i][k] * B[k][j]; 58 | } 59 | } 60 | } 61 | displayResult(); 62 | } 63 | 64 | public void displayResult() { 65 | out.println("Result"); 66 | for (int i = 0; i < n; i++) { 67 | for (int j = 0; j < p; j++) { 68 | System.out.printf("%.4f ", C[i][j]); 69 | } 70 | System.out.println(); 71 | } 72 | } 73 | 74 | public static void main(String[] args) throws Exception { 75 | new AparapiExamples(); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/AveragePageCount.java: -------------------------------------------------------------------------------- 1 | 2 | import java.io.IOException; 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.FloatWritable; 6 | import org.apache.hadoop.io.IntWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.Mapper; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 13 | 14 | public class AveragePageCount { 15 | 16 | public static class TextMapper 17 | extends Mapper { 18 | 19 | private final IntWritable pgs = new IntWritable(); 20 | private final Text bookTitle = new Text(); 21 | 22 | @Override 23 | public void map(Object key, Text bookInfo, Context context 24 | ) throws IOException, InterruptedException { 25 | String[] book = bookInfo.toString().split("\t"); 26 | bookTitle.set(book[0]); 27 | pgs.set(Integer.parseInt(book[2])); 28 | context.write(bookTitle, pgs); 29 | } 30 | } 31 | 32 | public static class AverageReduce 33 | extends Reducer { 34 | 35 | private final FloatWritable finalAvg = new FloatWritable(); 36 | Float average = 0f; 37 | Float count = 0f; 38 | int sum = 0; 39 | 40 | @Override 41 | public void reduce(Text key, Iterable pageCnts, 42 | Context context 43 | ) throws IOException, InterruptedException { 44 | 45 | for (IntWritable cnt : pageCnts) { 46 | sum += cnt.get(); 47 | } 48 | count += 1; 49 | average = sum / count; 50 | finalAvg.set(average); 51 | context.write(new Text("Average Page Count = "), finalAvg); 52 | } 53 | } 54 | 55 | public static void main(String[] args) throws Exception { 56 | Configuration con = new Configuration(); 57 | Job bookJob = Job.getInstance(con, "Average Page Count"); 58 | bookJob.setJarByClass(AveragePageCount.class); 59 | bookJob.setMapperClass(TextMapper.class); 60 | bookJob.setReducerClass(AverageReduce.class); 61 | bookJob.setOutputKeyClass(Text.class); 62 | bookJob.setOutputValueClass(IntWritable.class); 63 | 64 | FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt")); 65 | FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput")); 66 | if (bookJob.waitForCompletion(true)) { 67 | System.exit(0); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/Book.java: -------------------------------------------------------------------------------- 1 | 2 | public class Book { 3 
| 4 | public String title; 5 | public String author; 6 | public int pgCnt; 7 | 8 | public Book() { 9 | title = ""; 10 | author = ""; 11 | pgCnt = 0; 12 | } 13 | 14 | public Book(String t, String a, int p) { 15 | title = t; 16 | author = a; 17 | pgCnt = p; 18 | } 19 | 20 | public int getPgCnt(){ 21 | return pgCnt; 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/Java8Examples.java: -------------------------------------------------------------------------------- 1 | package java8examples; 2 | 3 | import static java.lang.System.out; 4 | import java.util.Arrays; 5 | import java.util.stream.IntStream; 6 | 7 | public class Java8Examples { 8 | int n = 4; 9 | int m = 2; 10 | int p = 3; 11 | 12 | double A[][] = { 13 | {0.1950, 0.0311}, 14 | {0.3588, 0.2203}, 15 | {0.1716, 0.5931}, 16 | {0.2105, 0.3242}}; 17 | double B[][] = { 18 | {0.0502, 0.9823, 0.9472}, 19 | {0.5732, 0.2694, 0.916}}; 20 | double C[][] = new double[n][p]; 21 | 22 | public void simpleStreams() { 23 | int numbers[] = {3, 54, 23, 87, 50, 21, 31}; 24 | 25 | IntStream stream = Arrays.stream(numbers); 26 | stream.forEach(e -> out.printf("%d ", e)); 27 | out.println(); 28 | 29 | stream = Arrays.stream(numbers); 30 | stream 31 | .mapToDouble((int e) -> { 32 | return 2 * e; 33 | }) 34 | .forEach(e -> out.printf("%.4f ", e)); 35 | out.println(); 36 | 37 | stream = Arrays.stream(numbers); 38 | IntStream.range(0, 3).forEach(e -> out.printf("%d ", e)); 39 | out.println(); 40 | 41 | out.println(Arrays.stream(numbers).sum()); 42 | 43 | int nums[] = stream.toArray(); 44 | for (int e : nums) { 45 | out.print(e + " "); 46 | } 47 | out.println(); 48 | } 49 | 50 | public void matrixMultipliationSolution() { 51 | // Java 8 Stream solution 52 | out.println("\nJava 8 Stream Solution"); 53 | C = Arrays.stream(A) 54 | // .parallel() 55 | .map(AMatrixRow -> IntStream.range(0, B[0].length) 56 | .mapToDouble(i -> IntStream.range(0, B.length) 57 | .mapToDouble(j -> AMatrixRow[j] * B[j][i]) 58 | .sum() 59 | ).toArray()).toArray(double[][]::new); 60 | displayResult(); 61 | out.println(); 62 | } 63 | 64 | public Java8Examples() { 65 | // simpleStreams(); 66 | matrixMultipliationSolution(); 67 | } 68 | 69 | public void displayResult() { 70 | out.println("Result"); 71 | for (int i = 0; i < n; i++) { 72 | for (int j = 0; j < p; j++) { 73 | System.out.printf("%.4f ", C[i][j]); 74 | } 75 | System.out.println(); 76 | } 77 | } 78 | 79 | public static void main(String[] args) { 80 | new Java8Examples(); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/Java8MapReduceExample2.java: -------------------------------------------------------------------------------- 1 | 2 | import static java.lang.System.out; 3 | import java.util.ArrayList; 4 | 5 | public class Java8MapReduceExample2 { 6 | 7 | public static void main(String[] args) { 8 | 9 | ArrayList books = new ArrayList<>(); 10 | double average; 11 | int totalPg = 0; 12 | 13 | books.add(new Book("Moby Dick", "Herman Melville", 822)); 14 | books.add(new Book("Charlotte's Web", "E.B. 
White", 189)); 15 | books.add(new Book("The Grapes of Wrath", "John Steinbeck", 212)); 16 | books.add(new Book("Jane Eyre", "Charlotte Bronte", 299)); 17 | books.add(new Book("A Tale of Two Cities", "Charles Dickens", 673)); 18 | books.add(new Book("War and Peace", "Leo Tolstoy", 1032)); 19 | books.add(new Book("The Great Gatsby", "F. Scott Fitzgerald", 275)); 20 | 21 | totalPg = books 22 | .stream() 23 | .parallel() 24 | .map((b) -> b.pgCnt) 25 | .reduce(totalPg, (accumulator, _item) -> { 26 | out.println(accumulator + " " +_item); 27 | return accumulator + _item; 28 | }); 29 | 30 | average = 1.0 * totalPg / books.size(); 31 | out.println("Average Page Count: " + average); 32 | 33 | average = books 34 | .parallelStream() 35 | .map(b -> b.pgCnt) 36 | .mapToDouble(s -> s) 37 | .average() 38 | .getAsDouble(); 39 | out.println("Average Page Count: " + average); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/MathExamples.java: -------------------------------------------------------------------------------- 1 | package packt.jblasexamples; 2 | 3 | import static java.lang.System.out; 4 | import org.apache.commons.math3.linear.Array2DRowRealMatrix; 5 | import org.apache.commons.math3.linear.RealMatrix; 6 | import org.jblas.DoubleMatrix; 7 | import org.nd4j.linalg.api.ndarray.INDArray; 8 | import org.nd4j.linalg.factory.Nd4j; 9 | 10 | /* 11 | // 12 | // 13 | // org.jblas 14 | // jblas 15 | // 1.2.4 16 | // 17 | // 18 | // 19 | // org.apache.commons 20 | // commons-exec 21 | // 1.3 22 | // 23 | // 24 | // org.apache.commons 25 | // commons-math3 26 | // 3.6.1 27 | // jar 28 | // 29 | // 30 | // 31 | // org.nd4j 32 | // nd4j-native 33 | // 0.6.0 34 | // 35 | // 36 | */ 37 | public class MathExamples { 38 | 39 | public static void main(String[] args) { 40 | new MathExamples(); 41 | } 42 | 43 | public MathExamples() { 44 | JBLASExample(); 45 | apacheCommonsExample(); 46 | nd4JExample(); 47 | } 48 | 49 | public void JBLASExample() { 50 | DoubleMatrix A = new DoubleMatrix(new double[][]{ 51 | {0.1950, 0.0311}, 52 | {0.3588, 0.2203}, 53 | {0.1716, 0.5931}, 54 | {0.2105, 0.3242}}); 55 | 56 | DoubleMatrix B = new DoubleMatrix(new double[][]{ 57 | {0.0502, 0.9823, 0.9472}, 58 | {0.5732, 0.2694, 0.916}}); 59 | DoubleMatrix C; 60 | 61 | C = A.mmul(B); 62 | 63 | for (int i = 0; i < C.getRows(); i++) { 64 | out.println(C.getRow(i)); 65 | } 66 | } 67 | 68 | public void apacheCommonsExample() { 69 | double[][] A = { 70 | {0.1950, 0.0311}, 71 | {0.3588, 0.2203}, 72 | {0.1716, 0.5931}, 73 | {0.2105, 0.3242}}; 74 | 75 | double[][] B = { 76 | {0.0502, 0.9823, 0.9472}, 77 | {0.5732, 0.2694, 0.916}}; 78 | 79 | RealMatrix aRealMatrix = new Array2DRowRealMatrix(A); 80 | RealMatrix bRealMatrix = new Array2DRowRealMatrix(B); 81 | 82 | RealMatrix cRealMatrix = aRealMatrix.multiply(bRealMatrix); 83 | System.out.println(); 84 | for (int i = 0; i < cRealMatrix.getRowDimension(); i++) { 85 | System.out.println(cRealMatrix.getRowVector(i)); 86 | } 87 | } 88 | 89 | public void nd4JExample() { 90 | double[] A = { 91 | 0.1950, 0.0311, 92 | 0.3588, 0.2203, 93 | 0.1716, 0.5931, 94 | 0.2105, 0.3242}; 95 | 96 | double[] B = { 97 | 0.0502, 0.9823, 0.9472, 98 | 0.5732, 0.2694, 0.916}; 99 | 100 | 101 | INDArray aINDArray = Nd4j.create(A,new int[]{4,2},'c'); 102 | INDArray bINDArray = Nd4j.create(B,new int[]{2,3},'c'); 103 | 104 | INDArray cINDArray; 105 | cINDArray = aINDArray.mmul(bINDArray); 106 | for(int i=0; i stream 33 | 
= new TwitterStream(this.numberOfTweets, this.topic).stream(); 34 | stream 35 | .map(s -> s.processJSON()) 36 | .map(s -> s.toLowerCase()) 37 | .filter(s -> s.isEnglish()) 38 | .map(s -> s.removeStopWords()) 39 | .filter(s -> s.containsCharacter(this.subTopic)) 40 | .map(s -> s.performSentimentAnalysis()) 41 | .forEach((TweetHandler s) -> { 42 | s.computeStats(); 43 | out.println(s); 44 | }); 45 | out.println(); 46 | out.println("Positive Reviews: " 47 | + TweetHandler.getNumberOfPositiveReviews()); 48 | out.println("Negative Reviews: " 49 | + TweetHandler.getNumberOfNegativeReviews()); 50 | } 51 | 52 | public static void main(String[] args) { 53 | new ApplicationDriver(); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 12-Bringing it together/SentimentAnalysisTrainingData.java: -------------------------------------------------------------------------------- 1 | package packt.twitterdatascienceproject; 2 | 3 | import java.io.IOException; 4 | import java.nio.charset.StandardCharsets; 5 | import java.nio.file.Files; 6 | import java.nio.file.Paths; 7 | import java.util.List; 8 | 9 | public class SentimentAnalysisTrainingData { 10 | 11 | public static void main(String[] args) { 12 | try { 13 | String filename; 14 | String file; 15 | String text; 16 | List lines = Files.readAllLines(Paths.get("C:\\Jenn Personal\\Packt Data Science\\Chapter 12\\Sentiment-Analysis-Dataset\\SentimentAnalysisDataset.csv"),StandardCharsets.ISO_8859_1); 17 | for(String s : lines){ 18 | String[] oneLine = s.split(","); 19 | if(Integer.parseInt(oneLine[1])==1){ 20 | filename = "pos"; 21 | }else{ 22 | filename = "neg"; 23 | } 24 | file = oneLine[0]+".txt"; 25 | text = oneLine[3]; 26 | Files.write(Paths.get("C:\\Jenn Personal\\Packt Data Science\\Chapter 12\\review_polarity\\txt_sentoken\\"+filename+"\\"+file), text.getBytes()); 27 | } 28 | 29 | } catch (IOException ex) { 30 | ex.printStackTrace(); 31 | } 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 12-Bringing it together/TwitterStream.java: -------------------------------------------------------------------------------- 1 | package packt.twitterdatascienceproject; 2 | 3 | import com.google.common.collect.Lists; 4 | import com.twitter.hbc.ClientBuilder; 5 | import com.twitter.hbc.core.Constants; 6 | import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint; 7 | import com.twitter.hbc.core.processor.StringDelimitedProcessor; 8 | import com.twitter.hbc.httpclient.BasicClient; 9 | import com.twitter.hbc.httpclient.auth.Authentication; 10 | import com.twitter.hbc.httpclient.auth.OAuth1; 11 | import static java.lang.System.out; 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | import java.util.concurrent.BlockingQueue; 15 | import java.util.concurrent.LinkedBlockingQueue; 16 | import java.util.stream.Stream; 17 | 18 | public class TwitterStream { 19 | private int numberOfTweets; 20 | private String topic; 21 | 22 | public TwitterStream() { 23 | this(100, "Stars Wars"); 24 | } 25 | 26 | public TwitterStream(int numberOfTweets, String topic) { 27 | this.numberOfTweets = numberOfTweets; 28 | this.topic = topic; 29 | } 30 | 31 | public Stream stream() { 32 | String myKey = "sl2WbCf4UnIr08xvHVitHJ99r"; 33 | String mySecret = "PE6yauvXjKLuvoQNXZAJo5C8N5U5piSFb3udwkoI76paK6KyqI"; 34 | String myToken = "1098376471-p6iWfxCLtyMvMutTb010w1D1xZ3UyJhcC2kkBjN"; 35 | String 
myAccess = "2o1uGcp4b2bFynOfu2cA1uz63n5aruV0RwNsUjRpjDBZS"; 36 | 37 | out.println("Creating Twitter Stream"); 38 | BlockingQueue statusQueue = new LinkedBlockingQueue<>(1000); 39 | StatusesFilterEndpoint endpoint = new StatusesFilterEndpoint(); 40 | endpoint.trackTerms(Lists.newArrayList("twitterapi", this.topic)); 41 | endpoint.stallWarnings(false); 42 | Authentication twitterAuth = new OAuth1(myKey, mySecret, myToken, myAccess); 43 | 44 | BasicClient twitterClient = new ClientBuilder() 45 | .name("Twitter client") 46 | .hosts(Constants.STREAM_HOST) 47 | .endpoint(endpoint) 48 | .authentication(twitterAuth) 49 | .processor(new StringDelimitedProcessor(statusQueue)) 50 | .build(); 51 | 52 | twitterClient.connect(); 53 | 54 | List list = new ArrayList(); 55 | List twitterList = new ArrayList(); 56 | 57 | statusQueue.drainTo(twitterList); 58 | for(int i=0; i series = new XYChart.Series<>(); 14 | final CategoryAxis xAxis = new CategoryAxis(); 15 | final NumberAxis yAxis = new NumberAxis(8000000, 11000000, 1000000); 16 | final static String belgium = "Belgium"; 17 | 18 | @Override 19 | public void start(Stage stage) { 20 | simpleIndexChart(stage); 21 | } 22 | 23 | public void simpleIndexChart(Stage stage) { 24 | stage.setTitle("Index Chart"); 25 | final LineChart lineChart 26 | = new LineChart<>(xAxis, yAxis); 27 | lineChart.setTitle("Belgium Population"); 28 | yAxis.setLabel("Population"); 29 | 30 | series.setName("Population"); 31 | addDataItem(series, "1950", 8639369); 32 | addDataItem(series, "1960", 9118700); 33 | addDataItem(series, "1970", 9637800); 34 | addDataItem(series, "1980", 9846800); 35 | addDataItem(series, "1990", 9969310); 36 | addDataItem(series, "2000", 10263618); 37 | 38 | Scene scene = new Scene(lineChart, 800, 600); 39 | lineChart.getData().add(series); 40 | stage.setScene(scene); 41 | stage.show(); 42 | } 43 | 44 | public void addDataItem(XYChart.Series series, 45 | String x, Number y) { 46 | series.getData().add(new XYChart.Data<>(x, y)); 47 | } 48 | 49 | public static void main(String[] args) { 50 | launch(args); 51 | } 52 | 53 | } 54 | 55 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 4_Data Visualization/PieChart - MainApp.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import javafx.application.Application; 4 | import javafx.collections.FXCollections; 5 | import javafx.collections.ObservableList; 6 | import javafx.scene.Group; 7 | import javafx.scene.Scene; 8 | import javafx.scene.chart.PieChart; 9 | import javafx.stage.Stage; 10 | 11 | public class PieChartSample extends Application { 12 | 13 | @Override 14 | public void start(Stage stage) { 15 | Scene scene = new Scene(new Group()); 16 | stage.setTitle("Europian Country Population"); 17 | stage.setWidth(500); 18 | stage.setHeight(500); 19 | 20 | ObservableList pieChartData = 21 | FXCollections.observableArrayList( 22 | new PieChart.Data("Belgium", 3), 23 | new PieChart.Data("France", 26), 24 | new PieChart.Data("Germany", 35), 25 | new PieChart.Data("Netherlands", 7), 26 | new PieChart.Data("Sweden", 4), 27 | new PieChart.Data("United Kingdom", 25)); 28 | final PieChart pieChart = new PieChart(pieChartData); 29 | pieChart.setTitle("Country Population"); 30 | 31 | ((Group) scene.getRoot()).getChildren().add(pieChart); 32 | stage.setScene(scene); 33 | stage.show(); 34 | } 35 | 36 | public static void main(String[] args) { 37 | launch(args); 38 | } 39 | 40 | } 41 | 
-------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 4_Data Visualization/ScatterChart - MainApp.java: -------------------------------------------------------------------------------- 1 | package packt.scatterchartmavenexample; 2 | 3 | import com.opencsv.CSVReader; 4 | import java.io.FileReader; 5 | import javafx.application.Application; 6 | import static javafx.application.Application.launch; 7 | import javafx.scene.Scene; 8 | import javafx.scene.chart.NumberAxis; 9 | import javafx.scene.chart.ScatterChart; 10 | import javafx.scene.chart.XYChart; 11 | import javafx.stage.Stage; 12 | 13 | public class MainApp extends Application { 14 | 15 | @Override 16 | public void start(Stage stage) throws Exception { 17 | stage.setTitle("Scatter Chart Sample"); 18 | final NumberAxis yAxis = new NumberAxis(1400, 2100, 100); 19 | final NumberAxis xAxis = new NumberAxis(500000, 90000000, 1000000); 20 | final ScatterChart scatterChart = new ScatterChart<>(xAxis, yAxis); 21 | xAxis.setLabel("Population"); 22 | yAxis.setLabel("Decade"); 23 | scatterChart.setTitle("Population Scatter Graph"); 24 | 25 | XYChart.Series series = new XYChart.Series(); 26 | 27 | try (CSVReader dataReader = new CSVReader(new FileReader("EuropeanScatterData.csv"), ',')) { 28 | String[] nextLine; 29 | while ((nextLine = dataReader.readNext()) != null) { 30 | int decade = Integer.parseInt(nextLine[0]); 31 | int population = Integer.parseInt(nextLine[1]); 32 | series.getData().add(new XYChart.Data(population, decade)); 33 | System.out.println("Decade: " + decade + " Population: " + population); 34 | } 35 | } 36 | 37 | scatterChart.getData().addAll(series); 38 | Scene scene = new Scene(scatterChart, 500, 400); 39 | stage.setScene(scene); 40 | stage.show(); 41 | } 42 | 43 | public static void main(String[] args) { 44 | launch(args); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 5-SDAT/MainApp - Simple Regression.java: -------------------------------------------------------------------------------- 1 | package packt.simplelinearregressionmaven; 2 | 3 | /* 4 | 5 | 6 | org.apache.commons 7 | commons-math3 8 | 3.6.1 9 | 10 | 11 | */ 12 | 13 | import static java.lang.System.out; 14 | import java.text.NumberFormat; 15 | import javafx.application.Application; 16 | import static javafx.application.Application.launch; 17 | import javafx.scene.Scene; 18 | import javafx.scene.chart.LineChart; 19 | import javafx.scene.chart.NumberAxis; 20 | import javafx.scene.chart.XYChart; 21 | import javafx.stage.Stage; 22 | import javafx.util.StringConverter; 23 | import org.apache.commons.math3.stat.regression.SimpleRegression; 24 | 25 | public class MainApp extends Application { 26 | final XYChart.Series originalSeries = new XYChart.Series<>(); 27 | final XYChart.Series projectedSeries = new XYChart.Series<>(); 28 | final NumberAxis xAxis = new NumberAxis(1940, 2050, 10); 29 | final NumberAxis yAxis = new NumberAxis(8000000, 12000000, 1000000); 30 | final static String belgium = "Belgium"; 31 | 32 | @Override 33 | public void start(Stage stage) { 34 | 35 | //Belgium 1950 8639369 36 | //Belgium 1960 9118700 37 | //Belgium 1970 9637800 38 | //Belgium 1980 9846800 39 | //Belgium 1990 9969310 40 | //Belgium 2000 10263618 41 | double[][] input = { 42 | {1950, 8639369}, 43 | {1960, 9118700}, 44 | {1970, 9637800}, 45 | {1980, 9846800}, 46 | {1990, 9969310}, 47 | {2000, 10263618}}; 48 | double[] 
predictionYears = {1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020, 2030, 2040}; 49 | 50 | NumberFormat yearFormat = NumberFormat.getNumberInstance(); 51 | yearFormat.setMaximumFractionDigits(0); 52 | yearFormat.setGroupingUsed(false); 53 | NumberFormat populationFormat = NumberFormat.getNumberInstance(); 54 | populationFormat.setMaximumFractionDigits(0); 55 | 56 | SimpleRegression regression = new SimpleRegression(); 57 | regression.addData(input); 58 | projectedSeries.setName("Projected"); 59 | for (int i = 0; i < predictionYears.length; i++) { 60 | out.println(yearFormat.format(predictionYears[i]) + "-" 61 | + populationFormat.format(regression.predict(predictionYears[i]))); 62 | addDataItem(projectedSeries, predictionYears[i], 63 | regression.predict(predictionYears[i])); 64 | } 65 | 66 | displayAttribute("Slope",regression.getSlope()); 67 | displayAttribute("Intercept", regression.getIntercept()); 68 | displayAttribute("InterceptStdEr", regression.getInterceptStdErr()); 69 | displayAttribute("MeanSquareError", regression.getMeanSquareError()); 70 | displayAttribute("N", + regression.getN()); 71 | displayAttribute("R", + regression.getR()); 72 | displayAttribute("RSquare", regression.getRSquare()); 73 | 74 | //Create index chart 75 | stage.setTitle("Simple Linear Regression"); 76 | xAxis.setTickLabelFormatter(new StringConverter() { 77 | @Override 78 | public String toString(Number object) { 79 | return (object.intValue()) + ""; 80 | } 81 | 82 | @Override 83 | public Number fromString(String string) { 84 | return 0; 85 | } 86 | }); 87 | 88 | final LineChart lineChart 89 | = new LineChart<>(xAxis, yAxis); 90 | lineChart.setTitle("Belgium Population"); 91 | yAxis.setLabel("Population"); 92 | 93 | originalSeries.setName("Actual"); 94 | addDataItem(originalSeries, 1950, 8639369); 95 | addDataItem(originalSeries, 1960, 9118700); 96 | addDataItem(originalSeries, 1970, 9637800); 97 | addDataItem(originalSeries, 1980, 9846800); 98 | addDataItem(originalSeries, 1990, 9969310); 99 | addDataItem(originalSeries, 2000, 10263618); 100 | 101 | Scene scene = new Scene(lineChart, 800, 600); 102 | lineChart.getData().addAll(originalSeries, projectedSeries); 103 | stage.setScene(scene); 104 | stage.show(); 105 | } 106 | 107 | public void displayAttribute(String attribute, double value) { 108 | NumberFormat numberFormat = NumberFormat.getNumberInstance(); 109 | numberFormat.setMaximumFractionDigits(2); 110 | out.println(attribute + ": " + numberFormat.format(value)); 111 | } 112 | 113 | public void addDataItem(XYChart.Series series, 114 | Number x, Number y) { 115 | series.getData().add(new XYChart.Data<>(x, y)); 116 | } 117 | 118 | public static void main(String[] args) { 119 | launch(args); 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 7-Neural networks/KNNExample.java: -------------------------------------------------------------------------------- 1 | package packt.knnexamples; 2 | 3 | import java.io.FileReader; 4 | import static java.lang.System.out; 5 | import weka.classifiers.lazy.IBk; 6 | import weka.classifiers.Evaluation; 7 | import weka.core.Instance; 8 | import weka.core.Instances; 9 | import weka.core.SerializationHelper; 10 | 11 | public class KNNExample { 12 | 13 | public KNNExample() { 14 | // dermatology.arff 15 | // Training set is first 80% - ends with line 456 of orginal set 16 | // Testing set is last 20% - Starts with line 457 of original set (lines 457-530) 17 | String 
trainingFileName = "dermatologyTrainingSet.arff"; 18 | String testingFileName = "dermatologyTestingSet.arff"; 19 | 20 | try (FileReader trainingReader = new FileReader(trainingFileName); 21 | FileReader testingReader = new FileReader(testingFileName)) { 22 | Instances trainingInstances = new Instances(trainingReader); 23 | trainingInstances.setClassIndex(trainingInstances.numAttributes() - 1); 24 | 25 | Instances testingInstances = new Instances(testingReader); 26 | testingInstances.setClassIndex(testingInstances.numAttributes() - 1); 27 | 28 | IBk ibk = new IBk(); 29 | ibk.buildClassifier(trainingInstances); 30 | SerializationHelper.write("knnModel", ibk); 31 | ibk = null; 32 | ibk = (IBk) SerializationHelper.read("knnModel"); 33 | 34 | // Evaluate 35 | Evaluation evaluation = new Evaluation(trainingInstances); 36 | evaluation.evaluateModel(ibk, testingInstances); 37 | System.out.println(evaluation.toSummaryString()); 38 | 39 | // Predict 40 | for (int i = 0; i < testingInstances.numInstances(); i++) { 41 | double result = ibk.classifyInstance(testingInstances.instance(i)); 42 | // Use for incorrect results 43 | if (result != testingInstances 44 | .instance(i) 45 | .value(testingInstances.numAttributes() - 1)) { 46 | out.println("Classify result: " + result 47 | + " Correct: " + testingInstances.instance(i) 48 | .value(testingInstances.numAttributes() - 1)); 49 | Instance incorrectInstance = testingInstances.instance(i); 50 | incorrectInstance.setDataset(trainingInstances); 51 | double[] distribution = ibk.distributionForInstance(incorrectInstance); 52 | out.println("Probability of being positive: " + distribution[0]); 53 | out.println("Probability of being negative: " + distribution[1]); 54 | } 55 | } 56 | } catch (Exception ex) { 57 | ex.printStackTrace(); 58 | } 59 | } 60 | 61 | public static void main(String[] args) { 62 | new KNNExample(); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 7-Neural networks/MultilayerPerceptronExample.java: -------------------------------------------------------------------------------- 1 | package packt.wekanueralnetworkexamples; 2 | 3 | import java.io.FileReader; 4 | import static java.lang.System.out; 5 | import weka.classifiers.Evaluation; 6 | import weka.classifiers.functions.MultilayerPerceptron; 7 | import weka.core.Instance; 8 | import weka.core.Instances; 9 | import weka.core.SerializationHelper; 10 | 11 | /* 12 | 13 | 14 | 15 | nz.ac.waikato.cms.weka 16 | weka-dev 17 | 3.7.5 18 | 19 | 20 | */ 21 | public class MultilayerPerceptronExample { 22 | 23 | public MultilayerPerceptronExample() { 24 | // dermatology.arff 25 | // Training set is first 80% - ends with line 456 of orginal set 26 | // Testing set is last 20% - Starts with line 457 of original set (lines 457-530) 27 | String trainingFileName = "dermatologyTrainingSet.arff"; 28 | String testingFileName = "dermatologyTestingSet.arff"; 29 | 30 | try (FileReader trainingReader = new FileReader(trainingFileName); 31 | FileReader testingReader = new FileReader(testingFileName)) { 32 | Instances trainingInstances = new Instances(trainingReader); 33 | trainingInstances.setClassIndex(trainingInstances.numAttributes() - 1); 34 | 35 | Instances testingInstances = new Instances(testingReader); 36 | testingInstances.setClassIndex(testingInstances.numAttributes() - 1); 37 | 38 | MultilayerPerceptron mlp = new MultilayerPerceptron(); 39 | mlp.setLearningRate(0.1); 40 | mlp.setMomentum(0.2); 41 | 
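        // Editor's note (comment added here; not part of the original listing): in Weka's MultilayerPerceptron,
        // setTrainingTime(2000) sets the number of training epochs, and setHiddenLayers("3") requests a single
        // hidden layer with three nodes (the argument is a comma-separated list of layer sizes).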
mlp.setTrainingTime(2000); 42 | mlp.setHiddenLayers("3"); 43 | mlp.buildClassifier(trainingInstances); 44 | SerializationHelper.write("mlpModel", mlp); 45 | out.println(mlp.getTrainingTime()); 46 | mlp = (MultilayerPerceptron)SerializationHelper.read("mlpModel"); 47 | 48 | 49 | // Evaluate 50 | System.out.println("Starting evaluation"); 51 | Evaluation evaluation = new Evaluation(trainingInstances); 52 | evaluation.evaluateModel(mlp, testingInstances); 53 | System.out.println(evaluation.toSummaryString()); 54 | 55 | // Predict 56 | System.out.println("Starting Predicting"); 57 | for (int i = 0; i < testingInstances.numInstances(); i++) { 58 | double result = mlp.classifyInstance(testingInstances.instance(i)); 59 | // Use for incorrect results 60 | if (result != testingInstances 61 | .instance(i) 62 | .value(testingInstances.numAttributes() - 1)) { 63 | out.println("Classify result: " + result 64 | + " Correct: " + testingInstances.instance(i) 65 | .value(testingInstances.numAttributes() - 1)); 66 | Instance incorrectInstance = testingInstances.instance(i); 67 | incorrectInstance.setDataset(trainingInstances); 68 | double[] distribution = mlp.distributionForInstance(incorrectInstance); 69 | out.println("Probability of being positive: " + distribution[0]); 70 | out.println("Probability of being negative: " + distribution[1]); 71 | } 72 | } 73 | } catch (Exception ex) { 74 | ex.printStackTrace(); 75 | } 76 | } 77 | 78 | public static void main(String[] args) { 79 | new MultilayerPerceptronExample(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 7-Neural networks/NeuralNetworkTraining.java: -------------------------------------------------------------------------------- 1 | import java.text.*; 2 | 3 | public class NeuralNetworkTraining { 4 | 5 | public static void main(String args[]){ 6 | double xorIN[][] ={ 7 | {0.0,0.0}, 8 | {1.0,0.0}, 9 | {0.0,1.0}, 10 | {1.0,1.0}}; 11 | 12 | double xorEXPECTED[][] = { {0.0},{1.0},{1.0},{0.0}}; 13 | 14 | //System.out.println("Learn:"); 15 | 16 | SampleNeuralNetwork network = new SampleNeuralNetwork(2,3,1,0.7,0.9); 17 | 18 | // NumberFormat percentFormat = NumberFormat.getPercentInstance(); 19 | // percentFormat.setMinimumFractionDigits(4); 20 | 21 | 22 | for (int runCnt=0;runCnt<1000;runCnt++) { 23 | for (int loc=0;loc list = new ArrayList<>(); 46 | Scanner scanner = new Scanner(sentence); 47 | while (scanner.hasNext()) { 48 | list.add(scanner.next()); 49 | } 50 | // Convert list to an array 51 | String[] words = new String[1]; 52 | words = list.toArray(words); 53 | 54 | // Build model 55 | POSModel posModel = new POSModel(input); 56 | POSTaggerME posTagger = new POSTaggerME(posModel); 57 | 58 | // Tag words 59 | String[] posTags = posTagger.tag(words); 60 | for (int i = 0; i < posTags.length; i++) { 61 | out.println(words[i] + " - " + posTags[i]); 62 | } 63 | 64 | // Find top sequences 65 | Sequence sequences[] = posTagger.topKSequences(words); 66 | for (Sequence sequence : sequences) { 67 | out.println(sequence); 68 | } 69 | } catch (IOException ex) { 70 | ex.printStackTrace(); 71 | } 72 | } 73 | public void NERExample() {//Models found at http://opennlp.sourceforge.net/models-1.5/ 74 | try (InputStream tokenStream = 75 | new FileInputStream(new File("en-token.bin")); 76 | InputStream personModelStream = new FileInputStream( 77 | new File("en-ner-person.bin"));) { 78 | TokenizerModel tm = new TokenizerModel(tokenStream); 79 | TokenizerME tokenizer = new 
TokenizerME(tm); 80 | 81 | TokenNameFinderModel tnfm = new TokenNameFinderModel(personModelStream); 82 | NameFinderME nf = new NameFinderME(tnfm); 83 | 84 | String sentence = "Mrs. Wilson went to Mary's house for dinner."; 85 | String[] tokens = tokenizer.tokenize(sentence); 86 | 87 | Span[] spans = nf.find(tokens); 88 | 89 | for (int i = 0; i < spans.length; i++) { 90 | out.println(spans[i] + " - " + tokens[spans[i].getStart()]); 91 | } 92 | } catch (Exception ex) { 93 | ex.printStackTrace(); 94 | } 95 | 96 | try (InputStream tokenStream = 97 | new FileInputStream("en-token.bin"); 98 | InputStream locationModelStream = new FileInputStream( 99 | new File("en-ner-location.bin"));) { 100 | 101 | TokenizerModel tm = new TokenizerModel(tokenStream); 102 | TokenizerME tokenizer = new TokenizerME(tm); 103 | 104 | TokenNameFinderModel tnfm = new TokenNameFinderModel(locationModelStream); 105 | NameFinderME nf = new NameFinderME(tnfm); 106 | 107 | sentence = "Enid is located north of Oklahoma City."; 108 | // sentence = "Pond Creek is located north of Oklahoma City."; 109 | String tokens[] = tokenizer.tokenize(sentence); 110 | 111 | Span spans[] = nf.find(tokens); 112 | 113 | for (int i = 0; i < spans.length; i++) { 114 | out.println(spans[i] + " - " + tokens[spans[i].getStart()]); 115 | } 116 | } catch (Exception ex) { 117 | ex.printStackTrace(); 118 | } 119 | } 120 | 121 | public void extractReltionships() { 122 | try (InputStream modelInputStream = new FileInputStream( 123 | new File("en-parser-chunking.bin"));) { 124 | 125 | ParserModel parserModel = new ParserModel(modelInputStream); 126 | Parser parser = ParserFactory.create(parserModel); 127 | 128 | String sentence = "Let's parse this sentence."; 129 | Parse[] parseTrees = ParserTool.parseLine(sentence, parser, 3); 130 | 131 | for(Parse tree : parseTrees) { 132 | tree.show(); 133 | out.println("Probability: " + tree.getProb()); 134 | } 135 | for(Parse tree : parseTrees) { 136 | out.println("Probability: " + tree.getProb()); 137 | } 138 | } catch (Exception ex) { 139 | ex.printStackTrace(); 140 | } 141 | } 142 | 143 | public NLPExamples() { 144 | // POSExample(); 145 | // scannerClassExample(); 146 | // lingPipeExamples(); 147 | // NERExample(); 148 | // extractReltionships(); 149 | } 150 | 151 | public static void main(String[] args) { 152 | new NLPExamples(); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 9-Text Analysis/ParagraphVectorsClassifierExample.java: -------------------------------------------------------------------------------- 1 | package com.mycompany.sentimentanalysis; 2 | 3 | import org.datavec.api.util.ClassPathResource; 4 | import org.deeplearning4j.berkeley.Pair; 5 | //import org.deeplearning4j.examples.nlp.paragraphvectors.tools.FileLabelAwareIterator; 6 | //import org.deeplearning4j.examples.nlp.paragraphvectors.tools.LabelSeeker; 7 | //import org.deeplearning4j.examples.nlp.paragraphvectors.tools.MeansBuilder; 8 | import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; 9 | import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; 10 | import org.deeplearning4j.models.word2vec.VocabWord; 11 | import org.deeplearning4j.text.documentiterator.LabelAwareIterator; 12 | import org.deeplearning4j.text.documentiterator.LabelledDocument; 13 | import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor; 14 | import 
org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory; 15 | import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; 16 | import org.nd4j.linalg.api.ndarray.INDArray; 17 | 18 | import static java.lang.System.out; 19 | import java.util.List; 20 | 21 | /** 22 | * 23 | * @author raver119@gmail.com adapted by Jennifer Reese 24 | */ 25 | public class ParagraphVectorsClassifierExample { 26 | 27 | static ParagraphVectors pVect; 28 | static LabelAwareIterator iter; 29 | static TokenizerFactory tFact; 30 | 31 | 32 | public static void main(String[] args) throws Exception { 33 | 34 | ClassPathResource resource = new ClassPathResource("paravec/labeled"); 35 | 36 | iter = new FileLabelAwareIterator.Builder() 37 | .addSourceFolder(resource.getFile()) 38 | .build(); 39 | 40 | tFact = new DefaultTokenizerFactory(); 41 | tFact.setTokenPreProcessor(new CommonPreprocessor()); 42 | 43 | pVect = new ParagraphVectors.Builder() 44 | .learningRate(0.025) 45 | .minLearningRate(0.001) 46 | .batchSize(1000) 47 | .epochs(20) 48 | .iterate(iter) 49 | .trainWordVectors(true) 50 | .tokenizerFactory(tFact) 51 | .build(); 52 | 53 | pVect.fit(); 54 | 55 | 56 | ClassPathResource unlabeledText = new ClassPathResource("paravec/unlabeled"); 57 | FileLabelAwareIterator unlabeledIter = new FileLabelAwareIterator.Builder() 58 | .addSourceFolder(unlabeledText.getFile()) 59 | .build(); 60 | 61 | 62 | MeansBuilder mBuilder = new MeansBuilder( 63 | (InMemoryLookupTable) pVect.getLookupTable(), 64 | tFact); 65 | LabelSeeker lSeeker = new LabelSeeker(iter.getLabelsSource().getLabels(), 66 | (InMemoryLookupTable) pVect.getLookupTable()); 67 | 68 | while (unlabeledIter.hasNextDocument()) { 69 | LabelledDocument doc = unlabeledIter.nextDocument(); 70 | INDArray docCentroid = mBuilder.documentAsVector(doc); 71 | List> scores = lSeeker.getScores(docCentroid); 72 | 73 | out.println("Document '" + doc.getLabel() + "' falls into the following categories: "); 74 | for (Pair score : scores) { 75 | out.println(" " + score.getFirst() + ": " + score.getSecond()); 76 | } 77 | 78 | } 79 | } 80 | } -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Readme.txt: -------------------------------------------------------------------------------- 1 | Chapter 01 : Has no Code files. 2 | Chapter 02 : Contain Code files. 3 | Chapter 03 : Contain Code files. 4 | Chapter 04 : Contain Code files. 5 | Chapter 05 : Contain Code files. 6 | Chapter 06 : Contain Code files. 7 | Chapter 07 : Contain Code files. 8 | Chapter 08 : Contain Code files. 9 | Chapter 09 : Contain Code files. 10 | Chapter 10 : Contain Code files. 11 | Chapter 11 : Contain Code files. 12 | Chapter 12 : Contain Code files. 13 | 14 | All the code files are placed in the respective chapter folder. 15 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/BlikiExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package packt; 7 | 8 | import info.bliki.api.Page; 9 | import info.bliki.api.User; 10 | import info.bliki.wiki.filter.SectionHeader; 11 | import info.bliki.wiki.model.ITableOfContent; 12 | import info.bliki.wiki.model.Reference; 13 | import info.bliki.wiki.model.WikiModel; 14 | import java.util.List; 15 | import static java.lang.System.out; 16 | 17 | public class BlikiExample { 18 | 19 | public static void main(String[] args) { 20 | User user = new User("", "", "http://en.wikipedia.org/w/api.php"); 21 | user.login(); 22 | 23 | String[] titles = {"Data science"}; 24 | List pageList = user.queryContent(titles); 25 | 26 | for (Page page : pageList) { 27 | WikiModel wikiModel = new WikiModel("${image}", "${title}"); 28 | out.println("Image Base URL: " + wikiModel.getImageBaseURL() + "\n" 29 | + "Page Name: " + wikiModel.getPageName() + "\n" 30 | + "Wiki Base URL: " + wikiModel.getWikiBaseURL()); 31 | String htmlStr = wikiModel.render("This is a simple [[Hello World]] wiki tag"); 32 | System.out.println(htmlStr); 33 | 34 | String htmlText = wikiModel.render(page.toString()); 35 | out.println("Title: " + page.getTitle() + "\n" 36 | + "Image URL: " + page.getImageUrl()+ "\n" 37 | + "Timestamp: " + page.getCurrentRevision().getTimestamp()); 38 | 39 | List referenceList = wikiModel.getReferences(); 40 | out.println(referenceList.size()); 41 | for(Reference reference : referenceList) { 42 | out.println(reference.getRefString()); 43 | } 44 | 45 | ITableOfContent toc = wikiModel.getTableOfContent(); 46 | List sections = toc.getSectionHeaders(); 47 | for(SectionHeader sh : sections) { 48 | out.println(sh.getFirst()); 49 | } 50 | 51 | out.println(htmlText); 52 | } 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/CrawlerController.java: -------------------------------------------------------------------------------- 1 | package packt.crawlerj4mavenexample; 2 | 3 | import edu.uci.ics.crawler4j.crawler.CrawlConfig; 4 | import edu.uci.ics.crawler4j.crawler.CrawlController; 5 | import edu.uci.ics.crawler4j.fetcher.PageFetcher; 6 | import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig; 7 | import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer; 8 | 9 | public class CrawlerController { 10 | 11 | public static void main(String[] args) throws Exception { 12 | int numberOfCrawlers = 2; 13 | CrawlConfig config = new CrawlConfig(); 14 | String crawlStorageFolder = "data"; 15 | 16 | config.setCrawlStorageFolder(crawlStorageFolder); 17 | config.setPolitenessDelay(500); 18 | config.setMaxDepthOfCrawling(2); 19 | config.setMaxPagesToFetch(20); 20 | config.setIncludeBinaryContentInCrawling(false); 21 | 22 | PageFetcher pageFetcher = new PageFetcher(config); 23 | RobotstxtConfig robotstxtConfig = new RobotstxtConfig(); 24 | RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher); 25 | CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer); 26 | 27 | controller.addSeed("https://en.wikipedia.org/wiki/Bishop_Rock,_Isles_of_Scilly"); 28 | 29 | controller.start(SampleCrawler.class, numberOfCrawlers); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/DatabaseExample.java: -------------------------------------------------------------------------------- 1 | package packt.databasemavenexample; 2 | 3 | /* 4 | 5 | 6 | mysql 7 | 
mysql-connector-java 8 | 6.0.2 9 | 10 | 11 | 12 | org.jsoup 13 | jsoup 14 | 1.9.1 15 | 16 | 17 | */ 18 | import java.sql.Connection; 19 | import java.sql.DriverManager; 20 | import java.sql.PreparedStatement; 21 | import java.sql.ResultSet; 22 | import java.sql.SQLException; 23 | import java.sql.Statement; 24 | import static java.lang.System.out; 25 | 26 | public class DatabaseExample { 27 | 28 | private Connection connection; 29 | 30 | public DatabaseExample() { 31 | try { 32 | Class.forName("com.mysql.jdbc.Driver"); 33 | String url = "jdbc:mysql://localhost:3306/example"; 34 | connection = DriverManager.getConnection(url, "root", "explore"); 35 | 36 | // Needed to reset the contents of the table 37 | Statement statement = connection.createStatement(); 38 | statement.execute("TRUNCATE URLTABLE;"); 39 | 40 | String insertSQL = "INSERT INTO `example`.`URLTABLE` " 41 | + "(`url`) VALUES " + "(?);"; 42 | PreparedStatement stmt = connection.prepareStatement(insertSQL); 43 | 44 | stmt.setString(1, "https://en.wikipedia.org/wiki/Data_science"); 45 | stmt.execute(); 46 | stmt.setString(1, "https://en.wikipedia.org/wiki/Bishop_Rock,_Isles_of_Scilly"); 47 | stmt.execute(); 48 | 49 | // String selectSQL = "select * from Record where URL = '" + url + "'"; 50 | String selectSQL = "select * from URLTABLE"; 51 | statement = connection.createStatement(); 52 | ResultSet resultSet = statement.executeQuery(selectSQL); 53 | 54 | out.println("List of URLs"); 55 | while (resultSet.next()) { 56 | out.println(resultSet.getString(2)); 57 | } 58 | } catch (SQLException | ClassNotFoundException ex) { 59 | ex.printStackTrace(); 60 | } 61 | } 62 | 63 | public static void main(String[] args) { 64 | new DatabaseExample(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/FindPicture.java: -------------------------------------------------------------------------------- 1 | package packt.flickrdemonstration; 2 | 3 | import com.flickr4java.flickr.Flickr; 4 | import com.flickr4java.flickr.FlickrException; 5 | import com.flickr4java.flickr.REST; 6 | import com.flickr4java.flickr.photos.Photo; 7 | import com.flickr4java.flickr.photos.PhotoList; 8 | import com.flickr4java.flickr.photos.PhotosInterface; 9 | import com.flickr4java.flickr.photos.SearchParameters; 10 | import com.flickr4java.flickr.photos.Size; 11 | import java.awt.image.BufferedImage; 12 | import java.io.File; 13 | import java.io.IOException; 14 | import static java.lang.System.out; 15 | import java.util.Collection; 16 | import javax.imageio.ImageIO; 17 | 18 | public class FindPicture { 19 | 20 | public FindPicture() { 21 | try { 22 | String apikey = "Your API key"; 23 | String secret = "Your secret"; 24 | 25 | Flickr flickr = new Flickr(apikey, secret, new REST()); 26 | 27 | SearchParameters searchParameters = new SearchParameters(); 28 | searchParameters.setBBox("-180", "-90", "180", "90"); 29 | searchParameters.setMedia("photos"); 30 | PhotoList list = flickr.getPhotosInterface().search(searchParameters, 10, 0); 31 | 32 | out.println("Image List"); 33 | for (int i = 0; i < list.size(); i++) { 34 | Photo photo = list.get(i); 35 | out.println("Image: " + i 36 | + "\nTitle: " + photo.getTitle() 37 | + "\nMedia: " + photo.getOriginalFormat() 38 | + "\nPublic: " + photo.isPublicFlag() 39 | + "\nPublic: " + photo.isPublicFlag() 40 | + "\nUrl: " + photo.getUrl() 41 | + "\n"); 42 | } 43 | out.println(); 44 | 45 | PhotosInterface pi = new 
PhotosInterface(apikey, secret, new REST()); 46 | out.println("pi: " + pi); 47 | Photo currentPhoto = list.get(0); 48 | out.println("currentPhoto url: " + currentPhoto.getUrl()); 49 | 50 | // Get image using URL 51 | BufferedImage bufferedImage = pi.getImage(currentPhoto.getUrl()); 52 | out.println("bi: " + bufferedImage); 53 | 54 | // Get image using Photo instance 55 | bufferedImage = pi.getImage(currentPhoto, Size.SMALL); 56 | 57 | // Save image to file 58 | out.println("bufferedImage: " + bufferedImage); 59 | File outputfile = new File("image.jpg"); 60 | ImageIO.write(bufferedImage, "jpg", outputfile); 61 | } catch (FlickrException | IOException ex) { 62 | ex.printStackTrace(); 63 | } 64 | } 65 | 66 | public static void main(String[] args) { 67 | new FindPicture(); 68 | } 69 | 70 | public void displaySizes(Photo photo) { 71 | out.println("---Sizes----"); 72 | Collection sizes = photo.getSizes(); 73 | for (Size size : sizes) { 74 | out.println(size.getLabel()); 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/HttpURLConnectionExample.java: -------------------------------------------------------------------------------- 1 | package httpurlconnectionexample; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.InputStreamReader; 7 | import java.net.HttpURLConnection; 8 | import java.net.MalformedURLException; 9 | import java.net.URL; 10 | import static java.lang.System.out; 11 | 12 | public class HttpURLConnectionExample { 13 | 14 | public static void main(String[] args) { 15 | try { 16 | URL url = new URL("https://en.wikipedia.org/wiki/Data_science"); 17 | HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 18 | connection.setRequestMethod("GET"); 19 | connection.connect(); 20 | 21 | out.println("Response Code: " + connection.getResponseCode()); 22 | out.println("Content Type: " + connection.getContentType()); 23 | out.println("Content Length: " + connection.getContentLength()); 24 | 25 | InputStreamReader isr = new InputStreamReader( 26 | (InputStream) connection.getContent()); 27 | BufferedReader br = new BufferedReader(isr); 28 | StringBuilder buffer = new StringBuilder(); 29 | String line; 30 | do { 31 | line = br.readLine(); 32 | buffer.append(line + "\n"); 33 | } while (line != null); 34 | out.println(buffer.toString()); 35 | } catch (MalformedURLException ex) { 36 | ex.printStackTrace(); 37 | } catch (IOException ex) { 38 | ex.printStackTrace(); 39 | } 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/JSoupExamples.java: -------------------------------------------------------------------------------- 1 | package packt.webcrawlermavenjsoup; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import org.jsoup.Jsoup; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | import static java.lang.System.out; 10 | 11 | public class JSoupExamples { 12 | 13 | public JSoupExamples() { 14 | try { 15 | Document document = Jsoup.connect("https://en.wikipedia.org/wiki/Data_science").get(); 16 | displayImages(document); 17 | } catch (IOException ex) { 18 | ex.printStackTrace(); 19 | } 20 | loadDocumentFromFile(); 21 | } 22 | 23 | public void loadDocumentFromFile() { 24 | try { 25 | File file = new 
File("Example.html"); 26 | Document document = Jsoup.parse(file, "UTF-8", ""); 27 | listHyperlinks(document); 28 | } catch (IOException ex) { 29 | ex.printStackTrace(); 30 | } 31 | } 32 | 33 | public void parseString() { 34 | String html = "\n" 35 | + "Example Document\n" 36 | + "\n" 37 | + "

<p>The body of the document</p>\n" 38 | + "Interesting Links:\n" 39 | + "<br>\n" 40 | + "<a href=\"https://en.wikipedia.org/wiki/Data_science\">Data Science</a>\n" 41 | + "<br>\n" 42 | + "<a href=\"https://jsoup.org/\">Jsoup</a>\n" 43 | + "<br>\n" 44 | + "Images:\n" 45 | + "<br>
\n" 46 | + " \"Eye \n" 47 | + "\n" 48 | + ""; 49 | Document document = Jsoup.parse(html); 50 | listHyperlinks(document); 51 | } 52 | 53 | public void displayBodyText(Document document) { 54 | // Displays the entire body of the document 55 | String title = document.title(); 56 | out.println("Title: " + title); 57 | 58 | out.println("---Body---"); 59 | Elements element = document.select("body"); 60 | out.println("Text: " + element.text()); 61 | } 62 | 63 | public void displayImages(Document document) { 64 | out.println("---Images---"); 65 | Elements images = document.select("img[src$=.png]"); 66 | for (Element image : images) { 67 | out.println("\nImage: " + image); 68 | } 69 | } 70 | 71 | public void listHyperlinks(Document document) { 72 | out.println("---Links---"); 73 | Elements links = document.select("a[href]"); 74 | for (Element link : links) { 75 | out.println("Link: " + link.attr("href") 76 | + " Text: " + link.text()); 77 | } 78 | out.println("\n****************"); 79 | } 80 | 81 | public static void main(String[] args) { 82 | new JSoupExamples(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/SampleCrawler.java: -------------------------------------------------------------------------------- 1 | package packt.crawlerj4mavenexample; 2 | 3 | import edu.uci.ics.crawler4j.crawler.Page; 4 | import edu.uci.ics.crawler4j.crawler.WebCrawler; 5 | import edu.uci.ics.crawler4j.parser.HtmlParseData; 6 | import edu.uci.ics.crawler4j.url.WebURL; 7 | import java.util.regex.Pattern; 8 | import static java.lang.System.out; 9 | 10 | public class SampleCrawler extends WebCrawler { 11 | private static final Pattern IMAGE_EXTENSIONS = Pattern.compile(".*\\.(bmp|gif|jpg|png)$"); 12 | 13 | @Override 14 | public boolean shouldVisit(Page referringPage, WebURL url) { 15 | String href = url.getURL().toLowerCase(); 16 | if (IMAGE_EXTENSIONS.matcher(href).matches()) { 17 | return false; 18 | } 19 | return href.startsWith("https://en.wikipedia.org/wiki/"); 20 | } 21 | 22 | @Override 23 | public void visit(Page page) { 24 | int docid = page.getWebURL().getDocid(); 25 | String url = page.getWebURL().getURL(); 26 | 27 | if (page.getParseData() instanceof HtmlParseData) { 28 | HtmlParseData htmlParseData = (HtmlParseData) page.getParseData(); 29 | String text = htmlParseData.getText(); 30 | if (text.contains("shipping route")) { 31 | out.println("\nURL: " + url); 32 | out.println("Text: " + text); 33 | out.println("Text length: " + text.length()); 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/SampleStreamExample.java: -------------------------------------------------------------------------------- 1 | 2 | 3 | package packt.TwitterExample; 4 | 5 | import com.twitter.hbc.ClientBuilder; 6 | import com.twitter.hbc.core.Constants; 7 | import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint; 8 | import com.twitter.hbc.core.processor.StringDelimitedProcessor; 9 | import com.twitter.hbc.httpclient.BasicClient; 10 | import com.twitter.hbc.httpclient.auth.Authentication; 11 | import com.twitter.hbc.httpclient.auth.OAuth1; 12 | 13 | import java.util.concurrent.BlockingQueue; 14 | import java.util.concurrent.LinkedBlockingQueue; 15 | import java.util.concurrent.TimeUnit; 16 | 17 | public class SampleStreamExample { 18 | 19 | public static void streamTwitter(String consumerKey, String 
consumerSecret, String accessToken, String accessSecret) throws InterruptedException { 20 | 21 | BlockingQueue statusQueue = new LinkedBlockingQueue(10000); 22 | 23 | StatusesSampleEndpoint ending = new StatusesSampleEndpoint(); 24 | ending.stallWarnings(false); 25 | 26 | Authentication twitterAuth = new OAuth1(consumerKey, consumerSecret, accessToken, accessSecret); 27 | 28 | BasicClient twitterClient = new ClientBuilder() 29 | .name("Twitter client") 30 | .hosts(Constants.STREAM_HOST) 31 | .endpoint(ending) 32 | .authentication(twitterAuth) 33 | .processor(new StringDelimitedProcessor(statusQueue)) 34 | .build(); 35 | 36 | 37 | twitterClient.connect(); 38 | 39 | 40 | for (int msgRead = 0; msgRead < 1000; msgRead++) { 41 | if (twitterClient.isDone()) { 42 | System.out.println(twitterClient.getExitEvent().getMessage()); 43 | break; 44 | } 45 | 46 | String msg = statusQueue.poll(10, TimeUnit.SECONDS); 47 | if (msg == null) { 48 | System.out.println("Waited 10 seconds - no message received"); 49 | } else { 50 | System.out.println(msg); 51 | } 52 | } 53 | 54 | twitterClient.stop(); 55 | 56 | System.out.printf("%d messages processed!\n", twitterClient.getStatsTracker().getNumMessages()); 57 | } 58 | 59 | public static void main(String[] args) { 60 | 61 | String myKey = "myKey"; 62 | String mySecret = "mySecret"; 63 | String myToken = "myToken"; 64 | String myAccess = "myAccess"; 65 | 66 | try { 67 | SampleStreamExample.streamTwitter(myKey, mySecret, myToken, myAccess); 68 | 69 | } catch (InterruptedException e) { 70 | System.out.println(e); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/Search.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | /* 4 | 5 | 6 | com.google.oauth-client 7 | google-oauth-client-jetty 8 | 1.20.0 9 | 10 | 11 | com.google.apis 12 | google-api-services-youtube 13 | v3-rev171-1.22.0 14 | 15 | 16 | com.google.oauth-client 17 | google-oauth-client 18 | 1.22.0 19 | jar 20 | 21 | 22 | com.google.http-client 23 | google-http-client-jackson2 24 | 1.22.0 25 | jar 26 | 27 | 28 | */ 29 | import com.google.api.client.googleapis.json.GoogleJsonResponseException; 30 | import com.google.api.client.http.HttpRequest; 31 | import com.google.api.client.http.HttpRequestInitializer; 32 | import com.google.api.services.youtube.YouTube; 33 | import com.google.api.services.youtube.model.ResourceId; 34 | import com.google.api.services.youtube.model.SearchListResponse; 35 | import com.google.api.services.youtube.model.SearchResult; 36 | import com.google.api.services.youtube.model.Thumbnail; 37 | 38 | import java.io.IOException; 39 | import java.util.List; 40 | import static java.lang.System.out; 41 | 42 | // Adapted from https://developers.google.com/youtube/v3/code_samples/java#search_by_keyword 43 | public class Search { 44 | 45 | public static void main(String[] args) { 46 | try { 47 | YouTube youtube = new YouTube.Builder( 48 | Auth.HTTP_TRANSPORT, 49 | Auth.JSON_FACTORY, 50 | new HttpRequestInitializer() { 51 | public void initialize(HttpRequest request) throws IOException { 52 | } 53 | }) 54 | .setApplicationName("application_name") 55 | .build(); 56 | 57 | String queryTerm = "cats"; 58 | YouTube.Search.List search = youtube 59 | .search() 60 | .list("id,snippet"); 61 | 62 | String apiKey = "AIzaSyDiVWbm1q3s3cI3RZNCfH85hXS95H8opgs"; 63 | search.setKey(apiKey); 64 | search.setQ(queryTerm); 65 | 66 | // Valid 
types: "channel" "playlist" "video" 67 | search.setType("video"); 68 | 69 | search.setFields("items(id/kind,id/videoId,snippet/title," + 70 | "snippet/description,snippet/thumbnails/default/url)"); 71 | search.setMaxResults(10L); 72 | SearchListResponse searchResponse = search.execute(); 73 | 74 | List searchResultList = searchResponse.getItems(); 75 | SearchResult video = searchResultList.iterator().next(); 76 | Thumbnail thumbnail = video.getSnippet().getThumbnails().getDefault(); 77 | 78 | out.println("Kind: " + video.getKind()); 79 | out.println("Video Id: " + video.getId().getVideoId()); 80 | out.println("Title: " + video.getSnippet().getTitle()); 81 | out.println("Description: " + video.getSnippet().getDescription()); 82 | out.println("Thumbnail: " + thumbnail.getUrl()); 83 | } catch (GoogleJsonResponseException ex) { 84 | ex.printStackTrace(); 85 | } catch (IOException ex) { 86 | ex.printStackTrace(); 87 | } 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/SimpleWebCrawler.java: -------------------------------------------------------------------------------- 1 | package packt.webcrawlermavenexample; 2 | 3 | import java.io.IOException; 4 | import java.sql.SQLException; 5 | import org.jsoup.Jsoup; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | import static java.lang.System.out; 10 | import java.util.ArrayList; 11 | 12 | // Adapted from: http://www.programcreek.com/2012/12/how-to-make-a-web-crawler-using-java/ 13 | public class SimpleWebCrawler { 14 | 15 | private final String topic; 16 | private final String startingURL; 17 | private final String urlLimiter; 18 | private final int pageLimit = 20; 19 | private final ArrayList visitedList = new ArrayList<>(); 20 | private final ArrayList pageList = new ArrayList<>(); 21 | 22 | public SimpleWebCrawler() { 23 | startingURL = "https://en.wikipedia.org/wiki/Bishop_Rock,_Isles_of_Scilly"; 24 | urlLimiter = "Bishop_Rock"; 25 | topic = "shipping route"; 26 | visitPage(startingURL); 27 | } 28 | 29 | public void visitPage(String url) { 30 | if (pageList.size() >= pageLimit) { 31 | return; 32 | } 33 | if (visitedList.contains(url)) { 34 | // URL already visited 35 | } else { 36 | visitedList.add(url); 37 | try { 38 | Document doc = Jsoup.connect(url).get(); 39 | if (doc.text().contains(topic)) { 40 | out.println((pageList.size() + 1) + ": [" + url + "]"); 41 | pageList.add(url); 42 | 43 | // Process page links 44 | Elements questions = doc.select("a[href]"); 45 | for (Element link : questions) { 46 | if (link.attr("href").contains(urlLimiter)) { 47 | visitPage(link.attr("abs:href")); 48 | } 49 | } 50 | } 51 | } catch (Exception ex) { 52 | ex.printStackTrace(); 53 | } 54 | } 55 | } 56 | 57 | public static void main(String[] args) { 58 | new SimpleWebCrawler(); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/App (2).java: -------------------------------------------------------------------------------- 1 | package packt.com.packt.java.guava; 2 | 3 | import static java.lang.System.out; 4 | 5 | import com.google.common.base.CharMatcher; 6 | import com.google.common.base.Joiner; 7 | import com.google.common.base.Splitter; 8 | 9 | /** 10 | * Hello world! 
11 | * 12 | */ 13 | public class App 14 | { 15 | public static void main( String[] args ) 16 | { 17 | String dirtyText = "Call me Ishmael. Some years ago- never mind how"; 18 | dirtyText += " long precisely - having little or no money in my purse,"; 19 | dirtyText += " and nothing particular to interest me on shore, I thought"; 20 | dirtyText += " I would sail about a little and see the watery part of the world."; 21 | 22 | // Splitter split = Splitter.on(',').omitEmptyStrings().trimResults(); 23 | // Iterable words = split.split(dirtyText); 24 | // for(String token: words){ 25 | //// out.println(token); 26 | // } 27 | //cleanAndJoin(dirtyText); 28 | findReplaceGuava(dirtyText); 29 | 30 | } 31 | 32 | public static String cleanAndJoin(String text){ 33 | out.println("Dirty text: " + text); 34 | String[] words = text.toLowerCase().trim().split("[\\W\\d]+"); 35 | String cleanText = Joiner.on(" ").skipNulls().join(words); 36 | out.println("Cleaned text: " + cleanText); 37 | return cleanText; 38 | } 39 | 40 | 41 | public static String findReplaceGuava(String text){ 42 | out.println(text); 43 | text = text.replace("me", " "); 44 | out.println("With double spaces: " + text); 45 | 46 | // trim whitespace at ends, and replace/collapse whitespace into single spaces 47 | String spaced = CharMatcher.WHITESPACE.trimAndCollapseFrom(text, ' '); 48 | out.println("With double spaces removed: " + spaced); 49 | String noDigits = CharMatcher.JAVA_DIGIT.replaceFrom(text, "*"); // star out all digits 50 | String lowerAndDigit = CharMatcher.JAVA_DIGIT.or(CharMatcher.JAVA_LOWER_CASE).retainFrom(text); 51 | // eliminate all characters that aren't digits or lowercase 52 | 53 | return text; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/App.java: -------------------------------------------------------------------------------- 1 | package packt.packt.com.java.apachecommons; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.apache.commons.lang3.text.StrSubstitutor; 5 | import org.apache.commons.lang3.text.StrTokenizer; 6 | import org.apache.commons.validator.EmailValidator; 7 | import org.apache.commons.validator.routines.IntegerValidator; 8 | 9 | import static java.lang.System.*; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | import java.util.regex.Pattern; 14 | 15 | public class App { 16 | 17 | public static void main(String[] args){ 18 | String dirtyText = "Call me Ishmael. 
Some years ago- never mind how"; 19 | dirtyText += " long precisely - having little or no money in my purse,"; 20 | dirtyText += " and nothing particular to interest me on shore, I thought"; 21 | dirtyText += " I would sail about a little and see the watery part of the world."; 22 | // apacheCommonsTokenizer(dirtyText); 23 | 24 | validateEmailApache(dirtyText); 25 | //out.println(validateInt("1234")); 26 | //out.println(findReplaceApacheCommons(dirtyText,"me","X")); 27 | } 28 | 29 | public static void apacheCommonsTokenizer(String text){ 30 | StrTokenizer tokenizer = new StrTokenizer(text,","); 31 | while (tokenizer.hasNext()) { 32 | out.println(tokenizer.next()); 33 | } 34 | 35 | } 36 | 37 | public static String validateEmailApache(String email){ 38 | email = email.trim(); 39 | EmailValidator eValidator = EmailValidator.getInstance(); 40 | if(eValidator.isValid(email)){ 41 | return email + " is a valid email address."; 42 | }else{ 43 | return email + " is not a valid email address."; 44 | } 45 | } 46 | 47 | public static String validateInt(String text){ 48 | IntegerValidator intValidator = IntegerValidator.getInstance(); 49 | if(intValidator.isValid(text)){ 50 | return text + " is a valid integer."; 51 | }else{ 52 | return text + " is not a valid integer."; 53 | } 54 | } 55 | 56 | public static String findReplaceApacheCommons(String text, String toFind, String replaceWith){ 57 | out.println(text); 58 | text = StringUtils.replacePattern(text, "\\W\\s", " "); 59 | out.println(text); 60 | //out.println(StringUtils.replace(text, " me ", "X")); 61 | return StringUtils.replace(text, " me ", "X"); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/CSVwithScanner.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.util.Scanner; 7 | 8 | public class CSVwithScanner { 9 | public static void main(String[] args){ 10 | 11 | 12 | try { 13 | File demoFile = new File("C:\\Users\\jreese\\workspace\\Packt Data Science\\Demographics.txt"); 14 | Scanner getData = new Scanner(demoFile); 15 | while(getData.hasNext()){ 16 | out.println(getData.nextLine()); 17 | } 18 | 19 | } catch (FileNotFoundException e) { 20 | 21 | e.printStackTrace(); 22 | } 23 | 24 | 25 | 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/Cat.jpg -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/DataImputation.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Optional; 5 | 6 | public class DataImputation { 7 | 8 | public static void main(String[] args) { 9 | 10 | tempExample(); 11 | 12 | } 13 | 14 | public static void tempExample(){ 15 | // double[] tempList = new double[365]; 16 | // for(int x = 0; x < tempList.length; x++){ 17 | // tempList[x] = Math.random()*100; 18 | // } 19 | // tempList[5] = 0; 20 | // double sum = 0; 
21 | // for(double d : tempList){ 22 | // out.println(d); 23 | // sum += d; 24 | // } 25 | // out.println(sum/365); 26 | String useName = ""; 27 | String[] nameList = {"Amy","Bob","Sally","Sue","Don","Rick",null,"Betsy"}; 28 | Optional tempName; 29 | for(String name : nameList){ 30 | tempName = Optional.ofNullable(name); 31 | useName = tempName.orElse("DEFAULT"); 32 | out.println("Name to use = " + useName); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Dogs.java: -------------------------------------------------------------------------------- 1 | 2 | public class Dogs { 3 | 4 | private String name; 5 | private int age; 6 | 7 | public Dogs(){ 8 | name = "Fido"; 9 | age = 0; 10 | } 11 | 12 | public Dogs(String n){ 13 | name = n; 14 | age = 0; 15 | } 16 | 17 | public Dogs(int a){ 18 | name = "Fido"; 19 | age = a; 20 | } 21 | 22 | public Dogs(String n, int a){ 23 | name = n; 24 | age = a; 25 | } 26 | 27 | public String getName(){ 28 | return name; 29 | } 30 | 31 | public int getAge(){ 32 | return age; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/GrayScaleParrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/GrayScaleParrot.png -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/OCRExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/OCRExample.png -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/OpenCVNonMavenExamples.java: -------------------------------------------------------------------------------- 1 | package opencvnonmavenexamples; 2 | 3 | import org.opencv.core.Core; 4 | import org.opencv.core.CvType; 5 | import static org.opencv.core.CvType.CV_8UC1; 6 | import org.opencv.core.Mat; 7 | import org.opencv.core.Scalar; 8 | import org.opencv.core.Size; 9 | import org.opencv.imgcodecs.Imgcodecs; 10 | import org.opencv.imgproc.Imgproc; 11 | 12 | public class OpenCVNonMavenExamples { 13 | 14 | public OpenCVNonMavenExamples() { 15 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 16 | enhanceImageBrightness(); 17 | enhanceImageContrast(); 18 | // sharpenImage(); 19 | smoothImage(); 20 | resizeImage(); 21 | convertImage(); 22 | // noiseRemoval(); 23 | // denoise(); 24 | // convertToTIFF(); 25 | } 26 | 27 | public static void main(String[] args) { 28 | new OpenCVNonMavenExamples(); 29 | } 30 | 31 | // Histogram equalization is used to improve the overall appearnace of an image. 32 | // http://docs.opencv.org/2.4/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.html 33 | // From: http://www.tutorialspoint.com/java_dip/enhancing_image_contrast.htm 34 | // Enhancing grayscale images with histogram equalization. 
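// Imgproc.equalizeHist() redistributes the most frequent intensity values across the full 0-255 range of an 8-bit grayscale image, which generally raises its global contrast.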
35 | public void enhanceImageContrast() { 36 | Mat source = Imgcodecs.imread("GrayScaleParrot.png", 37 | Imgcodecs.CV_LOAD_IMAGE_GRAYSCALE); 38 | Mat destination = new Mat(source.rows(), source.cols(), source.type()); 39 | Imgproc.equalizeHist(source, destination); 40 | Imgcodecs.imwrite("enhancedParrot.jpg", destination); 41 | } 42 | 43 | public void smoothImage() { 44 | // Smoothing, also called blurring, will make the edges soother. 45 | Mat source = Imgcodecs.imread("cat.jpg"); 46 | Mat destination = source.clone(); 47 | for (int i = 0; i < 25; i++) { 48 | Mat sourceImage = destination.clone(); 49 | Imgproc.blur(sourceImage, destination, new Size(3.0, 3.0)); 50 | } 51 | Imgcodecs.imwrite("smoothCat.jpg", destination); 52 | } 53 | 54 | public void sharpenImage() { 55 | String fileName = "SharpnessExample2.png"; 56 | fileName = "smoothCat.jpg"; 57 | fileName = "blurredText.jpg"; 58 | fileName = "Blurred Text3.jpg"; 59 | try { 60 | // Not working that well !!! 61 | Mat source = Imgcodecs.imread(fileName, 62 | // Imgcodecs.CV_LOAD_IMAGE_COLOR); 63 | Imgcodecs.CV_LOAD_IMAGE_GRAYSCALE); 64 | Mat destination = new Mat(source.rows(), source.cols(), source.type()); 65 | Imgproc.GaussianBlur(source, destination, new Size(0, 0), 10); 66 | // The following was used witht he cat 67 | // Core.addWeighted(source, 1.5, destination, -0.75, 0, destination); 68 | // Core.addWeighted(source, 2.5, destination, -1.5, 0, destination); 69 | Core.addWeighted(source, 1.5, destination, -0.75, 0, destination); 70 | Imgcodecs.imwrite("sharpenedCat.jpg", destination); 71 | } catch (Exception ex) { 72 | ex.printStackTrace(); 73 | } 74 | } 75 | 76 | // Adapted from: http://www.tutorialspoint.com/java_dip/enhancing_image_brightness.htm 77 | public void enhanceImageBrightness() { 78 | double alpha = 1; // Change to 2 for more brightness 79 | double beta = 50; 80 | String fileName = "cat.jpg"; 81 | 82 | Mat source = Imgcodecs.imread("cat.jpg"); 83 | Mat destination = new Mat(source.rows(), source.cols(), 84 | source.type()); 85 | source.convertTo(destination, -1, 1, 50); 86 | Imgcodecs.imwrite("brighterCat.jpg", destination); 87 | } 88 | 89 | public void resizeImage() { 90 | Mat source = Imgcodecs.imread("cat.jpg"); 91 | Mat resizeimage = new Mat(); 92 | Imgproc.resize(source, resizeimage, new Size(250, 250)); 93 | Imgcodecs.imwrite("resizedCat.jpg", resizeimage); 94 | } 95 | 96 | public void convertImage() { 97 | Mat source = Imgcodecs.imread("cat.jpg"); 98 | // The extension determines the format 99 | Imgcodecs.imwrite("convertedCat.jpg", source); 100 | Imgcodecs.imwrite("convertedCat.jpeg", source); 101 | Imgcodecs.imwrite("convertedCat.webp", source); 102 | Imgcodecs.imwrite("convertedCat.png", source); 103 | Imgcodecs.imwrite("convertedCat.tiff", source); 104 | } 105 | 106 | public void noiseRemoval() { 107 | // Mat Kernel = cv::Mat(cv::Size(Maximum_Width_of_Noise,Maximum_Height_of_noise),CV_8UC1,cv::Scalar(255)); 108 | Mat Kernel = new Mat(new Size(3, 3), CvType.CV_8U, new Scalar(255)); 109 | Mat source = Imgcodecs.imread("noiseExample.png"); 110 | Mat temp = new Mat(); 111 | Mat topHat = new Mat(); 112 | Mat destination = new Mat(); 113 | 114 | Imgproc.morphologyEx(source, temp, Imgproc.MORPH_OPEN, Kernel); 115 | Imgproc.morphologyEx(temp, destination, Imgproc.MORPH_CLOSE, Kernel); 116 | // Imgproc.morphologyEx(temp, topHat, Imgproc.MORPH_GRADIENT, Kernel); 117 | // Imgproc.morphologyEx(topHat, destination, Imgproc.MORPH_CLOSE, Kernel); 118 | Imgcodecs.imwrite("noiseRemovedExample.png", source); 119 | } 120 | 121 
| public static void denoise() { 122 | String imgInPath = "captchaExample.jpg"; 123 | imgInPath = "MyCaptcha.PNG"; 124 | imgInPath = "blurredtext.jpg"; 125 | String imgOutPath = "captchaNoiseRemovedExample.png"; 126 | imgOutPath = "MyNoiseRemovedCaptcha.PNG"; 127 | 128 | Mat image = Imgcodecs.imread(imgInPath); 129 | Mat out = new Mat(); 130 | Mat tmp = new Mat(); 131 | Mat kernel = new Mat(new Size(3, 3), CvType.CV_8UC1, new Scalar(255)); 132 | // Mat kernel = new Mat(image.size(), CvType.CV_8UC1, new Scalar(255)); 133 | Imgproc.morphologyEx(image, tmp, Imgproc.MORPH_OPEN, kernel); 134 | Imgproc.morphologyEx(tmp, out, Imgproc.MORPH_CLOSE, kernel); 135 | Imgcodecs.imwrite(imgOutPath, out); 136 | } 137 | 138 | // public void convertToTIFF() { 139 | // Mat source = Imgcodecs.imread("OCRExample.png"); 140 | // Imgcodecs.imwrite("OCRExample.tiff", source); 141 | // } 142 | } 143 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/PDF File.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/PDF File.docx -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/PDF File.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/PDF File.pdf -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/PDFExtractionExample.java: -------------------------------------------------------------------------------- 1 | package packt.pdfextractionexample; 2 | 3 | import java.io.File; 4 | import org.apache.pdfbox.pdmodel.PDDocument; 5 | import org.apache.pdfbox.text.PDFTextStripper; 6 | 7 | public class PDFExtractionExample { 8 | 9 | public static void main(String[] args) { 10 | try { 11 | PDDocument document = PDDocument.load(new File("PDF File.pdf")); 12 | PDFTextStripper Tstripper = new PDFTextStripper(); 13 | String documentText = Tstripper.getText(document); 14 | System.out.println(documentText); 15 | } catch (Exception e) { 16 | e.printStackTrace(); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Person.json: -------------------------------------------------------------------------------- 1 | { 2 | "firstname":"Smith", 3 | "lastname":"Peter", 4 | "phone":8475552222, 5 | "address":["100 Main Street","Corpus","Oklahoma"] 6 | } 7 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Persons.json: -------------------------------------------------------------------------------- 1 | { 2 | "persons": { 3 | "groupname": "school", 4 | "person": 5 | [ 6 | {"firstname":"Smith", 7 | "lastname":"Peter", 8 | "phone":8475552222, 9 | "address":["100 Main Street","Corpus","Oklahoma"] }, 10 | {"firstname":"King", 11 | "lastname":"Sarah", 12 | "phone":8475551111, 13 | "address":["200 Main Street","Corpus","Oklahoma"] }, 14 | {"firstname":"Frost", 15 | "lastname":"Nathan", 16 | "phone":8475553333, 17 | "address":["300 Main Street","Corpus","Oklahoma"] } 18 | ] 19 | } 20 | } 21 | 
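The Persons.json file above nests a "person" array inside a "persons" object. As a minimal sketch of how that structure could be walked in Java (assuming Jackson's com.fasterxml.jackson.databind on the classpath, which is not a dependency shown in these listings), the tree model is enough:

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;

public class PersonsJsonSketch {
    public static void main(String[] args) throws IOException {
        // Parse the whole document into a tree and iterate the "person" array.
        JsonNode root = new ObjectMapper().readTree(new File("Persons.json"));
        for (JsonNode person : root.path("persons").path("person")) {
            System.out.println(person.path("firstname").asText() + " "
                    + person.path("lastname").asText() + ", phone: "
                    + person.path("phone").asLong());
        }
    }
}

Each iteration prints one entry, for example "Smith Peter, phone: 8475552222".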
-------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/ReadExcelExample.java: -------------------------------------------------------------------------------- 1 | package packt.poiexamples; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import org.apache.poi.ss.usermodel.Cell; 7 | import org.apache.poi.ss.usermodel.Row; 8 | import org.apache.poi.xssf.usermodel.XSSFSheet; 9 | import org.apache.poi.xssf.usermodel.XSSFWorkbook; 10 | import static java.lang.System.out; 11 | 12 | // From: http://howtodoinjava.com/apache-commons/readingwriting-excel-files-in-java-poi-tutorial/ 13 | public class ReadExcelExample { 14 | 15 | public static void main(String[] args) { 16 | //Create Workbook instance holding reference to .xlsx file 17 | try (FileInputStream file = new FileInputStream( 18 | new File("Sample.xlsx"))) { 19 | //Create Workbook instance holding reference to .xlsx file 20 | XSSFWorkbook workbook = new XSSFWorkbook(file); 21 | 22 | //Get first/desired sheet from the workbook 23 | XSSFSheet sheet = workbook.getSheetAt(0); 24 | 25 | //Iterate through each rows one by one 26 | // Iterator rowIterator = sheet.iterator(); 27 | for(Row row : sheet) { 28 | for (Cell cell : row) { 29 | //Check the cell type and format accordingly 30 | switch (cell.getCellType()) { 31 | case Cell.CELL_TYPE_NUMERIC: 32 | out.print(cell.getNumericCellValue() + "\t"); 33 | break; 34 | case Cell.CELL_TYPE_STRING: 35 | out.print(cell.getStringCellValue() + "\t"); 36 | break; 37 | } 38 | } 39 | out.println(); 40 | } 41 | } catch (IOException e) { 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/Sample.xlsx -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleSearching.java: -------------------------------------------------------------------------------- 1 | import java.io.BufferedReader; 2 | import java.io.File; 3 | import static java.lang.System.*; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.util.Scanner; 8 | 9 | public class SimpleSearching { 10 | 11 | public static void main(String[] args) { 12 | String toFind = "I"; 13 | String replaceWith = "Ishmael"; 14 | String dirtyText = "Call me Ishmael. 
Some years ago- never mind how"; 15 | dirtyText += " long precisely - having little or no money in my purse,"; 16 | dirtyText += " and nothing particular to interest me on shore, I thought"; 17 | dirtyText += " I would sail about a little and see the watery part of the world."; 18 | 19 | //simpleSearch(dirtyText,toFind); 20 | 21 | //scannerSearch(dirtyText,toFind); 22 | 23 | simpleFindReplace(dirtyText,toFind,replaceWith); 24 | 25 | //searchWholeFile("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//MobyDick.txt", toFind); 26 | 27 | try { 28 | Scanner textToClean = new Scanner(new File("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//101nos.txt")); 29 | while(textToClean.hasNext()){ 30 | //String dirtyText = textToClean.nextLine(); 31 | 32 | //simpleSearch(dirtyText,toFind); 33 | 34 | //scannerSearch(dirtyText,toFind); 35 | 36 | //simpleFindReplace(dirtyText,toFind,replaceWith); 37 | 38 | } 39 | 40 | textToClean.close(); 41 | } catch (FileNotFoundException e) { 42 | // TODO Auto-generated catch block 43 | e.printStackTrace(); 44 | } 45 | 46 | 47 | //searchWholeFile("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//101nos.txt", toFind); 48 | 49 | } 50 | 51 | public static void simpleSearch(String text, String toFind){ 52 | text = text.toLowerCase().trim(); 53 | toFind = toFind.toLowerCase().trim(); 54 | int count = 0; 55 | if(text.contains(toFind)){ 56 | String[] words = text.split(" "); 57 | for(String word : words){ 58 | if(word.equals(toFind)){ 59 | count++; 60 | } 61 | } 62 | out.println("Found " + toFind + " " + count + " times in the text."); 63 | } 64 | } 65 | 66 | public static void scannerSearch(String text, String toFind){ 67 | text = text.toLowerCase().trim(); 68 | toFind = toFind.toLowerCase().trim(); 69 | Scanner textLine = new Scanner(text); 70 | //NOTE horizon bound is zero - default to search entire file 71 | out.println("Found " + textLine.findWithinHorizon(toFind, 10)); 72 | } 73 | 74 | public static void simpleFindReplace(String text, String toFind, String replaceWith){ 75 | text = text.toLowerCase().trim(); 76 | toFind = toFind.toLowerCase().trim(); 77 | out.println(text); 78 | if(text.contains(toFind)){ 79 | text = text.replaceAll(toFind, replaceWith); 80 | out.println(text); 81 | // for(String word : textLine){ 82 | // out.print(word + " "); 83 | // } 84 | } 85 | 86 | } 87 | 88 | public static void searchWholeFile(String path, String toFind){ 89 | try { 90 | int line = 0; 91 | String textLine = ""; 92 | toFind = toFind.toLowerCase().trim(); 93 | BufferedReader textToClean = new BufferedReader(new FileReader(path)); 94 | while((textLine = textToClean.readLine()) != null){ 95 | line++; 96 | if(textLine.toLowerCase().trim().contains(toFind)){ 97 | out.println("Found " + toFind + " in " + textLine); 98 | //out.println("Found " + toFind + " on line " + line + " of file."); 99 | // String[] words = textLine.split(" "); 100 | // for(int x = 0; x < words.length; x++){ 101 | // if(words[x].equals(toFind)){ 102 | // out.println("On line " + line + " found " + toFind + " at location " + (x-1)); 103 | // } 104 | // } 105 | 106 | } 107 | } 108 | textToClean.close(); 109 | } catch (FileNotFoundException e) { 110 | e.printStackTrace(); 111 | } catch (IOException e) { 112 | e.printStackTrace(); 113 | } 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleSort.java: 
-------------------------------------------------------------------------------- 1 | import java.util.ArrayList; 2 | import java.util.Arrays; 3 | import java.util.Collections; 4 | import java.util.Comparator; 5 | import java.util.List; 6 | import static java.lang.System.out; 7 | 8 | public class SimpleSort { 9 | 10 | public static void main(String[] args) { 11 | 12 | basicSort(); 13 | complexSort(); 14 | 15 | } 16 | 17 | 18 | public static void basicSort(){ 19 | 20 | //make original list and print 21 | String[] words = {"cat","dog","house","boat","road","zoo"}; 22 | ArrayList wordsList = new ArrayList<>(Arrays.asList(words)); 23 | Integer[] nums = {12,46,52,34,87,123,14,44}; 24 | ArrayList numsList = new ArrayList<>(Arrays.asList(nums)); 25 | 26 | out.println("Original Word List: " + wordsList.toString()); 27 | //simple sort with collections.sort() 28 | Collections.sort(wordsList); 29 | out.println("Ascending Word List: " + wordsList.toString()); 30 | 31 | out.println("Original Integer List: " + numsList.toString()); 32 | 33 | Collections.reverse(numsList); 34 | out.println("Reversed Integer List: " + numsList.toString()); 35 | 36 | Collections.sort(numsList); 37 | out.println("Ascending Integer List: " + numsList.toString()); 38 | 39 | //Use Comparator Interface 40 | Comparator basicOrder = Integer::compare; 41 | Comparator descendOrder = basicOrder.reversed(); 42 | Collections.sort(numsList,descendOrder); 43 | out.println("Descending Integer List: " + numsList.toString()); 44 | 45 | //Using a lambda expression with Comparator and Collections 46 | Comparator compareInts = (Integer first, Integer second) -> Integer 47 | .compare(first, second); 48 | Collections.sort(numsList,compareInts); 49 | out.println("Sorted integers using Lambda: " + numsList.toString()); 50 | 51 | Comparator basicWords = String::compareTo; 52 | Comparator descendWords = basicWords.reversed(); 53 | Collections.sort(wordsList,descendWords); 54 | out.println("Reversed Words Using Comparator: " + wordsList.toString()); 55 | 56 | Comparator compareWords = (String first, String second) -> first.compareTo(second); 57 | Collections.sort(wordsList,compareWords); 58 | out.println("Sorted words using Lambda: " + wordsList.toString()); 59 | } 60 | 61 | 62 | public static void complexSort() { 63 | out.println(); 64 | ArrayList dogs = new ArrayList(); 65 | dogs.add(new Dogs("Zoey", 8)); 66 | dogs.add(new Dogs("Roxie", 10)); 67 | dogs.add(new Dogs("Kylie", 7)); 68 | dogs.add(new Dogs("Shorty", 14)); 69 | dogs.add(new Dogs("Ginger", 7)); 70 | dogs.add(new Dogs("Penny", 7)); 71 | out.println("Name " + " Age"); 72 | for(Dogs d : dogs){ 73 | out.println(d.getName() + " " + d.getAge()); 74 | } 75 | out.println(); 76 | dogs.sort(Comparator.comparing(Dogs::getName).thenComparing(Dogs::getAge)); 77 | out.println("Name " + " Age"); 78 | for(Dogs d : dogs){ 79 | out.println(d.getName() + " " + d.getAge()); 80 | } 81 | out.println(); 82 | dogs.sort(Comparator.comparing(Dogs::getAge).thenComparing(Dogs::getName)); 83 | out.println("Name " + " Age"); 84 | for(Dogs d : dogs){ 85 | out.println(d.getName() + " " + d.getAge()); 86 | } 87 | out.println(); 88 | } 89 | } 90 | 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleStringCleaning.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.util.ArrayList; 6 
| import java.util.Arrays; 7 | import java.util.Scanner; 8 | import java.util.Set; 9 | import java.util.TreeSet; 10 | 11 | import com.aliasi.tokenizer.EnglishStopTokenizerFactory; 12 | import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; 13 | import com.aliasi.tokenizer.Tokenizer; 14 | import com.aliasi.tokenizer.TokenizerFactory; 15 | 16 | public class SimpleStringCleaning { 17 | 18 | public static void main(String[] args) { 19 | 20 | String dirtyText = "Call me Ishmael. Some years ago- never mind how"; 21 | dirtyText += " long precisely - having little or no money in my purse,"; 22 | dirtyText += " and nothing particular to interest me on shore, I thought"; 23 | dirtyText += " I would sail about a little and see the watery part of the world."; 24 | 25 | //Example 1 - basic clean (regex & String class methods) 26 | //simpleClean(dirtyText); 27 | 28 | //Example 2 - clean and put in array (split) 29 | //simpleCleanToArray(dirtyText); 30 | 31 | //Example 3 - join 32 | //cleanAndJoin(dirtyText); 33 | 34 | //Example 4 - simple remove stop words 35 | //removeStopWords(dirtyText); 36 | 37 | //Example 5 - remove stop words with removeAll 38 | //removeStopWordsRemoveAll(dirtyText); 39 | 40 | //Example 6 - remove stop words with LingPipe 41 | removeStopWithLing(dirtyText); 42 | 43 | 44 | 45 | } 46 | 47 | public static String simpleClean(String text){ 48 | 49 | out.println("Dirty text: " + text); 50 | text = text.toLowerCase(); 51 | //explain what each part of this regex does 52 | text = text.replaceAll("[\\d[^\\w\\s]]+", " "); 53 | //NOTE trim only works on leading/trailing spaces 54 | text = text.trim(); 55 | //is this the best way to do this? This isn't great - talk about it even? 56 | while(text.contains(" ")){ 57 | text = text.replaceAll(" ", " "); 58 | } 59 | out.println("Cleaned text: " + text); 60 | return text; 61 | } 62 | 63 | public static String[] simpleCleanToArray(String text){ 64 | out.println("Dirty text: " + text); 65 | text = text.replaceAll("[\\d[^\\w\\s]]+", ""); 66 | String[] cleanText = text.toLowerCase().trim().split("[\\W\\d]+"); 67 | out.print("Cleaned text: "); 68 | for(String clean : cleanText){ 69 | out.print(clean + " "); 70 | } 71 | out.println(); 72 | return cleanText; 73 | } 74 | 75 | public static String cleanAndJoin(String text){ 76 | out.println("Dirty text: " + text); 77 | String[] words = text.toLowerCase().trim().split("[\\W\\d]+"); 78 | String cleanText = String.join(" ", words); 79 | out.println("Cleaned text: " + cleanText); 80 | return cleanText; 81 | } 82 | 83 | public static void removeStopWords(String text){ 84 | //discuss stop words file - how to choose stop words? 
use whole alphabet as way to handle I'M --> I M 85 | 86 | //****************** SIMPLE EXAMPLE ******************************************************************************************* 87 | 88 | try { 89 | //read in list of stop words 90 | Scanner readStop = new Scanner(new File("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//stopwords.txt")); 91 | //create an ArrayList to hold dirty text - call simpleCleanToArray to perform basic cleaning and put in array first 92 | ArrayList words = new ArrayList(Arrays.asList(simpleCleanToArray(text))); 93 | //loop through stop words file and check array for each word 94 | out.println("Original clean text: " + words.toString()); 95 | ArrayList foundWords = new ArrayList(); 96 | while(readStop.hasNextLine()){ 97 | String stopWord = readStop.nextLine().toLowerCase(); 98 | if(words.contains(stopWord)){ 99 | foundWords.add(stopWord); 100 | } 101 | } 102 | words.removeAll(foundWords); 103 | out.println("Text without stop words: " + words.toString()); 104 | } catch (FileNotFoundException e) { 105 | // TODO Auto-generated catch block 106 | e.printStackTrace(); 107 | } 108 | 109 | } 110 | 111 | public static void removeStopWordsRemoveAll(String text){ 112 | //******************EXAMPLE WITH REMOVE ALL ******************************************************************************************* 113 | 114 | try { 115 | out.println(text); 116 | Scanner stopWordList = new Scanner(new File("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//stopwords.txt")); 117 | TreeSet stopWords = new TreeSet(); 118 | while(stopWordList.hasNextLine()){ 119 | stopWords.add(stopWordList.nextLine()); 120 | } 121 | ArrayList dirtyText = new ArrayList(Arrays.asList(text.split(" "))); 122 | dirtyText.removeAll(stopWords); 123 | out.println("Clean words: "); 124 | for(String x : dirtyText){ 125 | out.print(x + " "); 126 | } 127 | out.println(); 128 | stopWordList.close(); 129 | } catch (FileNotFoundException e) { 130 | // TODO Auto-generated catch block 131 | e.printStackTrace(); 132 | } 133 | } 134 | 135 | public static void removeStopWithLing(String text){ 136 | //******************EXAMPLE WITH ling pipe ******************************************************************************************* 137 | //mention lower vs upper case 138 | out.println(text); 139 | text = text.toLowerCase().trim(); 140 | TokenizerFactory fact = IndoEuropeanTokenizerFactory.INSTANCE; 141 | fact = new EnglishStopTokenizerFactory(fact); 142 | Tokenizer tok = fact.tokenizer(text.toCharArray(), 0, text.length()); 143 | for(String word : tok){ 144 | out.print(word + " "); 145 | } 146 | } 147 | } 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleSubsetting.java: -------------------------------------------------------------------------------- 1 | 2 | import java.util.Scanner; 3 | import java.util.Set; 4 | import java.util.SortedSet; 5 | import java.util.TreeSet; 6 | 7 | import static java.util.stream.Collectors.toCollection; 8 | 9 | import java.io.BufferedReader; 10 | import java.io.File; 11 | import java.io.FileNotFoundException; 12 | import java.io.FileReader; 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.Arrays; 16 | 17 | import static java.lang.System.out; 18 | 19 | public class SimpleSubsetting { 20 | 21 | public static void main(String[] args) throws FileNotFoundException { 22 | 23 | // treeSubSetMethod(); 24 | // simpleSubSet(); 25 | 
subSetSkipLines(); 26 | 27 | } 28 | 29 | public static void treeSubSetMethod(){ 30 | 31 | //sub set is not populating - not sure why 32 | //http://www.tutorialspoint.com/java/util/treeset_subset.htm 33 | 34 | Integer[] nums = {12,46,52,34,87,123,14,44}; 35 | TreeSet fullNumsList = new TreeSet(new ArrayList<>(Arrays.asList(nums))); 36 | TreeSet partNumsList = new TreeSet(); 37 | out.println("Original List: " + fullNumsList.toString()); 38 | partNumsList = (TreeSet) fullNumsList.subSet(1,3); 39 | out.println("SubSet of List: " + partNumsList.toString()); 40 | out.println(partNumsList.size()); 41 | 42 | 43 | } 44 | 45 | public static void simpleSubSet(){ 46 | Integer[] nums = {12,46,52,34,87,123,14,44}; 47 | ArrayList numsList = new ArrayList<>(Arrays.asList(nums)); 48 | out.println("Original List: " + numsList.toString()); 49 | Set fullNumsList = new TreeSet(numsList); 50 | Set partNumsList = fullNumsList.stream().skip(5).collect(toCollection(TreeSet::new)); 51 | out.println("SubSet of List: " + partNumsList.toString()); 52 | 53 | } 54 | 55 | public static void subSetSkipLines() throws FileNotFoundException{ 56 | 57 | //not behaving as expected 58 | try (BufferedReader br = new BufferedReader(new FileReader("C:\\Jenn Personal\\Packt Data Science\\Chapter 3 Data Cleaning\\stopwords.txt"))) { 59 | br 60 | .lines() 61 | .filter(s -> !s.equals("")) 62 | .forEach(s -> out.println(s)); 63 | } catch (IOException ex) { 64 | ex.printStackTrace(); 65 | } 66 | 67 | //Scanner file = new Scanner(new File("C:\\Jenn Personal\\Packt Data Science\\Chapter 3 Data Cleaning\\stopwords.txt")); 68 | // ArrayList lines = new ArrayList<>(); 69 | // while(file.hasNextLine()){ 70 | // lines.add(file.nextLine()); 71 | // } 72 | // out.println("Original List: " + lines.toString()); 73 | // out.println("Original list is " + lines.size() + " elements long"); 74 | // Set fullWordsList = new TreeSet(lines); 75 | // Set partWordsList = fullWordsList.stream().skip(2).collect(toCollection(TreeSet::new)); 76 | // out.println("SubSet of List: " + partWordsList.toString()); 77 | // out.println("Subsetted list is " + partWordsList.size() + " elements long"); 78 | // 79 | // file.close(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/TokenizerExamples.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.util.StringTokenizer; 4 | 5 | public class TokenizerExamples{ 6 | 7 | public static void main(String[] args){ 8 | 9 | String dirtyText = "Call me Ishmael. 
Some years ago- never mind how"; 10 | dirtyText += " long precisely - having little or no money in my purse,"; 11 | dirtyText += " and nothing particular to interest me on shore, I thought"; 12 | dirtyText += " I would sail about a little and see the watery part of the world."; 13 | 14 | StringTokenizer tokenizer = new StringTokenizer(dirtyText," "); 15 | while(tokenizer.hasMoreTokens()){ 16 | out.print(tokenizer.nextToken() + " "); 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/ValidatingData.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/ValidatingData.java -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/BookDecisionTree.java: -------------------------------------------------------------------------------- 1 | package packt.decisiontreeexamples; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileReader; 5 | import java.io.IOException; 6 | import static java.lang.System.out; 7 | import java.util.Enumeration; 8 | 9 | import weka.classifiers.trees.J48; 10 | import weka.core.DenseInstance; 11 | import weka.core.Instance; 12 | import weka.core.Instances; 13 | 14 | /* 15 | 16 | 17 | jboss-3rd-party-releases 18 | https://repository.jboss.org/nexus/content/repositories/thirdparty-releases/ 19 | 20 | 21 | 22 | 23 | 24 | nz.ac.waikato.cms.weka 25 | weka-dev 26 | 3.7.5 27 | 28 | 29 | com.google.guava 30 | guava 31 | 20.0-hal 32 | 33 | 34 | */ 35 | public class BookDecisionTree { 36 | 37 | private Instances trainingData; 38 | 39 | public static void main(String[] args) { 40 | try { 41 | BookDecisionTree decisionTree = new BookDecisionTree("books.arff"); 42 | J48 tree = decisionTree.performTraining(); 43 | System.out.println(tree.toString()); 44 | 45 | Instance testInstance = decisionTree. 46 | getTestInstance("Leather", "yes", "historical"); 47 | int result = (int) tree.classifyInstance(testInstance); 48 | String results = decisionTree.trainingData.attribute(3).value(result); 49 | System.out.println( 50 | "Test with: " + testInstance + " Result: " + results); 51 | 52 | testInstance = decisionTree. 53 | getTestInstance("Paperback", "no", "historical"); 54 | result = (int) tree.classifyInstance(testInstance); 55 | results = decisionTree.trainingData.attribute(3).value(result); 56 | System.out.println( 57 | "Test with: " + testInstance + " Result: " + results); 58 | } catch (Exception ex) { 59 | ex.printStackTrace(); 60 | } 61 | } 62 | 63 | public BookDecisionTree(String fileName) { 64 | try { 65 | BufferedReader reader = new BufferedReader(new FileReader(fileName)); 66 | trainingData = new Instances(reader); 67 | trainingData.setClassIndex(trainingData.numAttributes() - 1); 68 | } catch (IOException ex) { 69 | ex.printStackTrace(); 70 | } 71 | } 72 | 73 | private J48 performTraining() { 74 | J48 j48 = new J48(); 75 | String[] options = {"-U"}; 76 | // Use unpruned tree. 
-U 77 | try { 78 | j48.setOptions(options); 79 | j48.buildClassifier(trainingData); 80 | } catch (Exception ex) { 81 | ex.printStackTrace(); 82 | } 83 | return j48; 84 | } 85 | 86 | private Instance getTestInstance( 87 | String binding, String multicolor, String genre) { 88 | Instance instance = new DenseInstance(3); 89 | instance.setDataset(trainingData); 90 | instance.setValue(trainingData.attribute(0), binding); 91 | instance.setValue(trainingData.attribute(1), multicolor); 92 | instance.setValue(trainingData.attribute(2), genre); 93 | return instance; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/FXMLController.java: -------------------------------------------------------------------------------- 1 | package packt.scatterchartmaven2; 2 | 3 | import java.net.URL; 4 | import java.util.ResourceBundle; 5 | import javafx.event.ActionEvent; 6 | import javafx.fxml.FXML; 7 | import javafx.fxml.Initializable; 8 | import javafx.scene.control.Label; 9 | 10 | public class FXMLController implements Initializable { 11 | 12 | @FXML 13 | private Label label; 14 | 15 | @FXML 16 | private void handleButtonAction(ActionEvent event) { 17 | System.out.println("You clicked me!"); 18 | label.setText("Hello World!"); 19 | } 20 | 21 | @Override 22 | public void initialize(URL url, ResourceBundle rb) { 23 | // TODO 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/JBayesTest.java: -------------------------------------------------------------------------------- 1 | package com.packt.java.jayes; 2 | 3 | import java.io.File; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import static java.lang.System.out; 7 | import com.github.vangj.jbayes.inf.prob.Graph; 8 | import com.github.vangj.jbayes.inf.prob.Node; 9 | import com.github.vangj.jbayes.inf.prob.util.CsvUtil; 10 | 11 | public class JBayesTest { 12 | 13 | public static void main(String[] args){ 14 | //each node must have its name and values defined 15 | Node storms = Node.newBuilder().name("Thunderstorm").value("t").value("f").build(); 16 | Node traffic = Node.newBuilder().name("Traffic").value("t").value("f").build(); 17 | Node powerOut = Node.newBuilder().name("PowerOutage").value("t").value("f").build(); 18 | Node alarm = Node.newBuilder().name("Alarm").value("t").value("f").build(); 19 | Node overslept = Node.newBuilder().name("Overslept").value("t").value("f").build(); 20 | Node lateToWork = Node.newBuilder().name("LateToWork").value("t").value("f").build(); 21 | 22 | //nodes may have parents 23 | traffic.addParent(storms); 24 | powerOut.addParent(storms); 25 | lateToWork.addParent(traffic); 26 | alarm.addParent(powerOut); 27 | overslept.addParent(alarm); 28 | lateToWork.addParent(overslept); 29 | 30 | //define the CPTs for each node 31 | storms.setCpt(new double[][] { 32 | {0.7, 0.3} 33 | }); 34 | traffic.setCpt(new double[][] { 35 | {0.8, 0.2} 36 | }); 37 | powerOut.setCpt(new double[][] { 38 | {0.5, 0.5} 39 | }); 40 | alarm.setCpt(new double[][] { 41 | {0.7, 0.3} 42 | }); 43 | overslept.setCpt(new double[][] { 44 | {0.5, 0.5} 45 | }); 46 | lateToWork.setCpt(new double[][] { 47 | {0.5, 0.5}, 48 | {0.5, 0.5} 49 | }); 50 | 51 | //create a graph from the nodes 52 | Graph bayesGraph = new Graph(); 53 | bayesGraph.addNode(storms); 54 | bayesGraph.addNode(traffic); 55 | bayesGraph.addNode(powerOut); 56 | 
bayesGraph.addNode(alarm); 57 | bayesGraph.addNode(overslept); 58 | bayesGraph.addNode(lateToWork); 59 | 60 | //samples and computes the marginal probabilities aka the inference 61 | double d = bayesGraph.sample(1000); 62 | out.println(d); 63 | 64 | //look at the marginal probabilities 65 | double[] stormProb = storms.probs(); 66 | double[] trafficProb = traffic.probs(); 67 | double[] powerProb = powerOut.probs(); 68 | double[] alarmProb = alarm.probs(); 69 | double[] oversleptProb = overslept.probs(); 70 | double[] lateProb = lateToWork.probs(); 71 | 72 | out.println("\nStorm Probabilities"); 73 | out.println("True: " + stormProb[0] + " False: " + stormProb[1]); 74 | out.println("\nTraffic Probabilities"); 75 | out.println("True: " + trafficProb[0] + " False: " + trafficProb[1]); 76 | out.println("\nPower Outage Probabilities"); 77 | out.println("True: " + powerProb[0] + " False: " + powerProb[1]); 78 | out.println("vAlarm Probabilities"); 79 | out.println("True: " + alarmProb[0] + " False: " + alarmProb[1]); 80 | out.println("\nOverslept Probabilities"); 81 | out.println("True: " + oversleptProb[0] + " False: " + oversleptProb[1]); 82 | out.println("\nLate to Work Probabilities"); 83 | out.println("True: " + lateProb[0] + " False: " + lateProb[1]); 84 | 85 | bayesGraph.setSaveSamples(true); //stores samples in memory! 86 | bayesGraph.sample(100); 87 | 88 | try { 89 | CsvUtil.saveSamples(bayesGraph, new FileWriter(new File("C://Jenn Personal//Packt Data Science//Chapter 6 Machine Learning//jbayes.csv"))); 90 | } catch (IOException e) { 91 | // TODO Auto-generated catch block 92 | e.printStackTrace(); 93 | } //save samples into CSV file 94 | 95 | bayesGraph.clearSamples(); //clear samples, this might help with memory usage 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/Main-ARL.java: -------------------------------------------------------------------------------- 1 | package packt.aprioriexamples; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileReader; 5 | import weka.associations.Apriori; 6 | import weka.core.Instances; 7 | 8 | public class Main { 9 | 10 | public static void main(String[] args) { 11 | try { 12 | BufferedReader br; 13 | br = new BufferedReader(new FileReader("babies.arff")); 14 | Instances data = new Instances(br); 15 | br.close(); 16 | 17 | Apriori apriori = new Apriori(); 18 | apriori.setNumRules(100); 19 | apriori.setMinMetric(0.5); 20 | 21 | apriori.buildAssociations(data); 22 | System.out.println(apriori); 23 | } catch (Exception ex) { 24 | ex.printStackTrace(); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/Main-SVG.java: -------------------------------------------------------------------------------- 1 | package packt.svmexamples; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import static java.lang.System.out; 8 | import java.util.ArrayList; 9 | import weka.classifiers.Classifier; 10 | import weka.classifiers.Evaluation; 11 | import weka.classifiers.evaluation.NominalPrediction; 12 | import weka.classifiers.evaluation.Prediction; 13 | import weka.classifiers.functions.SMO; 14 | import weka.classifiers.functions.supportVector.PrecomputedKernelMatrixKernel; 15 | import weka.core.DenseInstance; 16 | import 
weka.core.Instance; 17 | import weka.core.Instances; 18 | 19 | public class Main { 20 | 21 | public BufferedReader readDataFile(String filename) { 22 | BufferedReader inputReader = null; 23 | try { 24 | inputReader = new BufferedReader(new FileReader(filename)); 25 | } catch (FileNotFoundException ex) { 26 | out.println("File not found: " + filename); 27 | } 28 | return inputReader; 29 | } 30 | 31 | 32 | public Main() { 33 | try { 34 | BufferedReader datafile; 35 | datafile = readDataFile("camping.txt"); 36 | Instances data = new Instances(datafile); 37 | data.setClassIndex(data.numAttributes() - 1); 38 | 39 | Instances trainingData = new Instances(data, 0, 14); 40 | Instances testingData = new Instances(data, 14, 5); 41 | Evaluation evaluation = new Evaluation(trainingData); 42 | 43 | SMO smo = new SMO(); 44 | smo.buildClassifier(data); 45 | 46 | evaluation.evaluateModel(smo, testingData); 47 | System.out.println(evaluation.toSummaryString()); 48 | 49 | // Test instance 50 | Instance instance = new DenseInstance(3); 51 | instance.setValue(data.attribute("age"), 78); 52 | instance.setValue(data.attribute("income"), 125700); 53 | instance.setValue(data.attribute("camps"), 1); 54 | instance.setDataset(data); 55 | System.out.println("The instance: " + instance); 56 | System.out.println(smo.classifyInstance(instance)); 57 | } catch (Exception ex) { 58 | ex.printStackTrace(); 59 | } 60 | } 61 | 62 | public static void main(String[] arg) { 63 | new Main(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/MainApp-Camping.java: -------------------------------------------------------------------------------- 1 | package packt.scatterchartmaven2; 2 | 3 | /* 4 | 5 | 6 | com.opencsv 7 | opencsv 8 | 3.7 9 | 10 | 11 | org.apache.commons 12 | commons-math3 13 | 3.6.1 14 | 15 | 16 | */ 17 | 18 | import javafx.application.Application; 19 | import static javafx.application.Application.launch; 20 | import javafx.scene.Scene; 21 | import javafx.scene.chart.NumberAxis; 22 | import javafx.scene.chart.ScatterChart; 23 | import javafx.scene.chart.XYChart; 24 | import javafx.stage.Stage; 25 | 26 | public class MainApp extends Application { 27 | 28 | @Override 29 | public void start(Stage stage) throws Exception { 30 | stage.setTitle("Scatter Chart Sample"); 31 | final NumberAxis xAxis = new NumberAxis(0, 100, 10); 32 | final NumberAxis yAxis = new NumberAxis(0, 130000, 10000); 33 | final ScatterChart sc = new 34 | ScatterChart(xAxis,yAxis); 35 | xAxis.setLabel("Age"); 36 | yAxis.setLabel("Income"); 37 | sc.setTitle("Camping Inclination"); 38 | 39 | /* 40 | 23,45600,1 41 | 26,32000,0 42 | 45,65700,1 43 | 29,25300,0 44 | 72,55600,1 45 | 24,28700,1 46 | 56,125300,1 47 | 22,34200,1 48 | 28,32800,1 49 | 32,24600,1 50 | 25,36500,1 51 | 67,76800,0 52 | 25,14500,1 53 | 86,58900,0 54 | */ 55 | XYChart.Series series1 = new XYChart.Series(); 56 | series1.setName("Camps"); 57 | series1.getData().add(new XYChart.Data(23,45600)); 58 | series1.getData().add(new XYChart.Data(45,65700)); 59 | series1.getData().add(new XYChart.Data(72,55600)); 60 | series1.getData().add(new XYChart.Data(24,28700)); 61 | series1.getData().add(new XYChart.Data(22,34200)); 62 | series1.getData().add(new XYChart.Data(28,32800)); 63 | series1.getData().add(new XYChart.Data(32,24600)); 64 | series1.getData().add(new XYChart.Data(25,36500)); 65 | series1.getData().add(new XYChart.Data(22,43600)); 66 | series1.getData().add(new 
XYChart.Data(78,125700)); 67 | series1.getData().add(new XYChart.Data(73,56500)); 68 | 69 | XYChart.Series series2 = new XYChart.Series(); 70 | series2.setName("Doesn't Camp"); 71 | series2.getData().add(new XYChart.Data(26,91000)); 72 | series2.getData().add(new XYChart.Data(29,85300)); 73 | series2.getData().add(new XYChart.Data(67,76800)); 74 | series2.getData().add(new XYChart.Data(86,58900)); 75 | series2.getData().add(new XYChart.Data(56,125300)); 76 | series2.getData().add(new XYChart.Data(25,125000)); 77 | series2.getData().add(new XYChart.Data(29,87600)); 78 | series2.getData().add(new XYChart.Data(65,79300)); 79 | 80 | sc.getData().addAll(series1, series2); 81 | Scene scene = new Scene(sc, 500, 400); 82 | stage.setScene(scene); 83 | stage.show(); 84 | } 85 | 86 | public static void main(String[] args) { 87 | launch(args); 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 8-Deep learning/ConvolutionalNetworkExample.java: -------------------------------------------------------------------------------- 1 | package packt.dl4jexamples; 2 | 3 | import static java.lang.System.out; 4 | import org.deeplearning4j.datasets.fetchers.MnistDataFetcher; 5 | import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; 6 | import org.deeplearning4j.nn.api.OptimizationAlgorithm; 7 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration; 8 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration; 9 | import org.deeplearning4j.nn.conf.layers.OutputLayer; 10 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 11 | import org.deeplearning4j.optimize.api.IterationListener; 12 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener; 13 | import org.nd4j.linalg.dataset.DataSet; 14 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; 15 | import org.nd4j.linalg.lossfunctions.LossFunctions; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | import java.util.Collections; 20 | import org.deeplearning4j.eval.Evaluation; 21 | import org.deeplearning4j.nn.conf.Updater; 22 | import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; 23 | import org.deeplearning4j.nn.conf.layers.DenseLayer; 24 | import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; 25 | import org.deeplearning4j.nn.conf.layers.setup.ConvolutionLayerSetup; 26 | import org.deeplearning4j.nn.weights.WeightInit; 27 | import org.nd4j.linalg.api.ndarray.INDArray; 28 | import org.nd4j.linalg.dataset.SplitTestAndTrain; 29 | import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization; 30 | import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; 31 | 32 | /** 33 | * ***** NOTE: This example has not been tuned. It requires additional work to 34 | * produce sensible results ***** 35 | * 36 | * @author Adam Gibson 37 | */ 38 | public class ConvolutionalNetworkExample { 39 | 40 | private static Logger log = LoggerFactory.getLogger(ConvolutionalNetworkExample.class); 41 | 42 | public static void main(String[] args) throws Exception { 43 | 44 | log.info("Load data...."); 45 | //params - batch size, num examples, true?? 
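// Added note (hedged, based on the DL4J 0.x API): the two-argument form
// MnistDataSetIterator(batchSize, numExamples) is used on the next line; the boolean third
// argument used in DeepAutoEncoderExample.java in this chapter
// (new MnistDataSetIterator(1000, MnistDataFetcher.NUM_EXAMPLES, true)) toggles binarization
// of the pixel values, which is what the "true??" in the comment above refers to.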
46 | DataSetIterator iter = new MnistDataSetIterator(1000, MnistDataFetcher.NUM_EXAMPLES); 47 | //ADDED 48 | DataSet dataset = iter.next(); 49 | dataset.shuffle(); 50 | SplitTestAndTrain testAndTrain = dataset.splitTestAndTrain(0.65); 51 | DataSet trainingData = testAndTrain.getTrain(); 52 | DataSet testData = testAndTrain.getTest(); 53 | DataNormalization normalizer = new NormalizerStandardize(); 54 | normalizer.fit(trainingData); 55 | normalizer.transform(trainingData); 56 | normalizer.transform(testData); 57 | 58 | log.info("Build model...."); 59 | MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() 60 | .seed(123) 61 | .iterations(1) 62 | .regularization(true).l2(0.0005) 63 | .learningRate(0.01) 64 | .weightInit(WeightInit.XAVIER) 65 | .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) 66 | .updater(Updater.NESTEROVS).momentum(0.9) 67 | .list() 68 | .layer(0, new ConvolutionLayer.Builder(5, 5) 69 | //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied 70 | .nIn(3) 71 | .stride(1, 1) 72 | .nOut(20) 73 | .activation("identity") 74 | .build()) 75 | .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) 76 | .kernelSize(2, 2) 77 | .stride(2, 2) 78 | .build()) 79 | .layer(2, new ConvolutionLayer.Builder(5, 5) 80 | .stride(1, 1) 81 | .nOut(50) 82 | .activation("identity") 83 | .build()) 84 | .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) 85 | .kernelSize(2, 2) 86 | .stride(2, 2) 87 | .build()) 88 | .layer(4, new DenseLayer.Builder().activation("relu") 89 | .nOut(500).build()) 90 | .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) 91 | .nOut(10) 92 | .activation("softmax") 93 | .build()) 94 | .backprop(true).pretrain(false); 95 | // The builder needs the dimensions of the image along with the number of channels. 
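// Added observations (this example is untuned, as the class comment states; these are notes, not fixes):
// - layer 0 declares .nIn(3), i.e. three input channels, while ConvolutionLayerSetup below is
//   configured for a single channel, which matches grayscale MNIST.
// - new ScoreIterationListener(1/5) further down uses integer division, so the argument is 0;
//   a whole number such as 5 was probably intended.
// - new Evaluation(4) sizes the evaluation for 4 classes, whereas MNIST has 10 digit classes.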
these are 28x28 images in one channel 96 | new ConvolutionLayerSetup(builder, 28, 28, 1); 97 | 98 | MultiLayerConfiguration conf = builder.build(); 99 | MultiLayerNetwork model = new MultiLayerNetwork(conf); 100 | model.init(); 101 | model.setListeners(Collections.singletonList((IterationListener) new ScoreIterationListener(1/5))); 102 | 103 | while (iter.hasNext()) { 104 | DataSet next = iter.next(); 105 | model.fit(new DataSet(next.getFeatureMatrix(), next.getLabels())); 106 | } 107 | 108 | Evaluation evaluation = new Evaluation(4); 109 | INDArray output = model.output(testData.getFeatureMatrix()); 110 | evaluation.eval(testData.getLabels(), output); 111 | out.println(evaluation.stats()); 112 | } 113 | } 114 | 115 | 116 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 8-Deep learning/DeepAutoEncoderExample.java: -------------------------------------------------------------------------------- 1 | package packt.dl4jexamples; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import org.deeplearning4j.datasets.fetchers.MnistDataFetcher; 6 | import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; 7 | import org.deeplearning4j.nn.api.OptimizationAlgorithm; 8 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration; 9 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration; 10 | import org.deeplearning4j.nn.conf.layers.OutputLayer; 11 | import org.deeplearning4j.nn.conf.layers.RBM; 12 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 13 | import org.deeplearning4j.optimize.api.IterationListener; 14 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener; 15 | import org.nd4j.linalg.dataset.DataSet; 16 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; 17 | import org.nd4j.linalg.lossfunctions.LossFunctions; 18 | 19 | import java.util.Collections; 20 | import org.deeplearning4j.util.ModelSerializer; 21 | 22 | public class DeepAutoEncoderExample { 23 | private MultiLayerNetwork model; 24 | private File modelFile; 25 | private DataSetIterator iterator; 26 | private final int numberOfRows = 28; 27 | private final int numberOfColumns = 28; 28 | 29 | public DeepAutoEncoderExample() { 30 | try { 31 | int seed = 123; 32 | int numberOfIterations = 1; 33 | iterator = new MnistDataSetIterator(1000, MnistDataFetcher.NUM_EXAMPLES, true); 34 | 35 | MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() 36 | .seed(seed) 37 | .iterations(numberOfIterations) 38 | .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) 39 | .list() 40 | .layer(0, new RBM.Builder().nIn(numberOfRows * numberOfColumns) 41 | .nOut(1000) 42 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 43 | .layer(1, new RBM.Builder().nIn(1000).nOut(500) 44 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 45 | .layer(2, new RBM.Builder().nIn(500).nOut(250) 46 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 47 | .layer(3, new RBM.Builder().nIn(250).nOut(100) 48 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 49 | .layer(4, new RBM.Builder().nIn(100).nOut(30) 50 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) //encoding stops 51 | .layer(5, new RBM.Builder().nIn(30).nOut(100) 52 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) //decoding starts 53 | .layer(6, new RBM.Builder().nIn(100).nOut(250) 54 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 55 | .layer(7, new 
RBM.Builder().nIn(250).nOut(500) 56 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 57 | .layer(8, new RBM.Builder().nIn(500).nOut(1000) 58 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 59 | .layer(9, new OutputLayer.Builder( 60 | LossFunctions.LossFunction.RMSE_XENT).nIn(1000) 61 | .nOut(numberOfRows * numberOfColumns).build()) 62 | .pretrain(true).backprop(true) 63 | .build(); 64 | 65 | model = new MultiLayerNetwork(conf); 66 | model.init(); 67 | 68 | model.setListeners(Collections.singletonList( 69 | (IterationListener) new ScoreIterationListener())); 70 | 71 | while (iterator.hasNext()) { 72 | DataSet dataSet = iterator.next(); 73 | model.fit(new DataSet(dataSet.getFeatureMatrix(), 74 | dataSet.getFeatureMatrix())); 75 | } 76 | 77 | modelFile = new File("savedModel"); 78 | ModelSerializer.writeModel(model, modelFile, true); 79 | } catch (IOException ex) { 80 | ex.printStackTrace(); 81 | } 82 | } 83 | 84 | public void retrieveModel() { 85 | try { 86 | modelFile = new File("savedModel"); 87 | MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(modelFile); 88 | } catch (IOException ex) { 89 | ex.printStackTrace(); 90 | } 91 | } 92 | 93 | public static void main(String[] args) throws Exception { 94 | new DeepAutoEncoderExample(); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 8-Deep learning/RegressionExample.java: -------------------------------------------------------------------------------- 1 | package packt.dl4jexamples; 2 | 3 | public class RegressionExample { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/data/keywords.txt: -------------------------------------------------------------------------------- 1 | adidas basketball shoes 2 | angry birds 3 | animal shelter 4 | apple cider 5 | apples 6 | aquarium fish 7 | auto auction 8 | avengers 9 | bananas 10 | batman 11 | battlefield 4 12 | bed and breakfast 13 | berlin 14 | black friday 15 | blogging 16 | boston 17 | brazil 18 | burger king 19 | buritos 20 | calculus 21 | car loans 22 | cauliflower 23 | chinese food 24 | chinese food history 25 | christmas tree 26 | citibank 27 | classic rock songs 28 | cocktail 29 | coconut oil 30 | cranberry sauce recipe 31 | cyber security 32 | data protection 33 | david guetta 34 | deep dish pizza 35 | deep fryer 36 | digital frame 37 | digital tv 38 | eggplant recipes 39 | electric dryer 40 | energy bar 41 | family fitness 42 | fashion 43 | fifa 44 | fifa world cup 45 | film camera 46 | fitbit flex 47 | flowers 48 | food poisoning 49 | food processor 50 | free audio books 51 | free cloud storage 52 | fryer 53 | funny cat pictures 54 | furniture 55 | game of thrones 56 | gaming pcs 57 | garlic bread recipe 58 | green card 59 | green tea 60 | hand cream 61 | harry potter 62 | herbal tea benefits 63 | high tech 64 | home automation 65 | home brewing 66 | homemade pancakes 67 | homemade pizza 68 | homemade salsa 69 | how to draw 70 | how to learn programming 71 | hp printers 72 | iit delhi 73 | imax 74 | immigration australia 75 | immigration canada 76 | immigration germany 77 | immigration usa 78 | india 79 | instant food 80 | instant noodles 81 | insurance 82 | intel core i5 83 | internet router 84 | iphone 85 | korean restaurant 86 | laptop 87 | lawn mower 88 | lime cookies 89 | low blood pressure 90 | mickey mouse 91 | microscopes 92 | microsoft mouse 93 | 
microsoft phone 94 | microwave 95 | microwave oven 96 | mini cooper 97 | mortgage 98 | new delhi 99 | new york times 100 | new york 101 | orange juice 102 | oranges 103 | organic bananas 104 | organic tomatoes 105 | outdoor security cameras 106 | physical therapy 107 | pineapples 108 | pit bull 109 | pizza sauce 110 | polish food 111 | potato soup 112 | printer 113 | random password generator 114 | seoul south korea 115 | smart house 116 | soft boiled egg 117 | sony xperia m 118 | sore throat 119 | speed dating 120 | sri lanka 121 | star trek 122 | steam cooker 123 | steve madden boots 124 | tax calculator 125 | tax return 126 | the hobbit extended edition 127 | thesaurus 128 | tomatoes 129 | top indian movies 130 | tv streaming 131 | ultrasound 132 | usc football 133 | used cars 134 | used laptops 135 | video camera 136 | video chat 137 | walkie talkies 138 | wedding dress 139 | windows surface pro 140 | wonder woman 141 | x men 142 | xbox -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/data/text.txt: -------------------------------------------------------------------------------- 1 | My dog also likes eating sausage. 2 | The motor accepts beside a surplus. 3 | Every capable slash succeeds with a worldwide blame. 4 | The continued task coughs around the guilty kiss. 5 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/data/words.txt: -------------------------------------------------------------------------------- 1 | pellentesque RB 2 | aliquam RB 3 | fringilla VB 4 | urna RB 5 | ornare NN 6 | primis NN 7 | Quisque VB 8 | Proin VB 9 | Proin NN 10 | Morbi NN 11 | risus RPR 12 | blandit VB 13 | ut VB 14 | fermentum NN 15 | neque VB 16 | lacinia RB 17 | ultrices NN 18 | consectetuer NN 19 | dapibus NN 20 | lectus RB 21 | nisi NN 22 | purus NN 23 | eu RB 24 | congue NN 25 | aliquet RB 26 | eget NN 27 | sociis NN 28 | ut VB 29 | morbi RPR 30 | et RPR 31 | magna VB 32 | non RB 33 | libero RPR 34 | elit NN 35 | felis RPR 36 | lorem RPR 37 | Nunc VB 38 | pellentesque RB 39 | consequat RB 40 | nunc VB 41 | libero NN 42 | felis RPR 43 | mauris NN 44 | montes RB 45 | ipsum RPR 46 | Proin RPR 47 | Maecenas RPR 48 | nec NN 49 | nunc RB 50 | eu RB 51 | semper RPR 52 | erat VB 53 | tincidunt NN 54 | nibh NN 55 | lectus RB 56 | gravida RB 57 | vulputate VB 58 | fringilla NN 59 | congue VB 60 | Aenean VB 61 | eu VB 62 | lorem RB 63 | dui NN 64 | eu RB 65 | ornare NN 66 | fringilla VB 67 | luctus NN 68 | vulputate RPR 69 | fermentum RPR 70 | libero NN 71 | sapien RPR 72 | cubilia NN 73 | venenatis RPR 74 | sollicitudin VB 75 | eros RB 76 | montes RPR 77 | mollis NN 78 | Nunc RPR 79 | a RPR 80 | sed VB 81 | odio RB 82 | ante VB 83 | sociis VB 84 | turpis RB 85 | est RB 86 | mauris RPR 87 | faucibus RPR 88 | habitant RPR 89 | Phasellus RB 90 | porttitor VB 91 | imperdiet RPR 92 | non VB 93 | nisl RPR 94 | mus RPR 95 | viverra RPR 96 | ut RPR 97 | dui RPR 98 | adipiscing NN 99 | mus RB 100 | quam RB -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-02-dataprocessing 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | 
http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | ${slf4j.version} 31 | 32 | 33 | ch.qos.logback 34 | logback-classic 35 | ${logback.version} 36 | 37 | 38 | ch.qos.logback 39 | logback-core 40 | ${logback.version} 41 | 42 | 43 | 44 | 45 | org.apache.commons 46 | commons-lang3 47 | 3.4 48 | 49 | 50 | commons-io 51 | commons-io 52 | 2.5 53 | 54 | 55 | com.google.guava 56 | guava 57 | 19.0 58 | 59 | 60 | 61 | org.jsoup 62 | jsoup 63 | 1.9.2 64 | 65 | 66 | com.fasterxml.jackson.jr 67 | jackson-jr-all 68 | 2.8.1 69 | 70 | 71 | com.jayway.jsonpath 72 | json-path 73 | 2.2.0 74 | 75 | 76 | 77 | org.apache.commons 78 | commons-collections4 79 | 4.1 80 | 81 | 82 | 83 | org.apache.commons 84 | commons-csv 85 | 1.4 86 | 87 | 88 | 89 | org.mapdb 90 | mapdb 91 | 3.0.1 92 | 93 | 94 | mysql 95 | mysql-connector-java 96 | 5.1.39 97 | 98 | 99 | 100 | joinery 101 | joinery-dataframe 102 | 1.7 103 | 104 | 105 | org.apache.poi 106 | poi 107 | 3.14 108 | 109 | 110 | 111 | com.aol.simplereact 112 | cyclops-react 113 | 1.0.0-RC4 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | org.apache.maven.plugins 122 | maven-compiler-plugin 123 | 3.5.1 124 | 125 | 1.8 126 | 1.8 127 | 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-surefire-plugin 132 | 2.19.1 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.eclipse.m2e 141 | lifecycle-mapping 142 | 1.0.0 143 | 144 | 145 | 146 | 147 | 148 | 149 | org.apache.maven.plugins 150 | maven-dependency-plugin 151 | [1.0.0,) 152 | 153 | copy-dependencies 154 | unpack 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter03/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-03-eda 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | org.slf4j 30 | slf4j-api 31 | ${slf4j.version} 32 | 33 | 34 | ch.qos.logback 35 | logback-classic 36 | ${logback.version} 37 | 38 | 39 | ch.qos.logback 40 | logback-core 41 | ${logback.version} 42 | 43 | 44 | 45 | 46 | org.apache.commons 47 | commons-lang3 48 | 3.4 49 | 50 | 51 | commons-io 52 | commons-io 53 | 2.5 54 | 55 | 56 | com.google.guava 57 | guava 58 | 19.0 59 | 60 | 61 | 62 | com.fasterxml.jackson.jr 63 | jackson-jr-all 64 | 2.8.1 65 | 66 | 67 | 68 | org.apache.commons 69 | commons-math3 70 | 3.6.1 71 | 72 | 73 | 74 | joinery 75 | joinery-dataframe 76 | 1.7 77 | 78 | 79 | org.apache.poi 80 | poi 81 | 3.14 82 | 83 | 84 | rhino 85 | js 86 | 1.7R2 87 | 88 | 89 | jline 90 | jline 91 | 2.14.2 92 | 93 | 94 | com.xeiam.xchart 95 | xchart 96 | 2.5.1 97 | 98 | 99 | 100 | com.github.haifengl 101 | smile-core 102 | 1.1.0 103 | 104 | 105 | com.github.haifengl 106 | smile-plot 107 | 1.1.0 108 | 109 | 110 | 111 | 112 | junit 113 | junit-dep 114 | 4.8.1 115 | test 116 | 117 | 118 | 119 | 120 | 121 | 122 | org.apache.maven.plugins 123 | maven-compiler-plugin 124 | 3.5.1 125 | 126 | 1.8 127 | 1.8 128 | 129 | 130 | 131 | org.apache.maven.plugins 132 | maven-surefire-plugin 133 | 2.19.1 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | org.eclipse.m2e 142 | lifecycle-mapping 143 | 1.0.0 144 | 145 | 146 | 147 | 148 | 149 | 150 | org.apache.maven.plugins 151 | maven-dependency-plugin 152 | [1.0.0,) 153 | 154 
| copy-dependencies 155 | unpack 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter03/src/main/java/chapter03/Data.java: -------------------------------------------------------------------------------- 1 | package chapter03; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Files; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.List; 8 | import java.util.stream.Collectors; 9 | import java.util.stream.Stream; 10 | 11 | import com.fasterxml.jackson.jr.ob.JSON; 12 | import com.google.common.base.Throwables; 13 | 14 | public class Data { 15 | 16 | public static List readRankedPages() throws IOException { 17 | Path path = Paths.get("./data/ranked-pages.json"); 18 | try (Stream lines = Files.lines(path)) { 19 | return lines.map(line -> parseJson(line)).collect(Collectors.toList()); 20 | } 21 | } 22 | 23 | public static RankedPage parseJson(String line) { 24 | try { 25 | return JSON.std.beanFrom(RankedPage.class, line); 26 | } catch (IOException e) { 27 | throw Throwables.propagate(e); 28 | } 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter04/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-04-supervised 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | java-ds 25 | https://raw.githubusercontent.com/alexeygrigorev/maven-repo/master/ 26 | 27 | 28 | 29 | 30 | 31 | 32 | org.slf4j 33 | slf4j-api 34 | ${slf4j.version} 35 | 36 | 37 | ch.qos.logback 38 | logback-classic 39 | ${logback.version} 40 | 41 | 42 | ch.qos.logback 43 | logback-core 44 | ${logback.version} 45 | 46 | 47 | 48 | 49 | org.apache.commons 50 | commons-lang3 51 | 3.4 52 | 53 | 54 | commons-io 55 | commons-io 56 | 2.5 57 | 58 | 59 | com.google.guava 60 | guava 61 | 19.0 62 | 63 | 64 | 65 | com.fasterxml.jackson.jr 66 | jackson-jr-all 67 | 2.8.1 68 | 69 | 70 | 71 | joinery 72 | joinery-dataframe 73 | 1.7 74 | 75 | 76 | org.apache.poi 77 | poi 78 | 3.14 79 | 80 | 81 | 82 | com.github.haifengl 83 | smile-core 84 | 1.2.0 85 | 86 | 87 | com.github.haifengl 88 | smile-plot 89 | 1.1.0 90 | 91 | 92 | 93 | com.edwardraff 94 | JSAT 95 | 0.0.5 96 | 97 | 98 | 99 | net.sourceforge 100 | javaml 101 | 0.1.7 102 | 103 | 104 | be.abeel 105 | ajt 106 | 2.9 107 | 108 | 109 | 110 | org.encog 111 | encog-core 112 | 3.3.0 113 | 114 | 115 | 116 | tw.edu.ntu.csie 117 | libsvm 118 | 3.17 119 | 120 | 121 | de.bwaldvogel 122 | liblinear 123 | 1.95 124 | 125 | 126 | 127 | 128 | junit 129 | junit-dep 130 | 4.8.1 131 | test 132 | 133 | 134 | 135 | 136 | 137 | 138 | org.apache.maven.plugins 139 | maven-compiler-plugin 140 | 3.5.1 141 | 142 | 1.8 143 | 1.8 144 | 145 | 146 | 147 | org.apache.maven.plugins 148 | maven-surefire-plugin 149 | 2.19.1 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | org.eclipse.m2e 158 | lifecycle-mapping 159 | 1.0.0 160 | 161 | 162 | 163 | 164 | 165 | 166 | org.apache.maven.plugins 167 | maven-dependency-plugin 168 | [1.0.0,) 169 | 170 | copy-dependencies 171 | unpack 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 
-------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter04/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter05/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-05-unsupervised 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | ${slf4j.version} 31 | 32 | 33 | ch.qos.logback 34 | logback-classic 35 | ${logback.version} 36 | 37 | 38 | ch.qos.logback 39 | logback-core 40 | ${logback.version} 41 | 42 | 43 | 44 | 45 | org.apache.commons 46 | commons-lang3 47 | 3.4 48 | 49 | 50 | commons-io 51 | commons-io 52 | 2.5 53 | 54 | 55 | org.apache.commons 56 | commons-math3 57 | 3.6.1 58 | 59 | 60 | com.google.guava 61 | guava 62 | 19.0 63 | 64 | 65 | 66 | joinery 67 | joinery-dataframe 68 | 1.7 69 | 70 | 71 | org.apache.poi 72 | poi 73 | 3.14 74 | 75 | 76 | 77 | com.github.haifengl 78 | smile-core 79 | 1.2.1 80 | 81 | 82 | com.github.haifengl 83 | smile-plot 84 | 1.2.0 85 | 86 | 87 | 88 | com.edwardraff 89 | JSAT 90 | 0.0.5 91 | 92 | 93 | 94 | com.googlecode.matrix-toolkits-java 95 | mtj 96 | 1.0.2 97 | 98 | 99 | 100 | com.aol.simplereact 101 | cyclops-react 102 | 1.0.0-FINAL 103 | 104 | 105 | 106 | 107 | junit 108 | junit-dep 109 | 4.8.1 110 | test 111 | 112 | 113 | 114 | 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-compiler-plugin 119 | 3.5.1 120 | 121 | 1.8 122 | 1.8 123 | 124 | 125 | 126 | org.apache.maven.plugins 127 | maven-surefire-plugin 128 | 2.19.1 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | org.eclipse.m2e 137 | lifecycle-mapping 138 | 1.0.0 139 | 140 | 141 | 142 | 143 | 144 | 145 | org.apache.maven.plugins 146 | maven-dependency-plugin 147 | [1.0.0,) 148 | 149 | copy-dependencies 150 | unpack 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter06/src/main/java/chapter06/cv/CV.java: -------------------------------------------------------------------------------- 1 | package chapter06.cv; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.Random; 7 | import java.util.stream.IntStream; 8 | 9 | import org.apache.commons.lang3.Validate; 10 | 11 | public class CV { 12 | 13 | public static List kfold(int length, int k, boolean shuffle, long seed) { 14 | Validate.isTrue(k < length); 15 | 16 | int[] indexes = IntStream.range(0, length).toArray(); 17 | if (shuffle) { 18 | shuffle(indexes, seed); 19 | } 20 | 21 | int[][] folds = prepareFolds(indexes, k); 22 | List result = new ArrayList<>(); 23 | 24 | for (int i = 0; i < k; i++) { 25 | int[] testIdx = folds[i]; 26 | int[] trainIdx = combineTrainFolds(folds, indexes.length, i); 27 | result.add(new IndexSplit(trainIdx, testIdx)); 28 | } 29 | 30 | return result; 31 | } 32 | 33 | public static IndexSplit trainTestSplit(int length, double 
testRatio, boolean shuffle, long seed) { 34 | Validate.isTrue(testRatio > 0.0 && testRatio < 1.0, "testRatio must be in (0, 1) interval"); 35 | 36 | int[] indexes = IntStream.range(0, length).toArray(); 37 | if (shuffle) { 38 | shuffle(indexes, seed); 39 | } 40 | 41 | int trainSize = (int) (indexes.length * (1 - testRatio)); 42 | 43 | int[] trainIndex = Arrays.copyOfRange(indexes, 0, trainSize); 44 | int[] testIndex = Arrays.copyOfRange(indexes, trainSize, indexes.length); 45 | 46 | return new IndexSplit(trainIndex, testIndex); 47 | } 48 | 49 | public static void shuffle(int[] indexes, long seed) { 50 | Random rnd = new Random(seed); 51 | shuffle(indexes, rnd); 52 | } 53 | 54 | public static void shuffle(int[] indexes, Random rnd) { 55 | for (int i = indexes.length - 1; i > 0; i--) { 56 | int index = rnd.nextInt(i + 1); 57 | 58 | int tmp = indexes[index]; 59 | indexes[index] = indexes[i]; 60 | indexes[i] = tmp; 61 | } 62 | } 63 | 64 | private static int[][] prepareFolds(int[] indexes, int k) { 65 | int[][] foldIndexes = new int[k][]; 66 | 67 | int step = indexes.length / k; 68 | int beginIndex = 0; 69 | 70 | for (int i = 0; i < k - 1; i++) { 71 | foldIndexes[i] = Arrays.copyOfRange(indexes, beginIndex, beginIndex + step); 72 | beginIndex = beginIndex + step; 73 | } 74 | 75 | foldIndexes[k - 1] = Arrays.copyOfRange(indexes, beginIndex, indexes.length); 76 | return foldIndexes; 77 | } 78 | 79 | private static int[] combineTrainFolds(int[][] folds, int totalSize, int excludeIndex) { 80 | int size = totalSize - folds[excludeIndex].length; 81 | int result[] = new int[size]; 82 | 83 | int start = 0; 84 | for (int i = 0; i < folds.length; i++) { 85 | if (i == excludeIndex) { 86 | continue; 87 | } 88 | int[] fold = folds[i]; 89 | System.arraycopy(fold, 0, result, start, fold.length); 90 | start = start + fold.length; 91 | } 92 | 93 | return result; 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter06/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/Metrics.java: -------------------------------------------------------------------------------- 1 | package chapter07; 2 | 3 | import java.util.Arrays; 4 | 5 | import org.apache.commons.lang3.Validate; 6 | 7 | import smile.validation.AUC; 8 | 9 | public class Metrics { 10 | 11 | public static double auc(double[] actual, double[] predicted) { 12 | Validate.isTrue(actual.length == predicted.length, "the lengths don't match"); 13 | int[] truth = Arrays.stream(actual).mapToInt(i -> (int) i).toArray(); 14 | double result = AUC.measure(truth, predicted); 15 | if (result < 0.5) { 16 | return 1 - result; 17 | } else { 18 | return result; 19 | } 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/TextUtils.java: -------------------------------------------------------------------------------- 1 | package chapter07; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | import java.util.Set; 6 | import java.util.regex.Pattern; 7 | import java.util.stream.Collectors; 8 | 9 | import 
com.google.common.collect.ImmutableSet; 10 | 11 | public class TextUtils { 12 | 13 | public static final Set EN_STOPWORDS = ImmutableSet.of("a", "an", "and", "are", "as", "at", "be", 14 | "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", 15 | "their", "then", "there", "these", "they", "this", "to", "was", "will", "with", "what", "which", "s", "m", "t"); 16 | 17 | public static List tokenize(String line) { 18 | Pattern pattern = Pattern.compile("\\W+"); 19 | String[] split = pattern.split(line.toLowerCase()); 20 | return Arrays.stream(split) 21 | .map(String::trim) 22 | .filter(s -> s.length() > 2) 23 | .collect(Collectors.toList()); 24 | } 25 | 26 | public static List tokenizeFilter(String line) { 27 | Pattern pattern = Pattern.compile("\\W+"); 28 | String[] split = pattern.split(line.toLowerCase()); 29 | return Arrays.stream(split) 30 | .map(String::trim) 31 | .filter(s -> s.length() > 2) 32 | .filter(s -> !isStopword(s)) 33 | .collect(Collectors.toList()); 34 | } 35 | 36 | public static boolean isStopword(String token) { 37 | return EN_STOPWORDS.contains(token); 38 | } 39 | 40 | public static List removeStopwords(List line) { 41 | return removeStopwords(line, EN_STOPWORDS); 42 | } 43 | 44 | public static List removeStopwords(List line, Set stopwords) { 45 | return line.stream().filter(token -> !stopwords.contains(token)).collect(Collectors.toList()); 46 | } 47 | } -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/cv/CV.java: -------------------------------------------------------------------------------- 1 | package chapter07.cv; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.Random; 7 | import java.util.stream.IntStream; 8 | 9 | import org.apache.commons.lang3.Validate; 10 | 11 | public class CV { 12 | 13 | public static List kfold(Dataset dataset, int k, boolean shuffle, long seed) { 14 | int length = dataset.length(); 15 | Validate.isTrue(k < length); 16 | 17 | int[] indexes = IntStream.range(0, length).toArray(); 18 | if (shuffle) { 19 | shuffle(indexes, seed); 20 | } 21 | 22 | int[][] folds = prepareFolds(indexes, k); 23 | List result = new ArrayList<>(); 24 | 25 | for (int i = 0; i < k; i++) { 26 | int[] testIdx = folds[i]; 27 | int[] trainIdx = combineTrainFolds(folds, indexes.length, i); 28 | result.add(Split.fromIndexes(dataset, trainIdx, testIdx)); 29 | } 30 | 31 | return result; 32 | } 33 | 34 | public static Split trainTestSplit(Dataset dataset, double testRatio, boolean shuffle, long seed) { 35 | Validate.isTrue(testRatio > 0.0 && testRatio < 1.0, "testRatio must be in (0, 1) interval"); 36 | 37 | int[] indexes = IntStream.range(0, dataset.length()).toArray(); 38 | if (shuffle) { 39 | shuffle(indexes, seed); 40 | } 41 | 42 | int trainSize = (int) (indexes.length * (1 - testRatio)); 43 | 44 | int[] trainIndex = Arrays.copyOfRange(indexes, 0, trainSize); 45 | int[] testIndex = Arrays.copyOfRange(indexes, trainSize, indexes.length); 46 | 47 | return Split.fromIndexes(dataset, trainIndex, testIndex); 48 | } 49 | 50 | public static void shuffle(int[] indexes, long seed) { 51 | Random rnd = new Random(seed); 52 | 53 | for (int i = indexes.length - 1; i > 0; i--) { 54 | int index = rnd.nextInt(i + 1); 55 | 56 | int tmp = indexes[index]; 57 | indexes[index] = indexes[i]; 58 | indexes[i] = tmp; 59 | } 60 | } 61 | 62 | private static int[][] 
prepareFolds(int[] indexes, int k) { 63 | int[][] foldIndexes = new int[k][]; 64 | 65 | int step = indexes.length / k; 66 | int beginIndex = 0; 67 | 68 | for (int i = 0; i < k - 1; i++) { 69 | foldIndexes[i] = Arrays.copyOfRange(indexes, beginIndex, beginIndex + step); 70 | beginIndex = beginIndex + step; 71 | } 72 | 73 | foldIndexes[k - 1] = Arrays.copyOfRange(indexes, beginIndex, indexes.length); 74 | return foldIndexes; 75 | } 76 | 77 | private static int[] combineTrainFolds(int[][] folds, int totalSize, int excludeIndex) { 78 | int size = totalSize - folds[excludeIndex].length; 79 | int result[] = new int[size]; 80 | 81 | int start = 0; 82 | for (int i = 0; i < folds.length; i++) { 83 | if (i == excludeIndex) { 84 | continue; 85 | } 86 | int[] fold = folds[i]; 87 | System.arraycopy(fold, 0, result, start, fold.length); 88 | start = start + fold.length; 89 | } 90 | 91 | return result; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/cv/Split.java: -------------------------------------------------------------------------------- 1 | package chapter07.cv; 2 | 3 | import java.util.Objects; 4 | 5 | public class Split { 6 | 7 | private final Dataset train; 8 | private final Dataset test; 9 | 10 | public Split(Dataset train, Dataset test) { 11 | this.train = train; 12 | this.test = test; 13 | } 14 | 15 | public static Split fromIndexes(Dataset dataset, int[] trainIndex, int[] testIndex) { 16 | double[][] X = dataset.getX(); 17 | double[] y = dataset.getY(); 18 | 19 | int trainSize = trainIndex.length; 20 | 21 | double[][] trainXres = new double[trainSize][]; 22 | double[] trainYres = new double[trainSize]; 23 | for (int i = 0; i < trainSize; i++) { 24 | int idx = trainIndex[i]; 25 | trainXres[i] = X[idx]; 26 | trainYres[i] = y[idx]; 27 | } 28 | 29 | int testSize = testIndex.length; 30 | 31 | double[][] testXres = new double[testSize][]; 32 | double[] testYres = new double[testSize]; 33 | for (int i = 0; i < testSize; i++) { 34 | int idx = testIndex[i]; 35 | testXres[i] = X[idx]; 36 | testYres[i] = y[idx]; 37 | } 38 | 39 | Dataset train = new Dataset(trainXres, trainYres, dataset.getFeatureNames()); 40 | Dataset test = new Dataset(testXres, testYres, dataset.getFeatureNames()); 41 | return new Split(train, test); 42 | } 43 | 44 | public Dataset getTrain() { 45 | return train; 46 | } 47 | 48 | public Dataset getTest() { 49 | return test; 50 | } 51 | 52 | @Override 53 | public boolean equals(Object obj) { 54 | if (obj instanceof Split) { 55 | Split other = (Split) obj; 56 | return train.equals(other.train) && test.equals(test); 57 | } 58 | 59 | return false; 60 | } 61 | 62 | @Override 63 | public int hashCode() { 64 | return Objects.hash(train, test); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter08/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-08-dl4j 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 
1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | ${slf4j.version} 31 | 32 | 33 | ch.qos.logback 34 | logback-classic 35 | ${logback.version} 36 | 37 | 38 | ch.qos.logback 39 | logback-core 40 | ${logback.version} 41 | 42 | 43 | 44 | 45 | org.apache.commons 46 | commons-lang3 47 | 3.4 48 | 49 | 50 | commons-io 51 | commons-io 52 | 2.5 53 | 54 | 55 | org.apache.commons 56 | commons-math3 57 | 3.6.1 58 | 59 | 60 | com.google.guava 61 | guava 62 | 19.0 63 | 64 | 65 | 66 | org.deeplearning4j 67 | deeplearning4j-core 68 | 0.7.1 69 | 70 | 71 | org.deeplearning4j 72 | deeplearning4j-ui_2.10 73 | 0.7.1 74 | 75 | 76 | org.nd4j 77 | nd4j-native-platform 78 | 0.7.1 79 | 80 | 87 | 88 | joinery 89 | joinery-dataframe 90 | 1.7 91 | 92 | 93 | org.apache.poi 94 | poi 95 | 3.14 96 | 97 | 98 | 99 | org.imgscalr 100 | imgscalr-lib 101 | 4.2 102 | 103 | 104 | 105 | 106 | junit 107 | junit-dep 108 | 4.8.1 109 | test 110 | 111 | 112 | 113 | 114 | 115 | 116 | org.apache.maven.plugins 117 | maven-compiler-plugin 118 | 3.5.1 119 | 120 | 1.8 121 | 1.8 122 | 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-surefire-plugin 127 | 2.19.1 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-dependency-plugin 132 | 133 | 134 | copy-dependencies 135 | prepare-package 136 | 137 | copy-dependencies 138 | 139 | 140 | libs 141 | false 142 | false 143 | true 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | org.eclipse.m2e 155 | lifecycle-mapping 156 | 1.0.0 157 | 158 | 159 | 160 | 161 | 162 | 163 | org.apache.maven.plugins 164 | maven-dependency-plugin 165 | [1.0.0,) 166 | 167 | copy-dependencies 168 | unpack 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter08/src/main/java/chapter08/Metrics.java: -------------------------------------------------------------------------------- 1 | package chapter08; 2 | 3 | import org.apache.commons.lang3.Validate; 4 | 5 | public class Metrics { 6 | 7 | public static double logLoss(double[] actual, double[] predicted) { 8 | return logLoss(actual, predicted, 1e-15); 9 | } 10 | 11 | public static double logLoss(double[] actual, double[] predicted, double eps) { 12 | Validate.isTrue(actual.length == predicted.length, "the lengths don't match"); 13 | int n = actual.length; 14 | double total = 0.0; 15 | 16 | for (int i = 0; i < n; i++) { 17 | double yi = actual[i]; 18 | double pi = predicted[i]; 19 | 20 | if (yi == 0.0) { 21 | double log = Math.log(Math.min(1 - pi, 1 - eps)); 22 | total = total + log; 23 | } else if (yi == 1.0) { 24 | double log = Math.log(Math.max(pi, eps)); 25 | total = total + log; 26 | } else { 27 | throw new IllegalArgumentException("unrecognized class " + yi); 28 | } 29 | } 30 | 31 | return -total / n; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter08/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 
2/MasteringJavaforDataScience_Code/Chapter09/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter10/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/ReadMe.txt: -------------------------------------------------------------------------------- 1 | Chapter 1 does not contain code files -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/SoftwareHardwareList.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 2/MasteringJavaforDataScience_Code/SoftwareHardwareList.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## $5 Tech Unlocked 2021! 5 | [Buy and download this Course for only $5 on PacktPub.com](https://www.packtpub.com/product/java-data-science-made-easy/9781788475655) 6 | ----- 7 | *The $5 campaign runs from __December 15th 2020__ to __January 13th 2021.__* 8 | 9 | # Java-Data-Science-Made-Easy 10 | Code Repository for Java: Data Science Made Easy 11 | ### Download a free PDF 12 | 13 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
14 | https://packt.link/free-ebook/9781788475655
--------------------------------------------------------------------------------