├── LICENSE
├── Module 1
│   └── Java for Data Science
│       ├── B04295_SoftwareHardware list.pdf
│       ├── Chapter 10-Visual and Audio analysis
│       │   ├── CMUSphinxExamples.java
│       │   ├── DetectFaceDemo.java
│       │   ├── TSSExamples.java
│       │   └── TessrJExample.java
│       ├── Chapter 11-Mathematical
│       │   ├── AparapiExamples.java
│       │   ├── AveragePageCount.java
│       │   ├── Book.java
│       │   ├── Java8Examples.java
│       │   ├── Java8MapReduceExample2.java
│       │   ├── MathExamples.java
│       │   ├── MatrixMultiplicationKernel.java
│       │   └── ScalarMultiplicationKernel.java
│       ├── Chapter 12-Bringing it together
│       │   ├── ApplicationDriver.java
│       │   ├── SentimentAnalysisTrainingData.java
│       │   ├── TweetHandler.java
│       │   └── TwitterStream.java
│       ├── Chapter 4_Data Visualization
│       │   ├── BarCharts - MainApp.java
│       │   ├── BubblePlotExample.java
│       │   ├── DonutPlotExample.java
│       │   ├── HistogramExample.java
│       │   ├── IndexChart - MainApp.java
│       │   ├── PieChart - MainApp.java
│       │   └── ScatterChart - MainApp.java
│       ├── Chapter 5-SDAT
│       │   ├── Main.java
│       │   ├── MainApp - Simple Regression.java
│       │   └── MainApp. - Multiple Regressionjava.txt
│       ├── Chapter 7-Neural networks
│       │   ├── KNNExample.java
│       │   ├── MultilayerPerceptronExample.java
│       │   ├── NeuralNetworkTraining.java
│       │   ├── SOMExample.java
│       │   └── SampleNeuralNetwork.java
│       ├── Chapter 9-Text Analysis
│       │   ├── ClassifyBySimilarity.java
│       │   ├── DL4JSentimentAnalysisExample.java
│       │   ├── NLPExamples.java
│       │   └── ParagraphVectorsClassifierExample.java
│       ├── Readme.txt
│       ├── chapter 2-data acquisition
│       │   ├── BlikiExample.java
│       │   ├── CrawlerController.java
│       │   ├── DatabaseExample.java
│       │   ├── FindPicture.java
│       │   ├── HttpURLConnectionExample.java
│       │   ├── JSoupExamples.java
│       │   ├── SampleCrawler.java
│       │   ├── SampleStreamExample.java
│       │   ├── Search.java
│       │   └── SimpleWebCrawler.java
│       ├── chapter 3
│       │   ├── App (2).java
│       │   ├── App.java
│       │   ├── CSVwithScanner.java
│       │   ├── Cat.jpg
│       │   ├── DataImputation.java
│       │   ├── Dogs.java
│       │   ├── GrayScaleParrot.png
│       │   ├── JSONExamples.java
│       │   ├── OCRExample.png
│       │   ├── OpenCVNonMavenExamples.java
│       │   ├── PDF File.docx
│       │   ├── PDF File.pdf
│       │   ├── PDFExtractionExample.java
│       │   ├── Person.json
│       │   ├── Persons.json
│       │   ├── ReadExcelExample.java
│       │   ├── Sample.xlsx
│       │   ├── SimpleSearching.java
│       │   ├── SimpleSort.java
│       │   ├── SimpleStringCleaning.java
│       │   ├── SimpleSubsetting.java
│       │   ├── TokenizerExamples.java
│       │   └── ValidatingData.java
│       ├── chapter 6-Machine Learning
│       │   ├── BookDecisionTree.java
│       │   ├── FXMLController.java
│       │   ├── JBayesTest.java
│       │   ├── Main-ARL.java
│       │   ├── Main-SVG.java
│       │   └── MainApp-Camping.java
│       └── chapter 8-Deep learning
│           ├── ConvolutionalNetworkExample.java
│           ├── DeepAutoEncoderExample.java
│           └── RegressionExample.java
├── Module 2
│   └── MasteringJavaforDataScience_Code
│       ├── Chapter02
│       │   ├── data
│       │   │   ├── keywords.txt
│       │   │   ├── search-results.txt
│       │   │   ├── text.txt
│       │   │   └── words.txt
│       │   ├── pom.xml
│       │   └── ranked-pages.json
│       ├── Chapter03
│       │   ├── data
│       │   │   └── ranked-pages.json
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           └── java
│       │               └── chapter03
│       │                   └── Data.java
│       ├── Chapter04
│       │   ├── data
│       │   │   ├── default.csv
│       │   │   ├── performance
│       │   │   │   └── y_train.csv
│       │   │   └── ranked-pages.json
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           └── resources
│       │               └── logback.xml
│       ├── Chapter05
│       │   ├── data
│       │   │   └── performance
│       │   │       └── y_train.csv
│       │   └── pom.xml
│       ├── Chapter06
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           ├── java
│       │           │   └── chapter06
│       │           │       └── cv
│       │           │           └── CV.java
│       │           └── resources
│       │               └── logback.xml
│       ├── Chapter07
│       │   ├── pom.xml
│       │   └── src
│       │       └── main
│       │           ├── java
│       │           │   └── chapter07
│       │           │       ├── Metrics.java
│       │           │       ├── TextUtils.java
│       │           │       └── cv
│       │           │           ├── CV.java
│       │           │           └── Split.java
│       │           └── resources
│       │               └── logback.xml
│       ├── Chapter08
├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── chapter08 │ │ │ └── Metrics.java │ │ └── resources │ │ └── logback.xml │ ├── Chapter09 │ ├── pom.xml │ └── src │ │ └── main │ │ └── resources │ │ └── logback.xml │ ├── Chapter10 │ ├── pom.xml │ └── src │ │ └── main │ │ └── resources │ │ └── logback.xml │ ├── ReadMe.txt │ └── SoftwareHardwareList.pdf └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/B04295_SoftwareHardware list.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/B04295_SoftwareHardware list.pdf -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/CMUSphinxExamples.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import edu.cmu.sphinx.api.Configuration; 4 | import edu.cmu.sphinx.api.SpeechResult; 5 | import edu.cmu.sphinx.api.StreamSpeechRecognizer; 6 | import edu.cmu.sphinx.result.WordResult; 7 | import java.io.File; 8 | import java.io.FileInputStream; 9 | import java.io.IOException; 10 | import java.io.InputStream; 11 | import static java.lang.System.out; 12 | import java.util.Collection; 13 | import java.util.List; 14 | 15 | /* 16 | 17 | 18 | edu.cmu.sphinx 19 | sphinx4-core 20 | 5prealpha-SNAPSHOT 21 | 22 | 23 | edu.cmu.sphinx 24 | sphinx4-data 25 | 5prealpha-SNAPSHOT 26 | 27 | 28 | */ 29 | 30 | public class CMUSphinxExamples { 31 | 32 | public CMUSphinxExamples() { 33 | simpleSpeechExample(); 34 | } 35 | 36 | public static void main(String[] args) { 37 | new CMUSphinxExamples(); 38 | } 39 | 40 | public void simpleSpeechExample() { 41 | try { 42 | Configuration configuration = new Configuration(); 43 | 44 | String prefix = "resource:/edu/cmu/sphinx/models/en-us/"; 45 | configuration 46 | .setAcousticModelPath(prefix + "en-us"); 47 | configuration 48 | .setDictionaryPath(prefix + "cmudict-en-us.dict"); 49 | configuration 50 | .setLanguageModelPath(prefix + "en-us.lm.bin"); 51 | 52 | StreamSpeechRecognizer recognizer = new 
StreamSpeechRecognizer( 53 | configuration); 54 | InputStream stream = new FileInputStream(new File("Original5.wav")); 55 | 56 | recognizer.startRecognition(stream); 57 | SpeechResult result; 58 | while ((result = recognizer.getResult()) != null) { 59 | out.println("Hypothesis: " + result.getHypothesis()); 60 | 61 | out.println(); 62 | Collection results = result.getNbest(3); 63 | for (String sentence : results) { 64 | out.println(sentence); 65 | } 66 | out.println("-----"); 67 | List words = result.getWords(); 68 | for (WordResult wordResult : words) { 69 | out.print(wordResult.getWord() + " "); 70 | } 71 | out.println(); 72 | out.println("-----"); 73 | for (WordResult wordResult : words) { 74 | out.printf("%s\n\tConfidence: %.3f\n\tTime Frame: %s\n", 75 | wordResult.getWord(), result 76 | .getResult() 77 | .getLogMath() 78 | .logToLinear((float)wordResult 79 | .getConfidence()), 80 | wordResult.getTimeFrame()); 81 | } 82 | out.println(); 83 | } 84 | 85 | recognizer.stopRecognition(); 86 | } catch (IOException ex) { 87 | ex.printStackTrace(); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/DetectFaceDemo.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import static java.lang.System.out; 4 | import org.opencv.core.Core; 5 | import org.opencv.core.Mat; 6 | import org.opencv.core.MatOfRect; 7 | import org.opencv.core.Point; 8 | import org.opencv.core.Rect; 9 | import org.opencv.core.Scalar; 10 | import org.opencv.imgcodecs.Imgcodecs; 11 | import org.opencv.imgproc.Imgproc; 12 | import org.opencv.objdetect.CascadeClassifier; 13 | 14 | // Adapted from http://docs.opencv.org/2.4/doc/tutorials/introduction/desktop_java/java_dev_intro.html 15 | 16 | public class DetectFaceDemo { 17 | 18 | public void run() { 19 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 20 | String base = "C:/Books in Progress/Java for Data Science/Chapter 10/OpenCVExamples/src/resources"; 21 | CascadeClassifier faceDetector = 22 | new CascadeClassifier(base + "/lbpcascade_frontalface.xml"); 23 | 24 | Mat image = Imgcodecs.imread(base + "/images.jpg"); 25 | 26 | MatOfRect faceVectors = new MatOfRect(); 27 | faceDetector.detectMultiScale(image, faceVectors); 28 | 29 | out.println(faceVectors.toArray().length + " faces found"); 30 | 31 | for (Rect rect : faceVectors.toArray()) { 32 | Imgproc.rectangle(image, new Point(rect.x, rect.y), 33 | new Point(rect.x + rect.width, rect.y + rect.height), 34 | new Scalar(0, 255, 0)); 35 | } 36 | Imgcodecs.imwrite("faceDetection.png", image); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/TSSExamples.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import com.sun.speech.freetts.Voice; 4 | import com.sun.speech.freetts.VoiceManager; 5 | import static java.lang.System.out; 6 | 7 | public class TSSExamples { 8 | 9 | public TSSExamples() { 10 | System.setProperty("mbrola.base", "C:\\Books in Progress\\Java for Data Science\\Chapter 10\\Downloads\\MBROLA"); 11 | demonstrateFreeTTS(); 12 | demonstrateVoice(); 13 | } 14 | 15 | public static void main(String[] args) { 16 | new TSSExamples(); 17 | } 18 | 19 | public void demonstrateFreeTTS() { 20 | VoiceManager vm = VoiceManager.getInstance(); 21 | Voice voice = 
vm.getVoice("kevin16"); 22 | voice.allocate(); 23 | voice.speak("Hello World"); 24 | 25 | // Voices 26 | out.println("------Voices-------"); 27 | Voice[] voices = vm.getVoices(); 28 | for (Voice v : voices) { 29 | out.println(v); 30 | } 31 | 32 | // Voice information 33 | out.println(); 34 | out.println("------Voice Information-------"); 35 | out.println(vm.getVoiceInfo()); 36 | 37 | out.println(); 38 | out.println("------Alan Voice Information-------"); 39 | Voice v = vm.getVoice("alan"); 40 | out.println(v); 41 | } 42 | 43 | public void demonstrateVoice() { 44 | out.println(); 45 | out.println("------Voice Demonstration-------"); 46 | 47 | VoiceManager vm = VoiceManager.getInstance(); 48 | Voice voice = vm.getVoice("kevin16"); 49 | voice.allocate(); 50 | 51 | out.println("Name: " + voice.getName()); 52 | out.println("Description: " + voice.getDescription()); 53 | out.println("Organization: " + voice.getOrganization()); 54 | out.println("Age: " + voice.getAge()); 55 | out.println("Gender: " + voice.getGender()); 56 | out.println("Rate: " + voice.getRate()); 57 | out.println("Pitch: " + voice.getPitch()); 58 | out.println("Style: " + voice.getStyle()); 59 | out.println(); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 10-Visual and Audio analysis/TessrJExample.java: -------------------------------------------------------------------------------- 1 | package tessrj.example; 2 | 3 | import java.io.File; 4 | import net.sourceforge.tess4j.ITesseract; 5 | import net.sourceforge.tess4j.Tesseract; 6 | import net.sourceforge.tess4j.TesseractException; 7 | 8 | public class TessrJExample { 9 | 10 | public static void main(String[] args) { 11 | ITesseract instance = new Tesseract(); 12 | instance.setLanguage("eng"); 13 | try { 14 | String result; 15 | result = instance.doOCR(new File("OCRExample.png")); 16 | System.out.println(result); 17 | } catch (TesseractException e) { 18 | System.err.println(e.getMessage()); 19 | } 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/AparapiExamples.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import com.amd.aparapi.Kernel; 4 | import com.amd.aparapi.internal.exception.ClassParseException; 5 | import static java.lang.System.out; 6 | import java.util.Arrays; 7 | import java.util.Random; 8 | import java.util.stream.IntStream; 9 | 10 | public class AparapiExamples { 11 | int n = 4; 12 | int m = 2; 13 | int p = 3; 14 | 15 | double A[][] = { 16 | {0.1950, 0.0311}, 17 | {0.3588, 0.2203}, 18 | {0.1716, 0.5931}, 19 | {0.2105, 0.3242}}; 20 | double B[][] = { 21 | {0.0502, 0.9823, 0.9472}, 22 | {0.5732, 0.2694, 0.916}}; 23 | double C[][] = new double[n][p]; 24 | 25 | public AparapiExamples() { 26 | simpleMatrixMultiplication(); 27 | matrixMulitplication(); 28 | scalarMatrixMultiplication(); 29 | } 30 | 31 | public void scalarMatrixMultiplication() { 32 | float inputMatrix[] = {3, 4, 5, 6, 7, 8, 9}; 33 | int size = inputMatrix.length; 34 | 35 | ScalarMultiplicationKernel kernel 36 | = new ScalarMultiplicationKernel(inputMatrix); 37 | kernel.setExecutionMode(Kernel.EXECUTION_MODE.NONE); 38 | kernel.execute(size); 39 | kernel.displayResult(); 40 | kernel.dispose(); 41 | } 42 | 43 | public void matrixMulitplication() { 44 | MatrixMultiplicationKernel kernel 45 | = new MatrixMultiplicationKernel(n, m, p); 46 | 
kernel.execute(6, 3); 47 | kernel.displayResults(); 48 | kernel.dispose(); 49 | } 50 | 51 | public void simpleMatrixMultiplication() { 52 | System.out.println(); 53 | System.out.println("Simple Matrix Multiplication"); 54 | for (int i = 0; i < n; i++) { 55 | for (int k = 0; k < m; k++) { 56 | for (int j = 0; j < p; j++) { 57 | C[i][j] += A[i][k] * B[k][j]; 58 | } 59 | } 60 | } 61 | displayResult(); 62 | } 63 | 64 | public void displayResult() { 65 | out.println("Result"); 66 | for (int i = 0; i < n; i++) { 67 | for (int j = 0; j < p; j++) { 68 | System.out.printf("%.4f ", C[i][j]); 69 | } 70 | System.out.println(); 71 | } 72 | } 73 | 74 | public static void main(String[] args) throws Exception { 75 | new AparapiExamples(); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/AveragePageCount.java: -------------------------------------------------------------------------------- 1 | 2 | import java.io.IOException; 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.FloatWritable; 6 | import org.apache.hadoop.io.IntWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.mapreduce.Job; 9 | import org.apache.hadoop.mapreduce.Mapper; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 13 | 14 | public class AveragePageCount { 15 | 16 | public static class TextMapper 17 | extends Mapper { 18 | 19 | private final IntWritable pgs = new IntWritable(); 20 | private final Text bookTitle = new Text(); 21 | 22 | @Override 23 | public void map(Object key, Text bookInfo, Context context 24 | ) throws IOException, InterruptedException { 25 | String[] book = bookInfo.toString().split("\t"); 26 | bookTitle.set(book[0]); 27 | pgs.set(Integer.parseInt(book[2])); 28 | context.write(bookTitle, pgs); 29 | } 30 | } 31 | 32 | public static class AverageReduce 33 | extends Reducer { 34 | 35 | private final FloatWritable finalAvg = new FloatWritable(); 36 | Float average = 0f; 37 | Float count = 0f; 38 | int sum = 0; 39 | 40 | @Override 41 | public void reduce(Text key, Iterable pageCnts, 42 | Context context 43 | ) throws IOException, InterruptedException { 44 | 45 | for (IntWritable cnt : pageCnts) { 46 | sum += cnt.get(); 47 | } 48 | count += 1; 49 | average = sum / count; 50 | finalAvg.set(average); 51 | context.write(new Text("Average Page Count = "), finalAvg); 52 | } 53 | } 54 | 55 | public static void main(String[] args) throws Exception { 56 | Configuration con = new Configuration(); 57 | Job bookJob = Job.getInstance(con, "Average Page Count"); 58 | bookJob.setJarByClass(AveragePageCount.class); 59 | bookJob.setMapperClass(TextMapper.class); 60 | bookJob.setReducerClass(AverageReduce.class); 61 | bookJob.setOutputKeyClass(Text.class); 62 | bookJob.setOutputValueClass(IntWritable.class); 63 | 64 | FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt")); 65 | FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput")); 66 | if (bookJob.waitForCompletion(true)) { 67 | System.exit(0); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/Book.java: -------------------------------------------------------------------------------- 1 | 2 | public class Book { 3 
| 4 | public String title; 5 | public String author; 6 | public int pgCnt; 7 | 8 | public Book() { 9 | title = ""; 10 | author = ""; 11 | pgCnt = 0; 12 | } 13 | 14 | public Book(String t, String a, int p) { 15 | title = t; 16 | author = a; 17 | pgCnt = p; 18 | } 19 | 20 | public int getPgCnt(){ 21 | return pgCnt; 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/Java8Examples.java: -------------------------------------------------------------------------------- 1 | package java8examples; 2 | 3 | import static java.lang.System.out; 4 | import java.util.Arrays; 5 | import java.util.stream.IntStream; 6 | 7 | public class Java8Examples { 8 | int n = 4; 9 | int m = 2; 10 | int p = 3; 11 | 12 | double A[][] = { 13 | {0.1950, 0.0311}, 14 | {0.3588, 0.2203}, 15 | {0.1716, 0.5931}, 16 | {0.2105, 0.3242}}; 17 | double B[][] = { 18 | {0.0502, 0.9823, 0.9472}, 19 | {0.5732, 0.2694, 0.916}}; 20 | double C[][] = new double[n][p]; 21 | 22 | public void simpleStreams() { 23 | int numbers[] = {3, 54, 23, 87, 50, 21, 31}; 24 | 25 | IntStream stream = Arrays.stream(numbers); 26 | stream.forEach(e -> out.printf("%d ", e)); 27 | out.println(); 28 | 29 | stream = Arrays.stream(numbers); 30 | stream 31 | .mapToDouble((int e) -> { 32 | return 2 * e; 33 | }) 34 | .forEach(e -> out.printf("%.4f ", e)); 35 | out.println(); 36 | 37 | stream = Arrays.stream(numbers); 38 | IntStream.range(0, 3).forEach(e -> out.printf("%d ", e)); 39 | out.println(); 40 | 41 | out.println(Arrays.stream(numbers).sum()); 42 | 43 | int nums[] = stream.toArray(); 44 | for (int e : nums) { 45 | out.print(e + " "); 46 | } 47 | out.println(); 48 | } 49 | 50 | public void matrixMultipliationSolution() { 51 | // Java 8 Stream solution 52 | out.println("\nJava 8 Stream Solution"); 53 | C = Arrays.stream(A) 54 | // .parallel() 55 | .map(AMatrixRow -> IntStream.range(0, B[0].length) 56 | .mapToDouble(i -> IntStream.range(0, B.length) 57 | .mapToDouble(j -> AMatrixRow[j] * B[j][i]) 58 | .sum() 59 | ).toArray()).toArray(double[][]::new); 60 | displayResult(); 61 | out.println(); 62 | } 63 | 64 | public Java8Examples() { 65 | // simpleStreams(); 66 | matrixMultipliationSolution(); 67 | } 68 | 69 | public void displayResult() { 70 | out.println("Result"); 71 | for (int i = 0; i < n; i++) { 72 | for (int j = 0; j < p; j++) { 73 | System.out.printf("%.4f ", C[i][j]); 74 | } 75 | System.out.println(); 76 | } 77 | } 78 | 79 | public static void main(String[] args) { 80 | new Java8Examples(); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/Java8MapReduceExample2.java: -------------------------------------------------------------------------------- 1 | 2 | import static java.lang.System.out; 3 | import java.util.ArrayList; 4 | 5 | public class Java8MapReduceExample2 { 6 | 7 | public static void main(String[] args) { 8 | 9 | ArrayList books = new ArrayList<>(); 10 | double average; 11 | int totalPg = 0; 12 | 13 | books.add(new Book("Moby Dick", "Herman Melville", 822)); 14 | books.add(new Book("Charlotte's Web", "E.B. 
White", 189)); 15 | books.add(new Book("The Grapes of Wrath", "John Steinbeck", 212)); 16 | books.add(new Book("Jane Eyre", "Charlotte Bronte", 299)); 17 | books.add(new Book("A Tale of Two Cities", "Charles Dickens", 673)); 18 | books.add(new Book("War and Peace", "Leo Tolstoy", 1032)); 19 | books.add(new Book("The Great Gatsby", "F. Scott Fitzgerald", 275)); 20 | 21 | totalPg = books 22 | .stream() 23 | .parallel() 24 | .map((b) -> b.pgCnt) 25 | .reduce(totalPg, (accumulator, _item) -> { 26 | out.println(accumulator + " " +_item); 27 | return accumulator + _item; 28 | }); 29 | 30 | average = 1.0 * totalPg / books.size(); 31 | out.println("Average Page Count: " + average); 32 | 33 | average = books 34 | .parallelStream() 35 | .map(b -> b.pgCnt) 36 | .mapToDouble(s -> s) 37 | .average() 38 | .getAsDouble(); 39 | out.println("Average Page Count: " + average); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 11-Mathematical/MathExamples.java: -------------------------------------------------------------------------------- 1 | package packt.jblasexamples; 2 | 3 | import static java.lang.System.out; 4 | import org.apache.commons.math3.linear.Array2DRowRealMatrix; 5 | import org.apache.commons.math3.linear.RealMatrix; 6 | import org.jblas.DoubleMatrix; 7 | import org.nd4j.linalg.api.ndarray.INDArray; 8 | import org.nd4j.linalg.factory.Nd4j; 9 | 10 | /* 11 | // 12 | // 13 | // org.jblas 14 | // jblas 15 | // 1.2.4 16 | // 17 | // 18 | // 19 | // org.apache.commons 20 | // commons-exec 21 | // 1.3 22 | // 23 | // 24 | // org.apache.commons 25 | // commons-math3 26 | // 3.6.1 27 | // jar 28 | // 29 | // 30 | // 31 | // org.nd4j 32 | // nd4j-native 33 | // 0.6.0 34 | // 35 | // 36 | */ 37 | public class MathExamples { 38 | 39 | public static void main(String[] args) { 40 | new MathExamples(); 41 | } 42 | 43 | public MathExamples() { 44 | JBLASExample(); 45 | apacheCommonsExample(); 46 | nd4JExample(); 47 | } 48 | 49 | public void JBLASExample() { 50 | DoubleMatrix A = new DoubleMatrix(new double[][]{ 51 | {0.1950, 0.0311}, 52 | {0.3588, 0.2203}, 53 | {0.1716, 0.5931}, 54 | {0.2105, 0.3242}}); 55 | 56 | DoubleMatrix B = new DoubleMatrix(new double[][]{ 57 | {0.0502, 0.9823, 0.9472}, 58 | {0.5732, 0.2694, 0.916}}); 59 | DoubleMatrix C; 60 | 61 | C = A.mmul(B); 62 | 63 | for (int i = 0; i < C.getRows(); i++) { 64 | out.println(C.getRow(i)); 65 | } 66 | } 67 | 68 | public void apacheCommonsExample() { 69 | double[][] A = { 70 | {0.1950, 0.0311}, 71 | {0.3588, 0.2203}, 72 | {0.1716, 0.5931}, 73 | {0.2105, 0.3242}}; 74 | 75 | double[][] B = { 76 | {0.0502, 0.9823, 0.9472}, 77 | {0.5732, 0.2694, 0.916}}; 78 | 79 | RealMatrix aRealMatrix = new Array2DRowRealMatrix(A); 80 | RealMatrix bRealMatrix = new Array2DRowRealMatrix(B); 81 | 82 | RealMatrix cRealMatrix = aRealMatrix.multiply(bRealMatrix); 83 | System.out.println(); 84 | for (int i = 0; i < cRealMatrix.getRowDimension(); i++) { 85 | System.out.println(cRealMatrix.getRowVector(i)); 86 | } 87 | } 88 | 89 | public void nd4JExample() { 90 | double[] A = { 91 | 0.1950, 0.0311, 92 | 0.3588, 0.2203, 93 | 0.1716, 0.5931, 94 | 0.2105, 0.3242}; 95 | 96 | double[] B = { 97 | 0.0502, 0.9823, 0.9472, 98 | 0.5732, 0.2694, 0.916}; 99 | 100 | 101 | INDArray aINDArray = Nd4j.create(A,new int[]{4,2},'c'); 102 | INDArray bINDArray = Nd4j.create(B,new int[]{2,3},'c'); 103 | 104 | INDArray cINDArray; 105 | cINDArray = aINDArray.mmul(bINDArray); 106 | for(int i=0; i stream 33 | 
= new TwitterStream(this.numberOfTweets, this.topic).stream(); 34 | stream 35 | .map(s -> s.processJSON()) 36 | .map(s -> s.toLowerCase()) 37 | .filter(s -> s.isEnglish()) 38 | .map(s -> s.removeStopWords()) 39 | .filter(s -> s.containsCharacter(this.subTopic)) 40 | .map(s -> s.performSentimentAnalysis()) 41 | .forEach((TweetHandler s) -> { 42 | s.computeStats(); 43 | out.println(s); 44 | }); 45 | out.println(); 46 | out.println("Positive Reviews: " 47 | + TweetHandler.getNumberOfPositiveReviews()); 48 | out.println("Negative Reviews: " 49 | + TweetHandler.getNumberOfNegativeReviews()); 50 | } 51 | 52 | public static void main(String[] args) { 53 | new ApplicationDriver(); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 12-Bringing it together/SentimentAnalysisTrainingData.java: -------------------------------------------------------------------------------- 1 | package packt.twitterdatascienceproject; 2 | 3 | import java.io.IOException; 4 | import java.nio.charset.StandardCharsets; 5 | import java.nio.file.Files; 6 | import java.nio.file.Paths; 7 | import java.util.List; 8 | 9 | public class SentimentAnalysisTrainingData { 10 | 11 | public static void main(String[] args) { 12 | try { 13 | String filename; 14 | String file; 15 | String text; 16 | List lines = Files.readAllLines(Paths.get("C:\\Jenn Personal\\Packt Data Science\\Chapter 12\\Sentiment-Analysis-Dataset\\SentimentAnalysisDataset.csv"),StandardCharsets.ISO_8859_1); 17 | for(String s : lines){ 18 | String[] oneLine = s.split(","); 19 | if(Integer.parseInt(oneLine[1])==1){ 20 | filename = "pos"; 21 | }else{ 22 | filename = "neg"; 23 | } 24 | file = oneLine[0]+".txt"; 25 | text = oneLine[3]; 26 | Files.write(Paths.get("C:\\Jenn Personal\\Packt Data Science\\Chapter 12\\review_polarity\\txt_sentoken\\"+filename+"\\"+file), text.getBytes()); 27 | } 28 | 29 | } catch (IOException ex) { 30 | ex.printStackTrace(); 31 | } 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 12-Bringing it together/TwitterStream.java: -------------------------------------------------------------------------------- 1 | package packt.twitterdatascienceproject; 2 | 3 | import com.google.common.collect.Lists; 4 | import com.twitter.hbc.ClientBuilder; 5 | import com.twitter.hbc.core.Constants; 6 | import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint; 7 | import com.twitter.hbc.core.processor.StringDelimitedProcessor; 8 | import com.twitter.hbc.httpclient.BasicClient; 9 | import com.twitter.hbc.httpclient.auth.Authentication; 10 | import com.twitter.hbc.httpclient.auth.OAuth1; 11 | import static java.lang.System.out; 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | import java.util.concurrent.BlockingQueue; 15 | import java.util.concurrent.LinkedBlockingQueue; 16 | import java.util.stream.Stream; 17 | 18 | public class TwitterStream { 19 | private int numberOfTweets; 20 | private String topic; 21 | 22 | public TwitterStream() { 23 | this(100, "Stars Wars"); 24 | } 25 | 26 | public TwitterStream(int numberOfTweets, String topic) { 27 | this.numberOfTweets = numberOfTweets; 28 | this.topic = topic; 29 | } 30 | 31 | public Stream stream() { 32 | String myKey = "sl2WbCf4UnIr08xvHVitHJ99r"; 33 | String mySecret = "PE6yauvXjKLuvoQNXZAJo5C8N5U5piSFb3udwkoI76paK6KyqI"; 34 | String myToken = "1098376471-p6iWfxCLtyMvMutTb010w1D1xZ3UyJhcC2kkBjN"; 35 | String 
myAccess = "2o1uGcp4b2bFynOfu2cA1uz63n5aruV0RwNsUjRpjDBZS"; 36 | 37 | out.println("Creating Twitter Stream"); 38 | BlockingQueue statusQueue = new LinkedBlockingQueue<>(1000); 39 | StatusesFilterEndpoint endpoint = new StatusesFilterEndpoint(); 40 | endpoint.trackTerms(Lists.newArrayList("twitterapi", this.topic)); 41 | endpoint.stallWarnings(false); 42 | Authentication twitterAuth = new OAuth1(myKey, mySecret, myToken, myAccess); 43 | 44 | BasicClient twitterClient = new ClientBuilder() 45 | .name("Twitter client") 46 | .hosts(Constants.STREAM_HOST) 47 | .endpoint(endpoint) 48 | .authentication(twitterAuth) 49 | .processor(new StringDelimitedProcessor(statusQueue)) 50 | .build(); 51 | 52 | twitterClient.connect(); 53 | 54 | List list = new ArrayList(); 55 | List twitterList = new ArrayList(); 56 | 57 | statusQueue.drainTo(twitterList); 58 | for(int i=0; i series = new XYChart.Series<>(); 14 | final CategoryAxis xAxis = new CategoryAxis(); 15 | final NumberAxis yAxis = new NumberAxis(8000000, 11000000, 1000000); 16 | final static String belgium = "Belgium"; 17 | 18 | @Override 19 | public void start(Stage stage) { 20 | simpleIndexChart(stage); 21 | } 22 | 23 | public void simpleIndexChart(Stage stage) { 24 | stage.setTitle("Index Chart"); 25 | final LineChart lineChart 26 | = new LineChart<>(xAxis, yAxis); 27 | lineChart.setTitle("Belgium Population"); 28 | yAxis.setLabel("Population"); 29 | 30 | series.setName("Population"); 31 | addDataItem(series, "1950", 8639369); 32 | addDataItem(series, "1960", 9118700); 33 | addDataItem(series, "1970", 9637800); 34 | addDataItem(series, "1980", 9846800); 35 | addDataItem(series, "1990", 9969310); 36 | addDataItem(series, "2000", 10263618); 37 | 38 | Scene scene = new Scene(lineChart, 800, 600); 39 | lineChart.getData().add(series); 40 | stage.setScene(scene); 41 | stage.show(); 42 | } 43 | 44 | public void addDataItem(XYChart.Series series, 45 | String x, Number y) { 46 | series.getData().add(new XYChart.Data<>(x, y)); 47 | } 48 | 49 | public static void main(String[] args) { 50 | launch(args); 51 | } 52 | 53 | } 54 | 55 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 4_Data Visualization/PieChart - MainApp.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | import javafx.application.Application; 4 | import javafx.collections.FXCollections; 5 | import javafx.collections.ObservableList; 6 | import javafx.scene.Group; 7 | import javafx.scene.Scene; 8 | import javafx.scene.chart.PieChart; 9 | import javafx.stage.Stage; 10 | 11 | public class PieChartSample extends Application { 12 | 13 | @Override 14 | public void start(Stage stage) { 15 | Scene scene = new Scene(new Group()); 16 | stage.setTitle("Europian Country Population"); 17 | stage.setWidth(500); 18 | stage.setHeight(500); 19 | 20 | ObservableList pieChartData = 21 | FXCollections.observableArrayList( 22 | new PieChart.Data("Belgium", 3), 23 | new PieChart.Data("France", 26), 24 | new PieChart.Data("Germany", 35), 25 | new PieChart.Data("Netherlands", 7), 26 | new PieChart.Data("Sweden", 4), 27 | new PieChart.Data("United Kingdom", 25)); 28 | final PieChart pieChart = new PieChart(pieChartData); 29 | pieChart.setTitle("Country Population"); 30 | 31 | ((Group) scene.getRoot()).getChildren().add(pieChart); 32 | stage.setScene(scene); 33 | stage.show(); 34 | } 35 | 36 | public static void main(String[] args) { 37 | launch(args); 38 | } 39 | 40 | } 41 | 
-------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 4_Data Visualization/ScatterChart - MainApp.java: -------------------------------------------------------------------------------- 1 | package packt.scatterchartmavenexample; 2 | 3 | import com.opencsv.CSVReader; 4 | import java.io.FileReader; 5 | import javafx.application.Application; 6 | import static javafx.application.Application.launch; 7 | import javafx.scene.Scene; 8 | import javafx.scene.chart.NumberAxis; 9 | import javafx.scene.chart.ScatterChart; 10 | import javafx.scene.chart.XYChart; 11 | import javafx.stage.Stage; 12 | 13 | public class MainApp extends Application { 14 | 15 | @Override 16 | public void start(Stage stage) throws Exception { 17 | stage.setTitle("Scatter Chart Sample"); 18 | final NumberAxis yAxis = new NumberAxis(1400, 2100, 100); 19 | final NumberAxis xAxis = new NumberAxis(500000, 90000000, 1000000); 20 | final ScatterChart scatterChart = new ScatterChart<>(xAxis, yAxis); 21 | xAxis.setLabel("Population"); 22 | yAxis.setLabel("Decade"); 23 | scatterChart.setTitle("Population Scatter Graph"); 24 | 25 | XYChart.Series series = new XYChart.Series(); 26 | 27 | try (CSVReader dataReader = new CSVReader(new FileReader("EuropeanScatterData.csv"), ',')) { 28 | String[] nextLine; 29 | while ((nextLine = dataReader.readNext()) != null) { 30 | int decade = Integer.parseInt(nextLine[0]); 31 | int population = Integer.parseInt(nextLine[1]); 32 | series.getData().add(new XYChart.Data(population, decade)); 33 | System.out.println("Decade: " + decade + " Population: " + population); 34 | } 35 | } 36 | 37 | scatterChart.getData().addAll(series); 38 | Scene scene = new Scene(scatterChart, 500, 400); 39 | stage.setScene(scene); 40 | stage.show(); 41 | } 42 | 43 | public static void main(String[] args) { 44 | launch(args); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 5-SDAT/MainApp - Simple Regression.java: -------------------------------------------------------------------------------- 1 | package packt.simplelinearregressionmaven; 2 | 3 | /* 4 | 5 | 6 | org.apache.commons 7 | commons-math3 8 | 3.6.1 9 | 10 | 11 | */ 12 | 13 | import static java.lang.System.out; 14 | import java.text.NumberFormat; 15 | import javafx.application.Application; 16 | import static javafx.application.Application.launch; 17 | import javafx.scene.Scene; 18 | import javafx.scene.chart.LineChart; 19 | import javafx.scene.chart.NumberAxis; 20 | import javafx.scene.chart.XYChart; 21 | import javafx.stage.Stage; 22 | import javafx.util.StringConverter; 23 | import org.apache.commons.math3.stat.regression.SimpleRegression; 24 | 25 | public class MainApp extends Application { 26 | final XYChart.Series originalSeries = new XYChart.Series<>(); 27 | final XYChart.Series projectedSeries = new XYChart.Series<>(); 28 | final NumberAxis xAxis = new NumberAxis(1940, 2050, 10); 29 | final NumberAxis yAxis = new NumberAxis(8000000, 12000000, 1000000); 30 | final static String belgium = "Belgium"; 31 | 32 | @Override 33 | public void start(Stage stage) { 34 | 35 | //Belgium 1950 8639369 36 | //Belgium 1960 9118700 37 | //Belgium 1970 9637800 38 | //Belgium 1980 9846800 39 | //Belgium 1990 9969310 40 | //Belgium 2000 10263618 41 | double[][] input = { 42 | {1950, 8639369}, 43 | {1960, 9118700}, 44 | {1970, 9637800}, 45 | {1980, 9846800}, 46 | {1990, 9969310}, 47 | {2000, 10263618}}; 48 | double[] 
predictionYears = {1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020, 2030, 2040}; 49 | 50 | NumberFormat yearFormat = NumberFormat.getNumberInstance(); 51 | yearFormat.setMaximumFractionDigits(0); 52 | yearFormat.setGroupingUsed(false); 53 | NumberFormat populationFormat = NumberFormat.getNumberInstance(); 54 | populationFormat.setMaximumFractionDigits(0); 55 | 56 | SimpleRegression regression = new SimpleRegression(); 57 | regression.addData(input); 58 | projectedSeries.setName("Projected"); 59 | for (int i = 0; i < predictionYears.length; i++) { 60 | out.println(yearFormat.format(predictionYears[i]) + "-" 61 | + populationFormat.format(regression.predict(predictionYears[i]))); 62 | addDataItem(projectedSeries, predictionYears[i], 63 | regression.predict(predictionYears[i])); 64 | } 65 | 66 | displayAttribute("Slope",regression.getSlope()); 67 | displayAttribute("Intercept", regression.getIntercept()); 68 | displayAttribute("InterceptStdEr", regression.getInterceptStdErr()); 69 | displayAttribute("MeanSquareError", regression.getMeanSquareError()); 70 | displayAttribute("N", + regression.getN()); 71 | displayAttribute("R", + regression.getR()); 72 | displayAttribute("RSquare", regression.getRSquare()); 73 | 74 | //Create index chart 75 | stage.setTitle("Simple Linear Regression"); 76 | xAxis.setTickLabelFormatter(new StringConverter() { 77 | @Override 78 | public String toString(Number object) { 79 | return (object.intValue()) + ""; 80 | } 81 | 82 | @Override 83 | public Number fromString(String string) { 84 | return 0; 85 | } 86 | }); 87 | 88 | final LineChart lineChart 89 | = new LineChart<>(xAxis, yAxis); 90 | lineChart.setTitle("Belgium Population"); 91 | yAxis.setLabel("Population"); 92 | 93 | originalSeries.setName("Actual"); 94 | addDataItem(originalSeries, 1950, 8639369); 95 | addDataItem(originalSeries, 1960, 9118700); 96 | addDataItem(originalSeries, 1970, 9637800); 97 | addDataItem(originalSeries, 1980, 9846800); 98 | addDataItem(originalSeries, 1990, 9969310); 99 | addDataItem(originalSeries, 2000, 10263618); 100 | 101 | Scene scene = new Scene(lineChart, 800, 600); 102 | lineChart.getData().addAll(originalSeries, projectedSeries); 103 | stage.setScene(scene); 104 | stage.show(); 105 | } 106 | 107 | public void displayAttribute(String attribute, double value) { 108 | NumberFormat numberFormat = NumberFormat.getNumberInstance(); 109 | numberFormat.setMaximumFractionDigits(2); 110 | out.println(attribute + ": " + numberFormat.format(value)); 111 | } 112 | 113 | public void addDataItem(XYChart.Series series, 114 | Number x, Number y) { 115 | series.getData().add(new XYChart.Data<>(x, y)); 116 | } 117 | 118 | public static void main(String[] args) { 119 | launch(args); 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 7-Neural networks/KNNExample.java: -------------------------------------------------------------------------------- 1 | package packt.knnexamples; 2 | 3 | import java.io.FileReader; 4 | import static java.lang.System.out; 5 | import weka.classifiers.lazy.IBk; 6 | import weka.classifiers.Evaluation; 7 | import weka.core.Instance; 8 | import weka.core.Instances; 9 | import weka.core.SerializationHelper; 10 | 11 | public class KNNExample { 12 | 13 | public KNNExample() { 14 | // dermatology.arff 15 | // Training set is first 80% - ends with line 456 of orginal set 16 | // Testing set is last 20% - Starts with line 457 of original set (lines 457-530) 17 | String 
trainingFileName = "dermatologyTrainingSet.arff"; 18 | String testingFileName = "dermatologyTestingSet.arff"; 19 | 20 | try (FileReader trainingReader = new FileReader(trainingFileName); 21 | FileReader testingReader = new FileReader(testingFileName)) { 22 | Instances trainingInstances = new Instances(trainingReader); 23 | trainingInstances.setClassIndex(trainingInstances.numAttributes() - 1); 24 | 25 | Instances testingInstances = new Instances(testingReader); 26 | testingInstances.setClassIndex(testingInstances.numAttributes() - 1); 27 | 28 | IBk ibk = new IBk(); 29 | ibk.buildClassifier(trainingInstances); 30 | SerializationHelper.write("knnModel", ibk); 31 | ibk = null; 32 | ibk = (IBk) SerializationHelper.read("knnModel"); 33 | 34 | // Evaluate 35 | Evaluation evaluation = new Evaluation(trainingInstances); 36 | evaluation.evaluateModel(ibk, testingInstances); 37 | System.out.println(evaluation.toSummaryString()); 38 | 39 | // Predict 40 | for (int i = 0; i < testingInstances.numInstances(); i++) { 41 | double result = ibk.classifyInstance(testingInstances.instance(i)); 42 | // Use for incorrect results 43 | if (result != testingInstances 44 | .instance(i) 45 | .value(testingInstances.numAttributes() - 1)) { 46 | out.println("Classify result: " + result 47 | + " Correct: " + testingInstances.instance(i) 48 | .value(testingInstances.numAttributes() - 1)); 49 | Instance incorrectInstance = testingInstances.instance(i); 50 | incorrectInstance.setDataset(trainingInstances); 51 | double[] distribution = ibk.distributionForInstance(incorrectInstance); 52 | out.println("Probability of being positive: " + distribution[0]); 53 | out.println("Probability of being negative: " + distribution[1]); 54 | } 55 | } 56 | } catch (Exception ex) { 57 | ex.printStackTrace(); 58 | } 59 | } 60 | 61 | public static void main(String[] args) { 62 | new KNNExample(); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 7-Neural networks/MultilayerPerceptronExample.java: -------------------------------------------------------------------------------- 1 | package packt.wekanueralnetworkexamples; 2 | 3 | import java.io.FileReader; 4 | import static java.lang.System.out; 5 | import weka.classifiers.Evaluation; 6 | import weka.classifiers.functions.MultilayerPerceptron; 7 | import weka.core.Instance; 8 | import weka.core.Instances; 9 | import weka.core.SerializationHelper; 10 | 11 | /* 12 | 13 | 14 | 15 | nz.ac.waikato.cms.weka 16 | weka-dev 17 | 3.7.5 18 | 19 | 20 | */ 21 | public class MultilayerPerceptronExample { 22 | 23 | public MultilayerPerceptronExample() { 24 | // dermatology.arff 25 | // Training set is first 80% - ends with line 456 of orginal set 26 | // Testing set is last 20% - Starts with line 457 of original set (lines 457-530) 27 | String trainingFileName = "dermatologyTrainingSet.arff"; 28 | String testingFileName = "dermatologyTestingSet.arff"; 29 | 30 | try (FileReader trainingReader = new FileReader(trainingFileName); 31 | FileReader testingReader = new FileReader(testingFileName)) { 32 | Instances trainingInstances = new Instances(trainingReader); 33 | trainingInstances.setClassIndex(trainingInstances.numAttributes() - 1); 34 | 35 | Instances testingInstances = new Instances(testingReader); 36 | testingInstances.setClassIndex(testingInstances.numAttributes() - 1); 37 | 38 | MultilayerPerceptron mlp = new MultilayerPerceptron(); 39 | mlp.setLearningRate(0.1); 40 | mlp.setMomentum(0.2); 41 | 
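        // Editor's note (comment added here; not part of the original listing): in Weka's MultilayerPerceptron,
        // setTrainingTime(2000) sets the number of training epochs, and setHiddenLayers("3") requests a single
        // hidden layer with three nodes (the argument is a comma-separated list of layer sizes).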
mlp.setTrainingTime(2000); 42 | mlp.setHiddenLayers("3"); 43 | mlp.buildClassifier(trainingInstances); 44 | SerializationHelper.write("mlpModel", mlp); 45 | out.println(mlp.getTrainingTime()); 46 | mlp = (MultilayerPerceptron)SerializationHelper.read("mlpModel"); 47 | 48 | 49 | // Evaluate 50 | System.out.println("Starting evaluation"); 51 | Evaluation evaluation = new Evaluation(trainingInstances); 52 | evaluation.evaluateModel(mlp, testingInstances); 53 | System.out.println(evaluation.toSummaryString()); 54 | 55 | // Predict 56 | System.out.println("Starting Predicting"); 57 | for (int i = 0; i < testingInstances.numInstances(); i++) { 58 | double result = mlp.classifyInstance(testingInstances.instance(i)); 59 | // Use for incorrect results 60 | if (result != testingInstances 61 | .instance(i) 62 | .value(testingInstances.numAttributes() - 1)) { 63 | out.println("Classify result: " + result 64 | + " Correct: " + testingInstances.instance(i) 65 | .value(testingInstances.numAttributes() - 1)); 66 | Instance incorrectInstance = testingInstances.instance(i); 67 | incorrectInstance.setDataset(trainingInstances); 68 | double[] distribution = mlp.distributionForInstance(incorrectInstance); 69 | out.println("Probability of being positive: " + distribution[0]); 70 | out.println("Probability of being negative: " + distribution[1]); 71 | } 72 | } 73 | } catch (Exception ex) { 74 | ex.printStackTrace(); 75 | } 76 | } 77 | 78 | public static void main(String[] args) { 79 | new MultilayerPerceptronExample(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 7-Neural networks/NeuralNetworkTraining.java: -------------------------------------------------------------------------------- 1 | import java.text.*; 2 | 3 | public class NeuralNetworkTraining { 4 | 5 | public static void main(String args[]){ 6 | double xorIN[][] ={ 7 | {0.0,0.0}, 8 | {1.0,0.0}, 9 | {0.0,1.0}, 10 | {1.0,1.0}}; 11 | 12 | double xorEXPECTED[][] = { {0.0},{1.0},{1.0},{0.0}}; 13 | 14 | //System.out.println("Learn:"); 15 | 16 | SampleNeuralNetwork network = new SampleNeuralNetwork(2,3,1,0.7,0.9); 17 | 18 | // NumberFormat percentFormat = NumberFormat.getPercentInstance(); 19 | // percentFormat.setMinimumFractionDigits(4); 20 | 21 | 22 | for (int runCnt=0;runCnt<1000;runCnt++) { 23 | for (int loc=0;loc list = new ArrayList<>(); 46 | Scanner scanner = new Scanner(sentence); 47 | while (scanner.hasNext()) { 48 | list.add(scanner.next()); 49 | } 50 | // Convert list to an array 51 | String[] words = new String[1]; 52 | words = list.toArray(words); 53 | 54 | // Build model 55 | POSModel posModel = new POSModel(input); 56 | POSTaggerME posTagger = new POSTaggerME(posModel); 57 | 58 | // Tag words 59 | String[] posTags = posTagger.tag(words); 60 | for (int i = 0; i < posTags.length; i++) { 61 | out.println(words[i] + " - " + posTags[i]); 62 | } 63 | 64 | // Find top sequences 65 | Sequence sequences[] = posTagger.topKSequences(words); 66 | for (Sequence sequence : sequences) { 67 | out.println(sequence); 68 | } 69 | } catch (IOException ex) { 70 | ex.printStackTrace(); 71 | } 72 | } 73 | public void NERExample() {//Models found at http://opennlp.sourceforge.net/models-1.5/ 74 | try (InputStream tokenStream = 75 | new FileInputStream(new File("en-token.bin")); 76 | InputStream personModelStream = new FileInputStream( 77 | new File("en-ner-person.bin"));) { 78 | TokenizerModel tm = new TokenizerModel(tokenStream); 79 | TokenizerME tokenizer = new 
TokenizerME(tm); 80 | 81 | TokenNameFinderModel tnfm = new TokenNameFinderModel(personModelStream); 82 | NameFinderME nf = new NameFinderME(tnfm); 83 | 84 | String sentence = "Mrs. Wilson went to Mary's house for dinner."; 85 | String[] tokens = tokenizer.tokenize(sentence); 86 | 87 | Span[] spans = nf.find(tokens); 88 | 89 | for (int i = 0; i < spans.length; i++) { 90 | out.println(spans[i] + " - " + tokens[spans[i].getStart()]); 91 | } 92 | } catch (Exception ex) { 93 | ex.printStackTrace(); 94 | } 95 | 96 | try (InputStream tokenStream = 97 | new FileInputStream("en-token.bin"); 98 | InputStream locationModelStream = new FileInputStream( 99 | new File("en-ner-location.bin"));) { 100 | 101 | TokenizerModel tm = new TokenizerModel(tokenStream); 102 | TokenizerME tokenizer = new TokenizerME(tm); 103 | 104 | TokenNameFinderModel tnfm = new TokenNameFinderModel(locationModelStream); 105 | NameFinderME nf = new NameFinderME(tnfm); 106 | 107 | sentence = "Enid is located north of Oklahoma City."; 108 | // sentence = "Pond Creek is located north of Oklahoma City."; 109 | String tokens[] = tokenizer.tokenize(sentence); 110 | 111 | Span spans[] = nf.find(tokens); 112 | 113 | for (int i = 0; i < spans.length; i++) { 114 | out.println(spans[i] + " - " + tokens[spans[i].getStart()]); 115 | } 116 | } catch (Exception ex) { 117 | ex.printStackTrace(); 118 | } 119 | } 120 | 121 | public void extractReltionships() { 122 | try (InputStream modelInputStream = new FileInputStream( 123 | new File("en-parser-chunking.bin"));) { 124 | 125 | ParserModel parserModel = new ParserModel(modelInputStream); 126 | Parser parser = ParserFactory.create(parserModel); 127 | 128 | String sentence = "Let's parse this sentence."; 129 | Parse[] parseTrees = ParserTool.parseLine(sentence, parser, 3); 130 | 131 | for(Parse tree : parseTrees) { 132 | tree.show(); 133 | out.println("Probability: " + tree.getProb()); 134 | } 135 | for(Parse tree : parseTrees) { 136 | out.println("Probability: " + tree.getProb()); 137 | } 138 | } catch (Exception ex) { 139 | ex.printStackTrace(); 140 | } 141 | } 142 | 143 | public NLPExamples() { 144 | // POSExample(); 145 | // scannerClassExample(); 146 | // lingPipeExamples(); 147 | // NERExample(); 148 | // extractReltionships(); 149 | } 150 | 151 | public static void main(String[] args) { 152 | new NLPExamples(); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Chapter 9-Text Analysis/ParagraphVectorsClassifierExample.java: -------------------------------------------------------------------------------- 1 | package com.mycompany.sentimentanalysis; 2 | 3 | import org.datavec.api.util.ClassPathResource; 4 | import org.deeplearning4j.berkeley.Pair; 5 | //import org.deeplearning4j.examples.nlp.paragraphvectors.tools.FileLabelAwareIterator; 6 | //import org.deeplearning4j.examples.nlp.paragraphvectors.tools.LabelSeeker; 7 | //import org.deeplearning4j.examples.nlp.paragraphvectors.tools.MeansBuilder; 8 | import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; 9 | import org.deeplearning4j.models.paragraphvectors.ParagraphVectors; 10 | import org.deeplearning4j.models.word2vec.VocabWord; 11 | import org.deeplearning4j.text.documentiterator.LabelAwareIterator; 12 | import org.deeplearning4j.text.documentiterator.LabelledDocument; 13 | import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor; 14 | import 
org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory; 15 | import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; 16 | import org.nd4j.linalg.api.ndarray.INDArray; 17 | 18 | import static java.lang.System.out; 19 | import java.util.List; 20 | 21 | /** 22 | * 23 | * @author raver119@gmail.com adapted by Jennifer Reese 24 | */ 25 | public class ParagraphVectorsClassifierExample { 26 | 27 | static ParagraphVectors pVect; 28 | static LabelAwareIterator iter; 29 | static TokenizerFactory tFact; 30 | 31 | 32 | public static void main(String[] args) throws Exception { 33 | 34 | ClassPathResource resource = new ClassPathResource("paravec/labeled"); 35 | 36 | iter = new FileLabelAwareIterator.Builder() 37 | .addSourceFolder(resource.getFile()) 38 | .build(); 39 | 40 | tFact = new DefaultTokenizerFactory(); 41 | tFact.setTokenPreProcessor(new CommonPreprocessor()); 42 | 43 | pVect = new ParagraphVectors.Builder() 44 | .learningRate(0.025) 45 | .minLearningRate(0.001) 46 | .batchSize(1000) 47 | .epochs(20) 48 | .iterate(iter) 49 | .trainWordVectors(true) 50 | .tokenizerFactory(tFact) 51 | .build(); 52 | 53 | pVect.fit(); 54 | 55 | 56 | ClassPathResource unlabeledText = new ClassPathResource("paravec/unlabeled"); 57 | FileLabelAwareIterator unlabeledIter = new FileLabelAwareIterator.Builder() 58 | .addSourceFolder(unlabeledText.getFile()) 59 | .build(); 60 | 61 | 62 | MeansBuilder mBuilder = new MeansBuilder( 63 | (InMemoryLookupTable) pVect.getLookupTable(), 64 | tFact); 65 | LabelSeeker lSeeker = new LabelSeeker(iter.getLabelsSource().getLabels(), 66 | (InMemoryLookupTable) pVect.getLookupTable()); 67 | 68 | while (unlabeledIter.hasNextDocument()) { 69 | LabelledDocument doc = unlabeledIter.nextDocument(); 70 | INDArray docCentroid = mBuilder.documentAsVector(doc); 71 | List> scores = lSeeker.getScores(docCentroid); 72 | 73 | out.println("Document '" + doc.getLabel() + "' falls into the following categories: "); 74 | for (Pair score : scores) { 75 | out.println(" " + score.getFirst() + ": " + score.getSecond()); 76 | } 77 | 78 | } 79 | } 80 | } -------------------------------------------------------------------------------- /Module 1/Java for Data Science/Readme.txt: -------------------------------------------------------------------------------- 1 | Chapter 01 : Has no Code files. 2 | Chapter 02 : Contain Code files. 3 | Chapter 03 : Contain Code files. 4 | Chapter 04 : Contain Code files. 5 | Chapter 05 : Contain Code files. 6 | Chapter 06 : Contain Code files. 7 | Chapter 07 : Contain Code files. 8 | Chapter 08 : Contain Code files. 9 | Chapter 09 : Contain Code files. 10 | Chapter 10 : Contain Code files. 11 | Chapter 11 : Contain Code files. 12 | Chapter 12 : Contain Code files. 13 | 14 | All the code files are placed in the respective chapter folder. 15 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/BlikiExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package packt; 7 | 8 | import info.bliki.api.Page; 9 | import info.bliki.api.User; 10 | import info.bliki.wiki.filter.SectionHeader; 11 | import info.bliki.wiki.model.ITableOfContent; 12 | import info.bliki.wiki.model.Reference; 13 | import info.bliki.wiki.model.WikiModel; 14 | import java.util.List; 15 | import static java.lang.System.out; 16 | 17 | public class BlikiExample { 18 | 19 | public static void main(String[] args) { 20 | User user = new User("", "", "http://en.wikipedia.org/w/api.php"); 21 | user.login(); 22 | 23 | String[] titles = {"Data science"}; 24 | List pageList = user.queryContent(titles); 25 | 26 | for (Page page : pageList) { 27 | WikiModel wikiModel = new WikiModel("${image}", "${title}"); 28 | out.println("Image Base URL: " + wikiModel.getImageBaseURL() + "\n" 29 | + "Page Name: " + wikiModel.getPageName() + "\n" 30 | + "Wiki Base URL: " + wikiModel.getWikiBaseURL()); 31 | String htmlStr = wikiModel.render("This is a simple [[Hello World]] wiki tag"); 32 | System.out.println(htmlStr); 33 | 34 | String htmlText = wikiModel.render(page.toString()); 35 | out.println("Title: " + page.getTitle() + "\n" 36 | + "Image URL: " + page.getImageUrl()+ "\n" 37 | + "Timestamp: " + page.getCurrentRevision().getTimestamp()); 38 | 39 | List referenceList = wikiModel.getReferences(); 40 | out.println(referenceList.size()); 41 | for(Reference reference : referenceList) { 42 | out.println(reference.getRefString()); 43 | } 44 | 45 | ITableOfContent toc = wikiModel.getTableOfContent(); 46 | List sections = toc.getSectionHeaders(); 47 | for(SectionHeader sh : sections) { 48 | out.println(sh.getFirst()); 49 | } 50 | 51 | out.println(htmlText); 52 | } 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/CrawlerController.java: -------------------------------------------------------------------------------- 1 | package packt.crawlerj4mavenexample; 2 | 3 | import edu.uci.ics.crawler4j.crawler.CrawlConfig; 4 | import edu.uci.ics.crawler4j.crawler.CrawlController; 5 | import edu.uci.ics.crawler4j.fetcher.PageFetcher; 6 | import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig; 7 | import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer; 8 | 9 | public class CrawlerController { 10 | 11 | public static void main(String[] args) throws Exception { 12 | int numberOfCrawlers = 2; 13 | CrawlConfig config = new CrawlConfig(); 14 | String crawlStorageFolder = "data"; 15 | 16 | config.setCrawlStorageFolder(crawlStorageFolder); 17 | config.setPolitenessDelay(500); 18 | config.setMaxDepthOfCrawling(2); 19 | config.setMaxPagesToFetch(20); 20 | config.setIncludeBinaryContentInCrawling(false); 21 | 22 | PageFetcher pageFetcher = new PageFetcher(config); 23 | RobotstxtConfig robotstxtConfig = new RobotstxtConfig(); 24 | RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher); 25 | CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer); 26 | 27 | controller.addSeed("https://en.wikipedia.org/wiki/Bishop_Rock,_Isles_of_Scilly"); 28 | 29 | controller.start(SampleCrawler.class, numberOfCrawlers); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/DatabaseExample.java: -------------------------------------------------------------------------------- 1 | package packt.databasemavenexample; 2 | 3 | /* 4 | 5 | 6 | mysql 7 | 
mysql-connector-java 8 | 6.0.2 9 | 10 | 11 | 12 | org.jsoup 13 | jsoup 14 | 1.9.1 15 | 16 | 17 | */ 18 | import java.sql.Connection; 19 | import java.sql.DriverManager; 20 | import java.sql.PreparedStatement; 21 | import java.sql.ResultSet; 22 | import java.sql.SQLException; 23 | import java.sql.Statement; 24 | import static java.lang.System.out; 25 | 26 | public class DatabaseExample { 27 | 28 | private Connection connection; 29 | 30 | public DatabaseExample() { 31 | try { 32 | Class.forName("com.mysql.jdbc.Driver"); 33 | String url = "jdbc:mysql://localhost:3306/example"; 34 | connection = DriverManager.getConnection(url, "root", "explore"); 35 | 36 | // Needed to reset the contents of the table 37 | Statement statement = connection.createStatement(); 38 | statement.execute("TRUNCATE URLTABLE;"); 39 | 40 | String insertSQL = "INSERT INTO `example`.`URLTABLE` " 41 | + "(`url`) VALUES " + "(?);"; 42 | PreparedStatement stmt = connection.prepareStatement(insertSQL); 43 | 44 | stmt.setString(1, "https://en.wikipedia.org/wiki/Data_science"); 45 | stmt.execute(); 46 | stmt.setString(1, "https://en.wikipedia.org/wiki/Bishop_Rock,_Isles_of_Scilly"); 47 | stmt.execute(); 48 | 49 | // String selectSQL = "select * from Record where URL = '" + url + "'"; 50 | String selectSQL = "select * from URLTABLE"; 51 | statement = connection.createStatement(); 52 | ResultSet resultSet = statement.executeQuery(selectSQL); 53 | 54 | out.println("List of URLs"); 55 | while (resultSet.next()) { 56 | out.println(resultSet.getString(2)); 57 | } 58 | } catch (SQLException | ClassNotFoundException ex) { 59 | ex.printStackTrace(); 60 | } 61 | } 62 | 63 | public static void main(String[] args) { 64 | new DatabaseExample(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/FindPicture.java: -------------------------------------------------------------------------------- 1 | package packt.flickrdemonstration; 2 | 3 | import com.flickr4java.flickr.Flickr; 4 | import com.flickr4java.flickr.FlickrException; 5 | import com.flickr4java.flickr.REST; 6 | import com.flickr4java.flickr.photos.Photo; 7 | import com.flickr4java.flickr.photos.PhotoList; 8 | import com.flickr4java.flickr.photos.PhotosInterface; 9 | import com.flickr4java.flickr.photos.SearchParameters; 10 | import com.flickr4java.flickr.photos.Size; 11 | import java.awt.image.BufferedImage; 12 | import java.io.File; 13 | import java.io.IOException; 14 | import static java.lang.System.out; 15 | import java.util.Collection; 16 | import javax.imageio.ImageIO; 17 | 18 | public class FindPicture { 19 | 20 | public FindPicture() { 21 | try { 22 | String apikey = "Your API key"; 23 | String secret = "Your secret"; 24 | 25 | Flickr flickr = new Flickr(apikey, secret, new REST()); 26 | 27 | SearchParameters searchParameters = new SearchParameters(); 28 | searchParameters.setBBox("-180", "-90", "180", "90"); 29 | searchParameters.setMedia("photos"); 30 | PhotoList list = flickr.getPhotosInterface().search(searchParameters, 10, 0); 31 | 32 | out.println("Image List"); 33 | for (int i = 0; i < list.size(); i++) { 34 | Photo photo = list.get(i); 35 | out.println("Image: " + i 36 | + "\nTitle: " + photo.getTitle() 37 | + "\nMedia: " + photo.getOriginalFormat() 38 | + "\nPublic: " + photo.isPublicFlag() 39 | + "\nPublic: " + photo.isPublicFlag() 40 | + "\nUrl: " + photo.getUrl() 41 | + "\n"); 42 | } 43 | out.println(); 44 | 45 | PhotosInterface pi = new 
PhotosInterface(apikey, secret, new REST()); 46 | out.println("pi: " + pi); 47 | Photo currentPhoto = list.get(0); 48 | out.println("currentPhoto url: " + currentPhoto.getUrl()); 49 | 50 | // Get image using URL 51 | BufferedImage bufferedImage = pi.getImage(currentPhoto.getUrl()); 52 | out.println("bi: " + bufferedImage); 53 | 54 | // Get image using Photo instance 55 | bufferedImage = pi.getImage(currentPhoto, Size.SMALL); 56 | 57 | // Save image to file 58 | out.println("bufferedImage: " + bufferedImage); 59 | File outputfile = new File("image.jpg"); 60 | ImageIO.write(bufferedImage, "jpg", outputfile); 61 | } catch (FlickrException | IOException ex) { 62 | ex.printStackTrace(); 63 | } 64 | } 65 | 66 | public static void main(String[] args) { 67 | new FindPicture(); 68 | } 69 | 70 | public void displaySizes(Photo photo) { 71 | out.println("---Sizes----"); 72 | Collection sizes = photo.getSizes(); 73 | for (Size size : sizes) { 74 | out.println(size.getLabel()); 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/HttpURLConnectionExample.java: -------------------------------------------------------------------------------- 1 | package httpurlconnectionexample; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.InputStreamReader; 7 | import java.net.HttpURLConnection; 8 | import java.net.MalformedURLException; 9 | import java.net.URL; 10 | import static java.lang.System.out; 11 | 12 | public class HttpURLConnectionExample { 13 | 14 | public static void main(String[] args) { 15 | try { 16 | URL url = new URL("https://en.wikipedia.org/wiki/Data_science"); 17 | HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 18 | connection.setRequestMethod("GET"); 19 | connection.connect(); 20 | 21 | out.println("Response Code: " + connection.getResponseCode()); 22 | out.println("Content Type: " + connection.getContentType()); 23 | out.println("Content Length: " + connection.getContentLength()); 24 | 25 | InputStreamReader isr = new InputStreamReader( 26 | (InputStream) connection.getContent()); 27 | BufferedReader br = new BufferedReader(isr); 28 | StringBuilder buffer = new StringBuilder(); 29 | String line; 30 | do { 31 | line = br.readLine(); 32 | buffer.append(line + "\n"); 33 | } while (line != null); 34 | out.println(buffer.toString()); 35 | } catch (MalformedURLException ex) { 36 | ex.printStackTrace(); 37 | } catch (IOException ex) { 38 | ex.printStackTrace(); 39 | } 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/JSoupExamples.java: -------------------------------------------------------------------------------- 1 | package packt.webcrawlermavenjsoup; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import org.jsoup.Jsoup; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | import static java.lang.System.out; 10 | 11 | public class JSoupExamples { 12 | 13 | public JSoupExamples() { 14 | try { 15 | Document document = Jsoup.connect("https://en.wikipedia.org/wiki/Data_science").get(); 16 | displayImages(document); 17 | } catch (IOException ex) { 18 | ex.printStackTrace(); 19 | } 20 | loadDocumentFromFile(); 21 | } 22 | 23 | public void loadDocumentFromFile() { 24 | try { 25 | File file = new 
File("Example.html"); 26 | Document document = Jsoup.parse(file, "UTF-8", ""); 27 | listHyperlinks(document); 28 | } catch (IOException ex) { 29 | ex.printStackTrace(); 30 | } 31 | } 32 | 33 | public void parseString() { 34 | String html = "\n" 35 | + "Example Document\n" 36 | + "\n" 37 | + "

<p>The body of the document</p>\n" 38 | + "Interesting Links:\n" 39 | + "<br>\n" 40 | + "<a href=\"https://en.wikipedia.org/wiki/Data_science\">Data Science</a>\n" 41 | + "<br>\n" 42 | + "<a href=\"https://jsoup.org/\">Jsoup</a>\n" 43 | + "<br>\n" 44 | + "Images:\n" 45 | + "<br>
\n" 46 | + " \"Eye \n" 47 | + "\n" 48 | + ""; 49 | Document document = Jsoup.parse(html); 50 | listHyperlinks(document); 51 | } 52 | 53 | public void displayBodyText(Document document) { 54 | // Displays the entire body of the document 55 | String title = document.title(); 56 | out.println("Title: " + title); 57 | 58 | out.println("---Body---"); 59 | Elements element = document.select("body"); 60 | out.println("Text: " + element.text()); 61 | } 62 | 63 | public void displayImages(Document document) { 64 | out.println("---Images---"); 65 | Elements images = document.select("img[src$=.png]"); 66 | for (Element image : images) { 67 | out.println("\nImage: " + image); 68 | } 69 | } 70 | 71 | public void listHyperlinks(Document document) { 72 | out.println("---Links---"); 73 | Elements links = document.select("a[href]"); 74 | for (Element link : links) { 75 | out.println("Link: " + link.attr("href") 76 | + " Text: " + link.text()); 77 | } 78 | out.println("\n****************"); 79 | } 80 | 81 | public static void main(String[] args) { 82 | new JSoupExamples(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/SampleCrawler.java: -------------------------------------------------------------------------------- 1 | package packt.crawlerj4mavenexample; 2 | 3 | import edu.uci.ics.crawler4j.crawler.Page; 4 | import edu.uci.ics.crawler4j.crawler.WebCrawler; 5 | import edu.uci.ics.crawler4j.parser.HtmlParseData; 6 | import edu.uci.ics.crawler4j.url.WebURL; 7 | import java.util.regex.Pattern; 8 | import static java.lang.System.out; 9 | 10 | public class SampleCrawler extends WebCrawler { 11 | private static final Pattern IMAGE_EXTENSIONS = Pattern.compile(".*\\.(bmp|gif|jpg|png)$"); 12 | 13 | @Override 14 | public boolean shouldVisit(Page referringPage, WebURL url) { 15 | String href = url.getURL().toLowerCase(); 16 | if (IMAGE_EXTENSIONS.matcher(href).matches()) { 17 | return false; 18 | } 19 | return href.startsWith("https://en.wikipedia.org/wiki/"); 20 | } 21 | 22 | @Override 23 | public void visit(Page page) { 24 | int docid = page.getWebURL().getDocid(); 25 | String url = page.getWebURL().getURL(); 26 | 27 | if (page.getParseData() instanceof HtmlParseData) { 28 | HtmlParseData htmlParseData = (HtmlParseData) page.getParseData(); 29 | String text = htmlParseData.getText(); 30 | if (text.contains("shipping route")) { 31 | out.println("\nURL: " + url); 32 | out.println("Text: " + text); 33 | out.println("Text length: " + text.length()); 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/SampleStreamExample.java: -------------------------------------------------------------------------------- 1 | 2 | 3 | package packt.TwitterExample; 4 | 5 | import com.twitter.hbc.ClientBuilder; 6 | import com.twitter.hbc.core.Constants; 7 | import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint; 8 | import com.twitter.hbc.core.processor.StringDelimitedProcessor; 9 | import com.twitter.hbc.httpclient.BasicClient; 10 | import com.twitter.hbc.httpclient.auth.Authentication; 11 | import com.twitter.hbc.httpclient.auth.OAuth1; 12 | 13 | import java.util.concurrent.BlockingQueue; 14 | import java.util.concurrent.LinkedBlockingQueue; 15 | import java.util.concurrent.TimeUnit; 16 | 17 | public class SampleStreamExample { 18 | 19 | public static void streamTwitter(String consumerKey, String 
consumerSecret, String accessToken, String accessSecret) throws InterruptedException { 20 | 21 | BlockingQueue statusQueue = new LinkedBlockingQueue(10000); 22 | 23 | StatusesSampleEndpoint ending = new StatusesSampleEndpoint(); 24 | ending.stallWarnings(false); 25 | 26 | Authentication twitterAuth = new OAuth1(consumerKey, consumerSecret, accessToken, accessSecret); 27 | 28 | BasicClient twitterClient = new ClientBuilder() 29 | .name("Twitter client") 30 | .hosts(Constants.STREAM_HOST) 31 | .endpoint(ending) 32 | .authentication(twitterAuth) 33 | .processor(new StringDelimitedProcessor(statusQueue)) 34 | .build(); 35 | 36 | 37 | twitterClient.connect(); 38 | 39 | 40 | for (int msgRead = 0; msgRead < 1000; msgRead++) { 41 | if (twitterClient.isDone()) { 42 | System.out.println(twitterClient.getExitEvent().getMessage()); 43 | break; 44 | } 45 | 46 | String msg = statusQueue.poll(10, TimeUnit.SECONDS); 47 | if (msg == null) { 48 | System.out.println("Waited 10 seconds - no message received"); 49 | } else { 50 | System.out.println(msg); 51 | } 52 | } 53 | 54 | twitterClient.stop(); 55 | 56 | System.out.printf("%d messages processed!\n", twitterClient.getStatsTracker().getNumMessages()); 57 | } 58 | 59 | public static void main(String[] args) { 60 | 61 | String myKey = "myKey"; 62 | String mySecret = "mySecret"; 63 | String myToken = "myToken"; 64 | String myAccess = "myAccess"; 65 | 66 | try { 67 | SampleStreamExample.streamTwitter(myKey, mySecret, myToken, myAccess); 68 | 69 | } catch (InterruptedException e) { 70 | System.out.println(e); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/Search.java: -------------------------------------------------------------------------------- 1 | package packt; 2 | 3 | /* 4 | 5 | 6 | com.google.oauth-client 7 | google-oauth-client-jetty 8 | 1.20.0 9 | 10 | 11 | com.google.apis 12 | google-api-services-youtube 13 | v3-rev171-1.22.0 14 | 15 | 16 | com.google.oauth-client 17 | google-oauth-client 18 | 1.22.0 19 | jar 20 | 21 | 22 | com.google.http-client 23 | google-http-client-jackson2 24 | 1.22.0 25 | jar 26 | 27 | 28 | */ 29 | import com.google.api.client.googleapis.json.GoogleJsonResponseException; 30 | import com.google.api.client.http.HttpRequest; 31 | import com.google.api.client.http.HttpRequestInitializer; 32 | import com.google.api.services.youtube.YouTube; 33 | import com.google.api.services.youtube.model.ResourceId; 34 | import com.google.api.services.youtube.model.SearchListResponse; 35 | import com.google.api.services.youtube.model.SearchResult; 36 | import com.google.api.services.youtube.model.Thumbnail; 37 | 38 | import java.io.IOException; 39 | import java.util.List; 40 | import static java.lang.System.out; 41 | 42 | // Adapted from https://developers.google.com/youtube/v3/code_samples/java#search_by_keyword 43 | public class Search { 44 | 45 | public static void main(String[] args) { 46 | try { 47 | YouTube youtube = new YouTube.Builder( 48 | Auth.HTTP_TRANSPORT, 49 | Auth.JSON_FACTORY, 50 | new HttpRequestInitializer() { 51 | public void initialize(HttpRequest request) throws IOException { 52 | } 53 | }) 54 | .setApplicationName("application_name") 55 | .build(); 56 | 57 | String queryTerm = "cats"; 58 | YouTube.Search.List search = youtube 59 | .search() 60 | .list("id,snippet"); 61 | 62 | String apiKey = "AIzaSyDiVWbm1q3s3cI3RZNCfH85hXS95H8opgs"; 63 | search.setKey(apiKey); 64 | search.setQ(queryTerm); 65 | 66 | // Valid 
types: "channel" "playlist" "video" 67 | search.setType("video"); 68 | 69 | search.setFields("items(id/kind,id/videoId,snippet/title," + 70 | "snippet/description,snippet/thumbnails/default/url)"); 71 | search.setMaxResults(10L); 72 | SearchListResponse searchResponse = search.execute(); 73 | 74 | List searchResultList = searchResponse.getItems(); 75 | SearchResult video = searchResultList.iterator().next(); 76 | Thumbnail thumbnail = video.getSnippet().getThumbnails().getDefault(); 77 | 78 | out.println("Kind: " + video.getKind()); 79 | out.println("Video Id: " + video.getId().getVideoId()); 80 | out.println("Title: " + video.getSnippet().getTitle()); 81 | out.println("Description: " + video.getSnippet().getDescription()); 82 | out.println("Thumbnail: " + thumbnail.getUrl()); 83 | } catch (GoogleJsonResponseException ex) { 84 | ex.printStackTrace(); 85 | } catch (IOException ex) { 86 | ex.printStackTrace(); 87 | } 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 2-data acquisition/SimpleWebCrawler.java: -------------------------------------------------------------------------------- 1 | package packt.webcrawlermavenexample; 2 | 3 | import java.io.IOException; 4 | import java.sql.SQLException; 5 | import org.jsoup.Jsoup; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | import static java.lang.System.out; 10 | import java.util.ArrayList; 11 | 12 | // Adapted from: http://www.programcreek.com/2012/12/how-to-make-a-web-crawler-using-java/ 13 | public class SimpleWebCrawler { 14 | 15 | private final String topic; 16 | private final String startingURL; 17 | private final String urlLimiter; 18 | private final int pageLimit = 20; 19 | private final ArrayList visitedList = new ArrayList<>(); 20 | private final ArrayList pageList = new ArrayList<>(); 21 | 22 | public SimpleWebCrawler() { 23 | startingURL = "https://en.wikipedia.org/wiki/Bishop_Rock,_Isles_of_Scilly"; 24 | urlLimiter = "Bishop_Rock"; 25 | topic = "shipping route"; 26 | visitPage(startingURL); 27 | } 28 | 29 | public void visitPage(String url) { 30 | if (pageList.size() >= pageLimit) { 31 | return; 32 | } 33 | if (visitedList.contains(url)) { 34 | // URL already visited 35 | } else { 36 | visitedList.add(url); 37 | try { 38 | Document doc = Jsoup.connect(url).get(); 39 | if (doc.text().contains(topic)) { 40 | out.println((pageList.size() + 1) + ": [" + url + "]"); 41 | pageList.add(url); 42 | 43 | // Process page links 44 | Elements questions = doc.select("a[href]"); 45 | for (Element link : questions) { 46 | if (link.attr("href").contains(urlLimiter)) { 47 | visitPage(link.attr("abs:href")); 48 | } 49 | } 50 | } 51 | } catch (Exception ex) { 52 | ex.printStackTrace(); 53 | } 54 | } 55 | } 56 | 57 | public static void main(String[] args) { 58 | new SimpleWebCrawler(); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/App (2).java: -------------------------------------------------------------------------------- 1 | package packt.com.packt.java.guava; 2 | 3 | import static java.lang.System.out; 4 | 5 | import com.google.common.base.CharMatcher; 6 | import com.google.common.base.Joiner; 7 | import com.google.common.base.Splitter; 8 | 9 | /** 10 | * Hello world! 
11 | * 12 | */ 13 | public class App 14 | { 15 | public static void main( String[] args ) 16 | { 17 | String dirtyText = "Call me Ishmael. Some years ago- never mind how"; 18 | dirtyText += " long precisely - having little or no money in my purse,"; 19 | dirtyText += " and nothing particular to interest me on shore, I thought"; 20 | dirtyText += " I would sail about a little and see the watery part of the world."; 21 | 22 | // Splitter split = Splitter.on(',').omitEmptyStrings().trimResults(); 23 | // Iterable words = split.split(dirtyText); 24 | // for(String token: words){ 25 | //// out.println(token); 26 | // } 27 | //cleanAndJoin(dirtyText); 28 | findReplaceGuava(dirtyText); 29 | 30 | } 31 | 32 | public static String cleanAndJoin(String text){ 33 | out.println("Dirty text: " + text); 34 | String[] words = text.toLowerCase().trim().split("[\\W\\d]+"); 35 | String cleanText = Joiner.on(" ").skipNulls().join(words); 36 | out.println("Cleaned text: " + cleanText); 37 | return cleanText; 38 | } 39 | 40 | 41 | public static String findReplaceGuava(String text){ 42 | out.println(text); 43 | text = text.replace("me", " "); 44 | out.println("With double spaces: " + text); 45 | 46 | // trim whitespace at ends, and replace/collapse whitespace into single spaces 47 | String spaced = CharMatcher.WHITESPACE.trimAndCollapseFrom(text, ' '); 48 | out.println("With double spaces removed: " + spaced); 49 | String noDigits = CharMatcher.JAVA_DIGIT.replaceFrom(text, "*"); // star out all digits 50 | String lowerAndDigit = CharMatcher.JAVA_DIGIT.or(CharMatcher.JAVA_LOWER_CASE).retainFrom(text); 51 | // eliminate all characters that aren't digits or lowercase 52 | 53 | return text; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/App.java: -------------------------------------------------------------------------------- 1 | package packt.packt.com.java.apachecommons; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.apache.commons.lang3.text.StrSubstitutor; 5 | import org.apache.commons.lang3.text.StrTokenizer; 6 | import org.apache.commons.validator.EmailValidator; 7 | import org.apache.commons.validator.routines.IntegerValidator; 8 | 9 | import static java.lang.System.*; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | import java.util.regex.Pattern; 14 | 15 | public class App { 16 | 17 | public static void main(String[] args){ 18 | String dirtyText = "Call me Ishmael. 
Some years ago- never mind how"; 19 | dirtyText += " long precisely - having little or no money in my purse,"; 20 | dirtyText += " and nothing particular to interest me on shore, I thought"; 21 | dirtyText += " I would sail about a little and see the watery part of the world."; 22 | // apacheCommonsTokenizer(dirtyText); 23 | 24 | validateEmailApache(dirtyText); 25 | //out.println(validateInt("1234")); 26 | //out.println(findReplaceApacheCommons(dirtyText,"me","X")); 27 | } 28 | 29 | public static void apacheCommonsTokenizer(String text){ 30 | StrTokenizer tokenizer = new StrTokenizer(text,","); 31 | while (tokenizer.hasNext()) { 32 | out.println(tokenizer.next()); 33 | } 34 | 35 | } 36 | 37 | public static String validateEmailApache(String email){ 38 | email = email.trim(); 39 | EmailValidator eValidator = EmailValidator.getInstance(); 40 | if(eValidator.isValid(email)){ 41 | return email + " is a valid email address."; 42 | }else{ 43 | return email + " is not a valid email address."; 44 | } 45 | } 46 | 47 | public static String validateInt(String text){ 48 | IntegerValidator intValidator = IntegerValidator.getInstance(); 49 | if(intValidator.isValid(text)){ 50 | return text + " is a valid integer."; 51 | }else{ 52 | return text + " is not a valid integer."; 53 | } 54 | } 55 | 56 | public static String findReplaceApacheCommons(String text, String toFind, String replaceWith){ 57 | out.println(text); 58 | text = StringUtils.replacePattern(text, "\\W\\s", " "); 59 | out.println(text); 60 | //out.println(StringUtils.replace(text, " me ", "X")); 61 | return StringUtils.replace(text, " me ", "X"); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/CSVwithScanner.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.io.IOException; 6 | import java.util.Scanner; 7 | 8 | public class CSVwithScanner { 9 | public static void main(String[] args){ 10 | 11 | 12 | try { 13 | File demoFile = new File("C:\\Users\\jreese\\workspace\\Packt Data Science\\Demographics.txt"); 14 | Scanner getData = new Scanner(demoFile); 15 | while(getData.hasNext()){ 16 | out.println(getData.nextLine()); 17 | } 18 | 19 | } catch (FileNotFoundException e) { 20 | 21 | e.printStackTrace(); 22 | } 23 | 24 | 25 | 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/Cat.jpg -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/DataImputation.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Optional; 5 | 6 | public class DataImputation { 7 | 8 | public static void main(String[] args) { 9 | 10 | tempExample(); 11 | 12 | } 13 | 14 | public static void tempExample(){ 15 | // double[] tempList = new double[365]; 16 | // for(int x = 0; x < tempList.length; x++){ 17 | // tempList[x] = Math.random()*100; 18 | // } 19 | // tempList[5] = 0; 20 | // double sum = 0; 
21 | // for(double d : tempList){ 22 | // out.println(d); 23 | // sum += d; 24 | // } 25 | // out.println(sum/365); 26 | String useName = ""; 27 | String[] nameList = {"Amy","Bob","Sally","Sue","Don","Rick",null,"Betsy"}; 28 | Optional tempName; 29 | for(String name : nameList){ 30 | tempName = Optional.ofNullable(name); 31 | useName = tempName.orElse("DEFAULT"); 32 | out.println("Name to use = " + useName); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Dogs.java: -------------------------------------------------------------------------------- 1 | 2 | public class Dogs { 3 | 4 | private String name; 5 | private int age; 6 | 7 | public Dogs(){ 8 | name = "Fido"; 9 | age = 0; 10 | } 11 | 12 | public Dogs(String n){ 13 | name = n; 14 | age = 0; 15 | } 16 | 17 | public Dogs(int a){ 18 | name = "Fido"; 19 | age = a; 20 | } 21 | 22 | public Dogs(String n, int a){ 23 | name = n; 24 | age = a; 25 | } 26 | 27 | public String getName(){ 28 | return name; 29 | } 30 | 31 | public int getAge(){ 32 | return age; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/GrayScaleParrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/GrayScaleParrot.png -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/OCRExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/OCRExample.png -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/OpenCVNonMavenExamples.java: -------------------------------------------------------------------------------- 1 | package opencvnonmavenexamples; 2 | 3 | import org.opencv.core.Core; 4 | import org.opencv.core.CvType; 5 | import static org.opencv.core.CvType.CV_8UC1; 6 | import org.opencv.core.Mat; 7 | import org.opencv.core.Scalar; 8 | import org.opencv.core.Size; 9 | import org.opencv.imgcodecs.Imgcodecs; 10 | import org.opencv.imgproc.Imgproc; 11 | 12 | public class OpenCVNonMavenExamples { 13 | 14 | public OpenCVNonMavenExamples() { 15 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 16 | enhanceImageBrightness(); 17 | enhanceImageContrast(); 18 | // sharpenImage(); 19 | smoothImage(); 20 | resizeImage(); 21 | convertImage(); 22 | // noiseRemoval(); 23 | // denoise(); 24 | // convertToTIFF(); 25 | } 26 | 27 | public static void main(String[] args) { 28 | new OpenCVNonMavenExamples(); 29 | } 30 | 31 | // Histogram equalization is used to improve the overall appearnace of an image. 32 | // http://docs.opencv.org/2.4/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.html 33 | // From: http://www.tutorialspoint.com/java_dip/enhancing_image_contrast.htm 34 | // Enhancing grayscale images with histogram equalization. 
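// Imgproc.equalizeHist() redistributes the most frequent intensity values across the full 0-255 range of an 8-bit grayscale image, which generally raises its global contrast.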
35 | public void enhanceImageContrast() { 36 | Mat source = Imgcodecs.imread("GrayScaleParrot.png", 37 | Imgcodecs.CV_LOAD_IMAGE_GRAYSCALE); 38 | Mat destination = new Mat(source.rows(), source.cols(), source.type()); 39 | Imgproc.equalizeHist(source, destination); 40 | Imgcodecs.imwrite("enhancedParrot.jpg", destination); 41 | } 42 | 43 | public void smoothImage() { 44 | // Smoothing, also called blurring, will make the edges soother. 45 | Mat source = Imgcodecs.imread("cat.jpg"); 46 | Mat destination = source.clone(); 47 | for (int i = 0; i < 25; i++) { 48 | Mat sourceImage = destination.clone(); 49 | Imgproc.blur(sourceImage, destination, new Size(3.0, 3.0)); 50 | } 51 | Imgcodecs.imwrite("smoothCat.jpg", destination); 52 | } 53 | 54 | public void sharpenImage() { 55 | String fileName = "SharpnessExample2.png"; 56 | fileName = "smoothCat.jpg"; 57 | fileName = "blurredText.jpg"; 58 | fileName = "Blurred Text3.jpg"; 59 | try { 60 | // Not working that well !!! 61 | Mat source = Imgcodecs.imread(fileName, 62 | // Imgcodecs.CV_LOAD_IMAGE_COLOR); 63 | Imgcodecs.CV_LOAD_IMAGE_GRAYSCALE); 64 | Mat destination = new Mat(source.rows(), source.cols(), source.type()); 65 | Imgproc.GaussianBlur(source, destination, new Size(0, 0), 10); 66 | // The following was used witht he cat 67 | // Core.addWeighted(source, 1.5, destination, -0.75, 0, destination); 68 | // Core.addWeighted(source, 2.5, destination, -1.5, 0, destination); 69 | Core.addWeighted(source, 1.5, destination, -0.75, 0, destination); 70 | Imgcodecs.imwrite("sharpenedCat.jpg", destination); 71 | } catch (Exception ex) { 72 | ex.printStackTrace(); 73 | } 74 | } 75 | 76 | // Adapted from: http://www.tutorialspoint.com/java_dip/enhancing_image_brightness.htm 77 | public void enhanceImageBrightness() { 78 | double alpha = 1; // Change to 2 for more brightness 79 | double beta = 50; 80 | String fileName = "cat.jpg"; 81 | 82 | Mat source = Imgcodecs.imread("cat.jpg"); 83 | Mat destination = new Mat(source.rows(), source.cols(), 84 | source.type()); 85 | source.convertTo(destination, -1, 1, 50); 86 | Imgcodecs.imwrite("brighterCat.jpg", destination); 87 | } 88 | 89 | public void resizeImage() { 90 | Mat source = Imgcodecs.imread("cat.jpg"); 91 | Mat resizeimage = new Mat(); 92 | Imgproc.resize(source, resizeimage, new Size(250, 250)); 93 | Imgcodecs.imwrite("resizedCat.jpg", resizeimage); 94 | } 95 | 96 | public void convertImage() { 97 | Mat source = Imgcodecs.imread("cat.jpg"); 98 | // The extension determines the format 99 | Imgcodecs.imwrite("convertedCat.jpg", source); 100 | Imgcodecs.imwrite("convertedCat.jpeg", source); 101 | Imgcodecs.imwrite("convertedCat.webp", source); 102 | Imgcodecs.imwrite("convertedCat.png", source); 103 | Imgcodecs.imwrite("convertedCat.tiff", source); 104 | } 105 | 106 | public void noiseRemoval() { 107 | // Mat Kernel = cv::Mat(cv::Size(Maximum_Width_of_Noise,Maximum_Height_of_noise),CV_8UC1,cv::Scalar(255)); 108 | Mat Kernel = new Mat(new Size(3, 3), CvType.CV_8U, new Scalar(255)); 109 | Mat source = Imgcodecs.imread("noiseExample.png"); 110 | Mat temp = new Mat(); 111 | Mat topHat = new Mat(); 112 | Mat destination = new Mat(); 113 | 114 | Imgproc.morphologyEx(source, temp, Imgproc.MORPH_OPEN, Kernel); 115 | Imgproc.morphologyEx(temp, destination, Imgproc.MORPH_CLOSE, Kernel); 116 | // Imgproc.morphologyEx(temp, topHat, Imgproc.MORPH_GRADIENT, Kernel); 117 | // Imgproc.morphologyEx(topHat, destination, Imgproc.MORPH_CLOSE, Kernel); 118 | Imgcodecs.imwrite("noiseRemovedExample.png", source); 119 | } 120 | 121 
| public static void denoise() { 122 | String imgInPath = "captchaExample.jpg"; 123 | imgInPath = "MyCaptcha.PNG"; 124 | imgInPath = "blurredtext.jpg"; 125 | String imgOutPath = "captchaNoiseRemovedExample.png"; 126 | imgOutPath = "MyNoiseRemovedCaptcha.PNG"; 127 | 128 | Mat image = Imgcodecs.imread(imgInPath); 129 | Mat out = new Mat(); 130 | Mat tmp = new Mat(); 131 | Mat kernel = new Mat(new Size(3, 3), CvType.CV_8UC1, new Scalar(255)); 132 | // Mat kernel = new Mat(image.size(), CvType.CV_8UC1, new Scalar(255)); 133 | Imgproc.morphologyEx(image, tmp, Imgproc.MORPH_OPEN, kernel); 134 | Imgproc.morphologyEx(tmp, out, Imgproc.MORPH_CLOSE, kernel); 135 | Imgcodecs.imwrite(imgOutPath, out); 136 | } 137 | 138 | // public void convertToTIFF() { 139 | // Mat source = Imgcodecs.imread("OCRExample.png"); 140 | // Imgcodecs.imwrite("OCRExample.tiff", source); 141 | // } 142 | } 143 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/PDF File.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/PDF File.docx -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/PDF File.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/PDF File.pdf -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/PDFExtractionExample.java: -------------------------------------------------------------------------------- 1 | package packt.pdfextractionexample; 2 | 3 | import java.io.File; 4 | import org.apache.pdfbox.pdmodel.PDDocument; 5 | import org.apache.pdfbox.text.PDFTextStripper; 6 | 7 | public class PDFExtractionExample { 8 | 9 | public static void main(String[] args) { 10 | try { 11 | PDDocument document = PDDocument.load(new File("PDF File.pdf")); 12 | PDFTextStripper Tstripper = new PDFTextStripper(); 13 | String documentText = Tstripper.getText(document); 14 | System.out.println(documentText); 15 | } catch (Exception e) { 16 | e.printStackTrace(); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Person.json: -------------------------------------------------------------------------------- 1 | { 2 | "firstname":"Smith", 3 | "lastname":"Peter", 4 | "phone":8475552222, 5 | "address":["100 Main Street","Corpus","Oklahoma"] 6 | } 7 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Persons.json: -------------------------------------------------------------------------------- 1 | { 2 | "persons": { 3 | "groupname": "school", 4 | "person": 5 | [ 6 | {"firstname":"Smith", 7 | "lastname":"Peter", 8 | "phone":8475552222, 9 | "address":["100 Main Street","Corpus","Oklahoma"] }, 10 | {"firstname":"King", 11 | "lastname":"Sarah", 12 | "phone":8475551111, 13 | "address":["200 Main Street","Corpus","Oklahoma"] }, 14 | {"firstname":"Frost", 15 | "lastname":"Nathan", 16 | "phone":8475553333, 17 | "address":["300 Main Street","Corpus","Oklahoma"] } 18 | ] 19 | } 20 | } 21 | 
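The Persons.json file above nests a "person" array inside a "persons" object. As a minimal sketch of how that structure could be walked in Java (assuming Jackson's com.fasterxml.jackson.databind on the classpath, which is not a dependency shown in these listings), the tree model is enough:

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;

public class PersonsJsonSketch {
    public static void main(String[] args) throws IOException {
        // Parse the whole document into a tree and iterate the "person" array.
        JsonNode root = new ObjectMapper().readTree(new File("Persons.json"));
        for (JsonNode person : root.path("persons").path("person")) {
            System.out.println(person.path("firstname").asText() + " "
                    + person.path("lastname").asText() + ", phone: "
                    + person.path("phone").asLong());
        }
    }
}

Each iteration prints one entry, for example "Smith Peter, phone: 8475552222".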
-------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/ReadExcelExample.java: -------------------------------------------------------------------------------- 1 | package packt.poiexamples; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import org.apache.poi.ss.usermodel.Cell; 7 | import org.apache.poi.ss.usermodel.Row; 8 | import org.apache.poi.xssf.usermodel.XSSFSheet; 9 | import org.apache.poi.xssf.usermodel.XSSFWorkbook; 10 | import static java.lang.System.out; 11 | 12 | // From: http://howtodoinjava.com/apache-commons/readingwriting-excel-files-in-java-poi-tutorial/ 13 | public class ReadExcelExample { 14 | 15 | public static void main(String[] args) { 16 | //Create Workbook instance holding reference to .xlsx file 17 | try (FileInputStream file = new FileInputStream( 18 | new File("Sample.xlsx"))) { 19 | //Create Workbook instance holding reference to .xlsx file 20 | XSSFWorkbook workbook = new XSSFWorkbook(file); 21 | 22 | //Get first/desired sheet from the workbook 23 | XSSFSheet sheet = workbook.getSheetAt(0); 24 | 25 | //Iterate through each rows one by one 26 | // Iterator rowIterator = sheet.iterator(); 27 | for(Row row : sheet) { 28 | for (Cell cell : row) { 29 | //Check the cell type and format accordingly 30 | switch (cell.getCellType()) { 31 | case Cell.CELL_TYPE_NUMERIC: 32 | out.print(cell.getNumericCellValue() + "\t"); 33 | break; 34 | case Cell.CELL_TYPE_STRING: 35 | out.print(cell.getStringCellValue() + "\t"); 36 | break; 37 | } 38 | } 39 | out.println(); 40 | } 41 | } catch (IOException e) { 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/Sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/Sample.xlsx -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleSearching.java: -------------------------------------------------------------------------------- 1 | import java.io.BufferedReader; 2 | import java.io.File; 3 | import static java.lang.System.*; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.util.Scanner; 8 | 9 | public class SimpleSearching { 10 | 11 | public static void main(String[] args) { 12 | String toFind = "I"; 13 | String replaceWith = "Ishmael"; 14 | String dirtyText = "Call me Ishmael. 
Some years ago- never mind how"; 15 | dirtyText += " long precisely - having little or no money in my purse,"; 16 | dirtyText += " and nothing particular to interest me on shore, I thought"; 17 | dirtyText += " I would sail about a little and see the watery part of the world."; 18 | 19 | //simpleSearch(dirtyText,toFind); 20 | 21 | //scannerSearch(dirtyText,toFind); 22 | 23 | simpleFindReplace(dirtyText,toFind,replaceWith); 24 | 25 | //searchWholeFile("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//MobyDick.txt", toFind); 26 | 27 | try { 28 | Scanner textToClean = new Scanner(new File("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//101nos.txt")); 29 | while(textToClean.hasNext()){ 30 | //String dirtyText = textToClean.nextLine(); 31 | 32 | //simpleSearch(dirtyText,toFind); 33 | 34 | //scannerSearch(dirtyText,toFind); 35 | 36 | //simpleFindReplace(dirtyText,toFind,replaceWith); 37 | 38 | } 39 | 40 | textToClean.close(); 41 | } catch (FileNotFoundException e) { 42 | // TODO Auto-generated catch block 43 | e.printStackTrace(); 44 | } 45 | 46 | 47 | //searchWholeFile("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//101nos.txt", toFind); 48 | 49 | } 50 | 51 | public static void simpleSearch(String text, String toFind){ 52 | text = text.toLowerCase().trim(); 53 | toFind = toFind.toLowerCase().trim(); 54 | int count = 0; 55 | if(text.contains(toFind)){ 56 | String[] words = text.split(" "); 57 | for(String word : words){ 58 | if(word.equals(toFind)){ 59 | count++; 60 | } 61 | } 62 | out.println("Found " + toFind + " " + count + " times in the text."); 63 | } 64 | } 65 | 66 | public static void scannerSearch(String text, String toFind){ 67 | text = text.toLowerCase().trim(); 68 | toFind = toFind.toLowerCase().trim(); 69 | Scanner textLine = new Scanner(text); 70 | //NOTE horizon bound is zero - default to search entire file 71 | out.println("Found " + textLine.findWithinHorizon(toFind, 10)); 72 | } 73 | 74 | public static void simpleFindReplace(String text, String toFind, String replaceWith){ 75 | text = text.toLowerCase().trim(); 76 | toFind = toFind.toLowerCase().trim(); 77 | out.println(text); 78 | if(text.contains(toFind)){ 79 | text = text.replaceAll(toFind, replaceWith); 80 | out.println(text); 81 | // for(String word : textLine){ 82 | // out.print(word + " "); 83 | // } 84 | } 85 | 86 | } 87 | 88 | public static void searchWholeFile(String path, String toFind){ 89 | try { 90 | int line = 0; 91 | String textLine = ""; 92 | toFind = toFind.toLowerCase().trim(); 93 | BufferedReader textToClean = new BufferedReader(new FileReader(path)); 94 | while((textLine = textToClean.readLine()) != null){ 95 | line++; 96 | if(textLine.toLowerCase().trim().contains(toFind)){ 97 | out.println("Found " + toFind + " in " + textLine); 98 | //out.println("Found " + toFind + " on line " + line + " of file."); 99 | // String[] words = textLine.split(" "); 100 | // for(int x = 0; x < words.length; x++){ 101 | // if(words[x].equals(toFind)){ 102 | // out.println("On line " + line + " found " + toFind + " at location " + (x-1)); 103 | // } 104 | // } 105 | 106 | } 107 | } 108 | textToClean.close(); 109 | } catch (FileNotFoundException e) { 110 | e.printStackTrace(); 111 | } catch (IOException e) { 112 | e.printStackTrace(); 113 | } 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleSort.java: 
-------------------------------------------------------------------------------- 1 | import java.util.ArrayList; 2 | import java.util.Arrays; 3 | import java.util.Collections; 4 | import java.util.Comparator; 5 | import java.util.List; 6 | import static java.lang.System.out; 7 | 8 | public class SimpleSort { 9 | 10 | public static void main(String[] args) { 11 | 12 | basicSort(); 13 | complexSort(); 14 | 15 | } 16 | 17 | 18 | public static void basicSort(){ 19 | 20 | //make original list and print 21 | String[] words = {"cat","dog","house","boat","road","zoo"}; 22 | ArrayList wordsList = new ArrayList<>(Arrays.asList(words)); 23 | Integer[] nums = {12,46,52,34,87,123,14,44}; 24 | ArrayList numsList = new ArrayList<>(Arrays.asList(nums)); 25 | 26 | out.println("Original Word List: " + wordsList.toString()); 27 | //simple sort with collections.sort() 28 | Collections.sort(wordsList); 29 | out.println("Ascending Word List: " + wordsList.toString()); 30 | 31 | out.println("Original Integer List: " + numsList.toString()); 32 | 33 | Collections.reverse(numsList); 34 | out.println("Reversed Integer List: " + numsList.toString()); 35 | 36 | Collections.sort(numsList); 37 | out.println("Ascending Integer List: " + numsList.toString()); 38 | 39 | //Use Comparator Interface 40 | Comparator basicOrder = Integer::compare; 41 | Comparator descendOrder = basicOrder.reversed(); 42 | Collections.sort(numsList,descendOrder); 43 | out.println("Descending Integer List: " + numsList.toString()); 44 | 45 | //Using a lambda expression with Comparator and Collections 46 | Comparator compareInts = (Integer first, Integer second) -> Integer 47 | .compare(first, second); 48 | Collections.sort(numsList,compareInts); 49 | out.println("Sorted integers using Lambda: " + numsList.toString()); 50 | 51 | Comparator basicWords = String::compareTo; 52 | Comparator descendWords = basicWords.reversed(); 53 | Collections.sort(wordsList,descendWords); 54 | out.println("Reversed Words Using Comparator: " + wordsList.toString()); 55 | 56 | Comparator compareWords = (String first, String second) -> first.compareTo(second); 57 | Collections.sort(wordsList,compareWords); 58 | out.println("Sorted words using Lambda: " + wordsList.toString()); 59 | } 60 | 61 | 62 | public static void complexSort() { 63 | out.println(); 64 | ArrayList dogs = new ArrayList(); 65 | dogs.add(new Dogs("Zoey", 8)); 66 | dogs.add(new Dogs("Roxie", 10)); 67 | dogs.add(new Dogs("Kylie", 7)); 68 | dogs.add(new Dogs("Shorty", 14)); 69 | dogs.add(new Dogs("Ginger", 7)); 70 | dogs.add(new Dogs("Penny", 7)); 71 | out.println("Name " + " Age"); 72 | for(Dogs d : dogs){ 73 | out.println(d.getName() + " " + d.getAge()); 74 | } 75 | out.println(); 76 | dogs.sort(Comparator.comparing(Dogs::getName).thenComparing(Dogs::getAge)); 77 | out.println("Name " + " Age"); 78 | for(Dogs d : dogs){ 79 | out.println(d.getName() + " " + d.getAge()); 80 | } 81 | out.println(); 82 | dogs.sort(Comparator.comparing(Dogs::getAge).thenComparing(Dogs::getName)); 83 | out.println("Name " + " Age"); 84 | for(Dogs d : dogs){ 85 | out.println(d.getName() + " " + d.getAge()); 86 | } 87 | out.println(); 88 | } 89 | } 90 | 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleStringCleaning.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.util.ArrayList; 6 
| import java.util.Arrays; 7 | import java.util.Scanner; 8 | import java.util.Set; 9 | import java.util.TreeSet; 10 | 11 | import com.aliasi.tokenizer.EnglishStopTokenizerFactory; 12 | import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; 13 | import com.aliasi.tokenizer.Tokenizer; 14 | import com.aliasi.tokenizer.TokenizerFactory; 15 | 16 | public class SimpleStringCleaning { 17 | 18 | public static void main(String[] args) { 19 | 20 | String dirtyText = "Call me Ishmael. Some years ago- never mind how"; 21 | dirtyText += " long precisely - having little or no money in my purse,"; 22 | dirtyText += " and nothing particular to interest me on shore, I thought"; 23 | dirtyText += " I would sail about a little and see the watery part of the world."; 24 | 25 | //Example 1 - basic clean (regex & String class methods) 26 | //simpleClean(dirtyText); 27 | 28 | //Example 2 - clean and put in array (split) 29 | //simpleCleanToArray(dirtyText); 30 | 31 | //Example 3 - join 32 | //cleanAndJoin(dirtyText); 33 | 34 | //Example 4 - simple remove stop words 35 | //removeStopWords(dirtyText); 36 | 37 | //Example 5 - remove stop words with removeAll 38 | //removeStopWordsRemoveAll(dirtyText); 39 | 40 | //Example 6 - remove stop words with LingPipe 41 | removeStopWithLing(dirtyText); 42 | 43 | 44 | 45 | } 46 | 47 | public static String simpleClean(String text){ 48 | 49 | out.println("Dirty text: " + text); 50 | text = text.toLowerCase(); 51 | //explain what each part of this regex does 52 | text = text.replaceAll("[\\d[^\\w\\s]]+", " "); 53 | //NOTE trim only works on leading/trailing spaces 54 | text = text.trim(); 55 | //is this the best way to do this? This isn't great - talk about it even? 56 | while(text.contains(" ")){ 57 | text = text.replaceAll(" ", " "); 58 | } 59 | out.println("Cleaned text: " + text); 60 | return text; 61 | } 62 | 63 | public static String[] simpleCleanToArray(String text){ 64 | out.println("Dirty text: " + text); 65 | text = text.replaceAll("[\\d[^\\w\\s]]+", ""); 66 | String[] cleanText = text.toLowerCase().trim().split("[\\W\\d]+"); 67 | out.print("Cleaned text: "); 68 | for(String clean : cleanText){ 69 | out.print(clean + " "); 70 | } 71 | out.println(); 72 | return cleanText; 73 | } 74 | 75 | public static String cleanAndJoin(String text){ 76 | out.println("Dirty text: " + text); 77 | String[] words = text.toLowerCase().trim().split("[\\W\\d]+"); 78 | String cleanText = String.join(" ", words); 79 | out.println("Cleaned text: " + cleanText); 80 | return cleanText; 81 | } 82 | 83 | public static void removeStopWords(String text){ 84 | //discuss stop words file - how to choose stop words? 
use whole alphabet as way to handle I'M --> I M 85 | 86 | //****************** SIMPLE EXAMPLE ******************************************************************************************* 87 | 88 | try { 89 | //read in list of stop words 90 | Scanner readStop = new Scanner(new File("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//stopwords.txt")); 91 | //create an ArrayList to hold dirty text - call simpleCleanToArray to perform basic cleaning and put in array first 92 | ArrayList words = new ArrayList(Arrays.asList(simpleCleanToArray(text))); 93 | //loop through stop words file and check array for each word 94 | out.println("Original clean text: " + words.toString()); 95 | ArrayList foundWords = new ArrayList(); 96 | while(readStop.hasNextLine()){ 97 | String stopWord = readStop.nextLine().toLowerCase(); 98 | if(words.contains(stopWord)){ 99 | foundWords.add(stopWord); 100 | } 101 | } 102 | words.removeAll(foundWords); 103 | out.println("Text without stop words: " + words.toString()); 104 | } catch (FileNotFoundException e) { 105 | // TODO Auto-generated catch block 106 | e.printStackTrace(); 107 | } 108 | 109 | } 110 | 111 | public static void removeStopWordsRemoveAll(String text){ 112 | //******************EXAMPLE WITH REMOVE ALL ******************************************************************************************* 113 | 114 | try { 115 | out.println(text); 116 | Scanner stopWordList = new Scanner(new File("C://Jenn Personal//Packt Data Science//Chapter 3 Data Cleaning//stopwords.txt")); 117 | TreeSet stopWords = new TreeSet(); 118 | while(stopWordList.hasNextLine()){ 119 | stopWords.add(stopWordList.nextLine()); 120 | } 121 | ArrayList dirtyText = new ArrayList(Arrays.asList(text.split(" "))); 122 | dirtyText.removeAll(stopWords); 123 | out.println("Clean words: "); 124 | for(String x : dirtyText){ 125 | out.print(x + " "); 126 | } 127 | out.println(); 128 | stopWordList.close(); 129 | } catch (FileNotFoundException e) { 130 | // TODO Auto-generated catch block 131 | e.printStackTrace(); 132 | } 133 | } 134 | 135 | public static void removeStopWithLing(String text){ 136 | //******************EXAMPLE WITH ling pipe ******************************************************************************************* 137 | //mention lower vs upper case 138 | out.println(text); 139 | text = text.toLowerCase().trim(); 140 | TokenizerFactory fact = IndoEuropeanTokenizerFactory.INSTANCE; 141 | fact = new EnglishStopTokenizerFactory(fact); 142 | Tokenizer tok = fact.tokenizer(text.toCharArray(), 0, text.length()); 143 | for(String word : tok){ 144 | out.print(word + " "); 145 | } 146 | } 147 | } 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/SimpleSubsetting.java: -------------------------------------------------------------------------------- 1 | 2 | import java.util.Scanner; 3 | import java.util.Set; 4 | import java.util.SortedSet; 5 | import java.util.TreeSet; 6 | 7 | import static java.util.stream.Collectors.toCollection; 8 | 9 | import java.io.BufferedReader; 10 | import java.io.File; 11 | import java.io.FileNotFoundException; 12 | import java.io.FileReader; 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.Arrays; 16 | 17 | import static java.lang.System.out; 18 | 19 | public class SimpleSubsetting { 20 | 21 | public static void main(String[] args) throws FileNotFoundException { 22 | 23 | // treeSubSetMethod(); 24 | // simpleSubSet(); 25 | 
subSetSkipLines(); 26 | 27 | } 28 | 29 | public static void treeSubSetMethod(){ 30 | 31 | //sub set is not populating - not sure why 32 | //http://www.tutorialspoint.com/java/util/treeset_subset.htm 33 | 34 | Integer[] nums = {12,46,52,34,87,123,14,44}; 35 | TreeSet fullNumsList = new TreeSet(new ArrayList<>(Arrays.asList(nums))); 36 | TreeSet partNumsList = new TreeSet(); 37 | out.println("Original List: " + fullNumsList.toString()); 38 | partNumsList = (TreeSet) fullNumsList.subSet(1,3); 39 | out.println("SubSet of List: " + partNumsList.toString()); 40 | out.println(partNumsList.size()); 41 | 42 | 43 | } 44 | 45 | public static void simpleSubSet(){ 46 | Integer[] nums = {12,46,52,34,87,123,14,44}; 47 | ArrayList numsList = new ArrayList<>(Arrays.asList(nums)); 48 | out.println("Original List: " + numsList.toString()); 49 | Set fullNumsList = new TreeSet(numsList); 50 | Set partNumsList = fullNumsList.stream().skip(5).collect(toCollection(TreeSet::new)); 51 | out.println("SubSet of List: " + partNumsList.toString()); 52 | 53 | } 54 | 55 | public static void subSetSkipLines() throws FileNotFoundException{ 56 | 57 | //not behaving as expected 58 | try (BufferedReader br = new BufferedReader(new FileReader("C:\\Jenn Personal\\Packt Data Science\\Chapter 3 Data Cleaning\\stopwords.txt"))) { 59 | br 60 | .lines() 61 | .filter(s -> !s.equals("")) 62 | .forEach(s -> out.println(s)); 63 | } catch (IOException ex) { 64 | ex.printStackTrace(); 65 | } 66 | 67 | //Scanner file = new Scanner(new File("C:\\Jenn Personal\\Packt Data Science\\Chapter 3 Data Cleaning\\stopwords.txt")); 68 | // ArrayList lines = new ArrayList<>(); 69 | // while(file.hasNextLine()){ 70 | // lines.add(file.nextLine()); 71 | // } 72 | // out.println("Original List: " + lines.toString()); 73 | // out.println("Original list is " + lines.size() + " elements long"); 74 | // Set fullWordsList = new TreeSet(lines); 75 | // Set partWordsList = fullWordsList.stream().skip(2).collect(toCollection(TreeSet::new)); 76 | // out.println("SubSet of List: " + partWordsList.toString()); 77 | // out.println("Subsetted list is " + partWordsList.size() + " elements long"); 78 | // 79 | // file.close(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/TokenizerExamples.java: -------------------------------------------------------------------------------- 1 | import static java.lang.System.out; 2 | 3 | import java.util.StringTokenizer; 4 | 5 | public class TokenizerExamples{ 6 | 7 | public static void main(String[] args){ 8 | 9 | String dirtyText = "Call me Ishmael. 
Some years ago- never mind how"; 10 | dirtyText += " long precisely - having little or no money in my purse,"; 11 | dirtyText += " and nothing particular to interest me on shore, I thought"; 12 | dirtyText += " I would sail about a little and see the watery part of the world."; 13 | 14 | StringTokenizer tokenizer = new StringTokenizer(dirtyText," "); 15 | while(tokenizer.hasMoreTokens()){ 16 | out.print(tokenizer.nextToken() + " "); 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 3/ValidatingData.java: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 1/Java for Data Science/chapter 3/ValidatingData.java -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/BookDecisionTree.java: -------------------------------------------------------------------------------- 1 | package packt.decisiontreeexamples; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileReader; 5 | import java.io.IOException; 6 | import static java.lang.System.out; 7 | import java.util.Enumeration; 8 | 9 | import weka.classifiers.trees.J48; 10 | import weka.core.DenseInstance; 11 | import weka.core.Instance; 12 | import weka.core.Instances; 13 | 14 | /* 15 | 16 | 17 | jboss-3rd-party-releases 18 | https://repository.jboss.org/nexus/content/repositories/thirdparty-releases/ 19 | 20 | 21 | 22 | 23 | 24 | nz.ac.waikato.cms.weka 25 | weka-dev 26 | 3.7.5 27 | 28 | 29 | com.google.guava 30 | guava 31 | 20.0-hal 32 | 33 | 34 | */ 35 | public class BookDecisionTree { 36 | 37 | private Instances trainingData; 38 | 39 | public static void main(String[] args) { 40 | try { 41 | BookDecisionTree decisionTree = new BookDecisionTree("books.arff"); 42 | J48 tree = decisionTree.performTraining(); 43 | System.out.println(tree.toString()); 44 | 45 | Instance testInstance = decisionTree. 46 | getTestInstance("Leather", "yes", "historical"); 47 | int result = (int) tree.classifyInstance(testInstance); 48 | String results = decisionTree.trainingData.attribute(3).value(result); 49 | System.out.println( 50 | "Test with: " + testInstance + " Result: " + results); 51 | 52 | testInstance = decisionTree. 53 | getTestInstance("Paperback", "no", "historical"); 54 | result = (int) tree.classifyInstance(testInstance); 55 | results = decisionTree.trainingData.attribute(3).value(result); 56 | System.out.println( 57 | "Test with: " + testInstance + " Result: " + results); 58 | } catch (Exception ex) { 59 | ex.printStackTrace(); 60 | } 61 | } 62 | 63 | public BookDecisionTree(String fileName) { 64 | try { 65 | BufferedReader reader = new BufferedReader(new FileReader(fileName)); 66 | trainingData = new Instances(reader); 67 | trainingData.setClassIndex(trainingData.numAttributes() - 1); 68 | } catch (IOException ex) { 69 | ex.printStackTrace(); 70 | } 71 | } 72 | 73 | private J48 performTraining() { 74 | J48 j48 = new J48(); 75 | String[] options = {"-U"}; 76 | // Use unpruned tree. 
-U 77 | try { 78 | j48.setOptions(options); 79 | j48.buildClassifier(trainingData); 80 | } catch (Exception ex) { 81 | ex.printStackTrace(); 82 | } 83 | return j48; 84 | } 85 | 86 | private Instance getTestInstance( 87 | String binding, String multicolor, String genre) { 88 | Instance instance = new DenseInstance(3); 89 | instance.setDataset(trainingData); 90 | instance.setValue(trainingData.attribute(0), binding); 91 | instance.setValue(trainingData.attribute(1), multicolor); 92 | instance.setValue(trainingData.attribute(2), genre); 93 | return instance; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/FXMLController.java: -------------------------------------------------------------------------------- 1 | package packt.scatterchartmaven2; 2 | 3 | import java.net.URL; 4 | import java.util.ResourceBundle; 5 | import javafx.event.ActionEvent; 6 | import javafx.fxml.FXML; 7 | import javafx.fxml.Initializable; 8 | import javafx.scene.control.Label; 9 | 10 | public class FXMLController implements Initializable { 11 | 12 | @FXML 13 | private Label label; 14 | 15 | @FXML 16 | private void handleButtonAction(ActionEvent event) { 17 | System.out.println("You clicked me!"); 18 | label.setText("Hello World!"); 19 | } 20 | 21 | @Override 22 | public void initialize(URL url, ResourceBundle rb) { 23 | // TODO 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/JBayesTest.java: -------------------------------------------------------------------------------- 1 | package com.packt.java.jayes; 2 | 3 | import java.io.File; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import static java.lang.System.out; 7 | import com.github.vangj.jbayes.inf.prob.Graph; 8 | import com.github.vangj.jbayes.inf.prob.Node; 9 | import com.github.vangj.jbayes.inf.prob.util.CsvUtil; 10 | 11 | public class JBayesTest { 12 | 13 | public static void main(String[] args){ 14 | //each node must have its name and values defined 15 | Node storms = Node.newBuilder().name("Thunderstorm").value("t").value("f").build(); 16 | Node traffic = Node.newBuilder().name("Traffic").value("t").value("f").build(); 17 | Node powerOut = Node.newBuilder().name("PowerOutage").value("t").value("f").build(); 18 | Node alarm = Node.newBuilder().name("Alarm").value("t").value("f").build(); 19 | Node overslept = Node.newBuilder().name("Overslept").value("t").value("f").build(); 20 | Node lateToWork = Node.newBuilder().name("LateToWork").value("t").value("f").build(); 21 | 22 | //nodes may have parents 23 | traffic.addParent(storms); 24 | powerOut.addParent(storms); 25 | lateToWork.addParent(traffic); 26 | alarm.addParent(powerOut); 27 | overslept.addParent(alarm); 28 | lateToWork.addParent(overslept); 29 | 30 | //define the CPTs for each node 31 | storms.setCpt(new double[][] { 32 | {0.7, 0.3} 33 | }); 34 | traffic.setCpt(new double[][] { 35 | {0.8, 0.2} 36 | }); 37 | powerOut.setCpt(new double[][] { 38 | {0.5, 0.5} 39 | }); 40 | alarm.setCpt(new double[][] { 41 | {0.7, 0.3} 42 | }); 43 | overslept.setCpt(new double[][] { 44 | {0.5, 0.5} 45 | }); 46 | lateToWork.setCpt(new double[][] { 47 | {0.5, 0.5}, 48 | {0.5, 0.5} 49 | }); 50 | 51 | //create a graph from the nodes 52 | Graph bayesGraph = new Graph(); 53 | bayesGraph.addNode(storms); 54 | bayesGraph.addNode(traffic); 55 | bayesGraph.addNode(powerOut); 56 | 
bayesGraph.addNode(alarm); 57 | bayesGraph.addNode(overslept); 58 | bayesGraph.addNode(lateToWork); 59 | 60 | //samples and computes the marginal probabilities aka the inference 61 | double d = bayesGraph.sample(1000); 62 | out.println(d); 63 | 64 | //look at the marginal probabilities 65 | double[] stormProb = storms.probs(); 66 | double[] trafficProb = traffic.probs(); 67 | double[] powerProb = powerOut.probs(); 68 | double[] alarmProb = alarm.probs(); 69 | double[] oversleptProb = overslept.probs(); 70 | double[] lateProb = lateToWork.probs(); 71 | 72 | out.println("\nStorm Probabilities"); 73 | out.println("True: " + stormProb[0] + " False: " + stormProb[1]); 74 | out.println("\nTraffic Probabilities"); 75 | out.println("True: " + trafficProb[0] + " False: " + trafficProb[1]); 76 | out.println("\nPower Outage Probabilities"); 77 | out.println("True: " + powerProb[0] + " False: " + powerProb[1]); 78 | out.println("vAlarm Probabilities"); 79 | out.println("True: " + alarmProb[0] + " False: " + alarmProb[1]); 80 | out.println("\nOverslept Probabilities"); 81 | out.println("True: " + oversleptProb[0] + " False: " + oversleptProb[1]); 82 | out.println("\nLate to Work Probabilities"); 83 | out.println("True: " + lateProb[0] + " False: " + lateProb[1]); 84 | 85 | bayesGraph.setSaveSamples(true); //stores samples in memory! 86 | bayesGraph.sample(100); 87 | 88 | try { 89 | CsvUtil.saveSamples(bayesGraph, new FileWriter(new File("C://Jenn Personal//Packt Data Science//Chapter 6 Machine Learning//jbayes.csv"))); 90 | } catch (IOException e) { 91 | // TODO Auto-generated catch block 92 | e.printStackTrace(); 93 | } //save samples into CSV file 94 | 95 | bayesGraph.clearSamples(); //clear samples, this might help with memory usage 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/Main-ARL.java: -------------------------------------------------------------------------------- 1 | package packt.aprioriexamples; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileReader; 5 | import weka.associations.Apriori; 6 | import weka.core.Instances; 7 | 8 | public class Main { 9 | 10 | public static void main(String[] args) { 11 | try { 12 | BufferedReader br; 13 | br = new BufferedReader(new FileReader("babies.arff")); 14 | Instances data = new Instances(br); 15 | br.close(); 16 | 17 | Apriori apriori = new Apriori(); 18 | apriori.setNumRules(100); 19 | apriori.setMinMetric(0.5); 20 | 21 | apriori.buildAssociations(data); 22 | System.out.println(apriori); 23 | } catch (Exception ex) { 24 | ex.printStackTrace(); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/Main-SVG.java: -------------------------------------------------------------------------------- 1 | package packt.svmexamples; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import static java.lang.System.out; 8 | import java.util.ArrayList; 9 | import weka.classifiers.Classifier; 10 | import weka.classifiers.Evaluation; 11 | import weka.classifiers.evaluation.NominalPrediction; 12 | import weka.classifiers.evaluation.Prediction; 13 | import weka.classifiers.functions.SMO; 14 | import weka.classifiers.functions.supportVector.PrecomputedKernelMatrixKernel; 15 | import weka.core.DenseInstance; 16 | import 
weka.core.Instance; 17 | import weka.core.Instances; 18 | 19 | public class Main { 20 | 21 | public BufferedReader readDataFile(String filename) { 22 | BufferedReader inputReader = null; 23 | try { 24 | inputReader = new BufferedReader(new FileReader(filename)); 25 | } catch (FileNotFoundException ex) { 26 | out.println("File not found: " + filename); 27 | } 28 | return inputReader; 29 | } 30 | 31 | 32 | public Main() { 33 | try { 34 | BufferedReader datafile; 35 | datafile = readDataFile("camping.txt"); 36 | Instances data = new Instances(datafile); 37 | data.setClassIndex(data.numAttributes() - 1); 38 | 39 | Instances trainingData = new Instances(data, 0, 14); 40 | Instances testingData = new Instances(data, 14, 5); 41 | Evaluation evaluation = new Evaluation(trainingData); 42 | 43 | SMO smo = new SMO(); 44 | smo.buildClassifier(data); 45 | 46 | evaluation.evaluateModel(smo, testingData); 47 | System.out.println(evaluation.toSummaryString()); 48 | 49 | // Test instance 50 | Instance instance = new DenseInstance(3); 51 | instance.setValue(data.attribute("age"), 78); 52 | instance.setValue(data.attribute("income"), 125700); 53 | instance.setValue(data.attribute("camps"), 1); 54 | instance.setDataset(data); 55 | System.out.println("The instance: " + instance); 56 | System.out.println(smo.classifyInstance(instance)); 57 | } catch (Exception ex) { 58 | ex.printStackTrace(); 59 | } 60 | } 61 | 62 | public static void main(String[] arg) { 63 | new Main(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 6-Machine Learning/MainApp-Camping.java: -------------------------------------------------------------------------------- 1 | package packt.scatterchartmaven2; 2 | 3 | /* 4 | 5 | 6 | com.opencsv 7 | opencsv 8 | 3.7 9 | 10 | 11 | org.apache.commons 12 | commons-math3 13 | 3.6.1 14 | 15 | 16 | */ 17 | 18 | import javafx.application.Application; 19 | import static javafx.application.Application.launch; 20 | import javafx.scene.Scene; 21 | import javafx.scene.chart.NumberAxis; 22 | import javafx.scene.chart.ScatterChart; 23 | import javafx.scene.chart.XYChart; 24 | import javafx.stage.Stage; 25 | 26 | public class MainApp extends Application { 27 | 28 | @Override 29 | public void start(Stage stage) throws Exception { 30 | stage.setTitle("Scatter Chart Sample"); 31 | final NumberAxis xAxis = new NumberAxis(0, 100, 10); 32 | final NumberAxis yAxis = new NumberAxis(0, 130000, 10000); 33 | final ScatterChart sc = new 34 | ScatterChart(xAxis,yAxis); 35 | xAxis.setLabel("Age"); 36 | yAxis.setLabel("Income"); 37 | sc.setTitle("Camping Inclination"); 38 | 39 | /* 40 | 23,45600,1 41 | 26,32000,0 42 | 45,65700,1 43 | 29,25300,0 44 | 72,55600,1 45 | 24,28700,1 46 | 56,125300,1 47 | 22,34200,1 48 | 28,32800,1 49 | 32,24600,1 50 | 25,36500,1 51 | 67,76800,0 52 | 25,14500,1 53 | 86,58900,0 54 | */ 55 | XYChart.Series series1 = new XYChart.Series(); 56 | series1.setName("Camps"); 57 | series1.getData().add(new XYChart.Data(23,45600)); 58 | series1.getData().add(new XYChart.Data(45,65700)); 59 | series1.getData().add(new XYChart.Data(72,55600)); 60 | series1.getData().add(new XYChart.Data(24,28700)); 61 | series1.getData().add(new XYChart.Data(22,34200)); 62 | series1.getData().add(new XYChart.Data(28,32800)); 63 | series1.getData().add(new XYChart.Data(32,24600)); 64 | series1.getData().add(new XYChart.Data(25,36500)); 65 | series1.getData().add(new XYChart.Data(22,43600)); 66 | series1.getData().add(new 
XYChart.Data(78,125700)); 67 | series1.getData().add(new XYChart.Data(73,56500)); 68 | 69 | XYChart.Series series2 = new XYChart.Series(); 70 | series2.setName("Doesn't Camp"); 71 | series2.getData().add(new XYChart.Data(26,91000)); 72 | series2.getData().add(new XYChart.Data(29,85300)); 73 | series2.getData().add(new XYChart.Data(67,76800)); 74 | series2.getData().add(new XYChart.Data(86,58900)); 75 | series2.getData().add(new XYChart.Data(56,125300)); 76 | series2.getData().add(new XYChart.Data(25,125000)); 77 | series2.getData().add(new XYChart.Data(29,87600)); 78 | series2.getData().add(new XYChart.Data(65,79300)); 79 | 80 | sc.getData().addAll(series1, series2); 81 | Scene scene = new Scene(sc, 500, 400); 82 | stage.setScene(scene); 83 | stage.show(); 84 | } 85 | 86 | public static void main(String[] args) { 87 | launch(args); 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 8-Deep learning/ConvolutionalNetworkExample.java: -------------------------------------------------------------------------------- 1 | package packt.dl4jexamples; 2 | 3 | import static java.lang.System.out; 4 | import org.deeplearning4j.datasets.fetchers.MnistDataFetcher; 5 | import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; 6 | import org.deeplearning4j.nn.api.OptimizationAlgorithm; 7 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration; 8 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration; 9 | import org.deeplearning4j.nn.conf.layers.OutputLayer; 10 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 11 | import org.deeplearning4j.optimize.api.IterationListener; 12 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener; 13 | import org.nd4j.linalg.dataset.DataSet; 14 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; 15 | import org.nd4j.linalg.lossfunctions.LossFunctions; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | import java.util.Collections; 20 | import org.deeplearning4j.eval.Evaluation; 21 | import org.deeplearning4j.nn.conf.Updater; 22 | import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; 23 | import org.deeplearning4j.nn.conf.layers.DenseLayer; 24 | import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; 25 | import org.deeplearning4j.nn.conf.layers.setup.ConvolutionLayerSetup; 26 | import org.deeplearning4j.nn.weights.WeightInit; 27 | import org.nd4j.linalg.api.ndarray.INDArray; 28 | import org.nd4j.linalg.dataset.SplitTestAndTrain; 29 | import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization; 30 | import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize; 31 | 32 | /** 33 | * ***** NOTE: This example has not been tuned. It requires additional work to 34 | * produce sensible results ***** 35 | * 36 | * @author Adam Gibson 37 | */ 38 | public class ConvolutionalNetworkExample { 39 | 40 | private static Logger log = LoggerFactory.getLogger(ConvolutionalNetworkExample.class); 41 | 42 | public static void main(String[] args) throws Exception { 43 | 44 | log.info("Load data...."); 45 | //params - batch size, num examples, true?? 
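// Added note (hedged, based on the DL4J 0.x API): the two-argument form
// MnistDataSetIterator(batchSize, numExamples) is used on the next line; the boolean third
// argument used in DeepAutoEncoderExample.java in this chapter
// (new MnistDataSetIterator(1000, MnistDataFetcher.NUM_EXAMPLES, true)) toggles binarization
// of the pixel values, which is what the "true??" in the comment above refers to.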
46 | DataSetIterator iter = new MnistDataSetIterator(1000, MnistDataFetcher.NUM_EXAMPLES); 47 | //ADDED 48 | DataSet dataset = iter.next(); 49 | dataset.shuffle(); 50 | SplitTestAndTrain testAndTrain = dataset.splitTestAndTrain(0.65); 51 | DataSet trainingData = testAndTrain.getTrain(); 52 | DataSet testData = testAndTrain.getTest(); 53 | DataNormalization normalizer = new NormalizerStandardize(); 54 | normalizer.fit(trainingData); 55 | normalizer.transform(trainingData); 56 | normalizer.transform(testData); 57 | 58 | log.info("Build model...."); 59 | MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() 60 | .seed(123) 61 | .iterations(1) 62 | .regularization(true).l2(0.0005) 63 | .learningRate(0.01) 64 | .weightInit(WeightInit.XAVIER) 65 | .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) 66 | .updater(Updater.NESTEROVS).momentum(0.9) 67 | .list() 68 | .layer(0, new ConvolutionLayer.Builder(5, 5) 69 | //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied 70 | .nIn(3) 71 | .stride(1, 1) 72 | .nOut(20) 73 | .activation("identity") 74 | .build()) 75 | .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) 76 | .kernelSize(2, 2) 77 | .stride(2, 2) 78 | .build()) 79 | .layer(2, new ConvolutionLayer.Builder(5, 5) 80 | .stride(1, 1) 81 | .nOut(50) 82 | .activation("identity") 83 | .build()) 84 | .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) 85 | .kernelSize(2, 2) 86 | .stride(2, 2) 87 | .build()) 88 | .layer(4, new DenseLayer.Builder().activation("relu") 89 | .nOut(500).build()) 90 | .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) 91 | .nOut(10) 92 | .activation("softmax") 93 | .build()) 94 | .backprop(true).pretrain(false); 95 | // The builder needs the dimensions of the image along with the number of channels. 
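// Added observations (this example is untuned, as the class comment states; these are notes, not fixes):
// - layer 0 declares .nIn(3), i.e. three input channels, while ConvolutionLayerSetup below is
//   configured for a single channel, which matches grayscale MNIST.
// - new ScoreIterationListener(1/5) further down uses integer division, so the argument is 0;
//   a whole number such as 5 was probably intended.
// - new Evaluation(4) sizes the evaluation for 4 classes, whereas MNIST has 10 digit classes.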
these are 28x28 images in one channel 96 | new ConvolutionLayerSetup(builder, 28, 28, 1); 97 | 98 | MultiLayerConfiguration conf = builder.build(); 99 | MultiLayerNetwork model = new MultiLayerNetwork(conf); 100 | model.init(); 101 | model.setListeners(Collections.singletonList((IterationListener) new ScoreIterationListener(1/5))); 102 | 103 | while (iter.hasNext()) { 104 | DataSet next = iter.next(); 105 | model.fit(new DataSet(next.getFeatureMatrix(), next.getLabels())); 106 | } 107 | 108 | Evaluation evaluation = new Evaluation(4); 109 | INDArray output = model.output(testData.getFeatureMatrix()); 110 | evaluation.eval(testData.getLabels(), output); 111 | out.println(evaluation.stats()); 112 | } 113 | } 114 | 115 | 116 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 8-Deep learning/DeepAutoEncoderExample.java: -------------------------------------------------------------------------------- 1 | package packt.dl4jexamples; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import org.deeplearning4j.datasets.fetchers.MnistDataFetcher; 6 | import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; 7 | import org.deeplearning4j.nn.api.OptimizationAlgorithm; 8 | import org.deeplearning4j.nn.conf.MultiLayerConfiguration; 9 | import org.deeplearning4j.nn.conf.NeuralNetConfiguration; 10 | import org.deeplearning4j.nn.conf.layers.OutputLayer; 11 | import org.deeplearning4j.nn.conf.layers.RBM; 12 | import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 13 | import org.deeplearning4j.optimize.api.IterationListener; 14 | import org.deeplearning4j.optimize.listeners.ScoreIterationListener; 15 | import org.nd4j.linalg.dataset.DataSet; 16 | import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; 17 | import org.nd4j.linalg.lossfunctions.LossFunctions; 18 | 19 | import java.util.Collections; 20 | import org.deeplearning4j.util.ModelSerializer; 21 | 22 | public class DeepAutoEncoderExample { 23 | private MultiLayerNetwork model; 24 | private File modelFile; 25 | private DataSetIterator iterator; 26 | private final int numberOfRows = 28; 27 | private final int numberOfColumns = 28; 28 | 29 | public DeepAutoEncoderExample() { 30 | try { 31 | int seed = 123; 32 | int numberOfIterations = 1; 33 | iterator = new MnistDataSetIterator(1000, MnistDataFetcher.NUM_EXAMPLES, true); 34 | 35 | MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() 36 | .seed(seed) 37 | .iterations(numberOfIterations) 38 | .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) 39 | .list() 40 | .layer(0, new RBM.Builder().nIn(numberOfRows * numberOfColumns) 41 | .nOut(1000) 42 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 43 | .layer(1, new RBM.Builder().nIn(1000).nOut(500) 44 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 45 | .layer(2, new RBM.Builder().nIn(500).nOut(250) 46 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 47 | .layer(3, new RBM.Builder().nIn(250).nOut(100) 48 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 49 | .layer(4, new RBM.Builder().nIn(100).nOut(30) 50 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) //encoding stops 51 | .layer(5, new RBM.Builder().nIn(30).nOut(100) 52 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) //decoding starts 53 | .layer(6, new RBM.Builder().nIn(100).nOut(250) 54 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 55 | .layer(7, new 
RBM.Builder().nIn(250).nOut(500) 56 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 57 | .layer(8, new RBM.Builder().nIn(500).nOut(1000) 58 | .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 59 | .layer(9, new OutputLayer.Builder( 60 | LossFunctions.LossFunction.RMSE_XENT).nIn(1000) 61 | .nOut(numberOfRows * numberOfColumns).build()) 62 | .pretrain(true).backprop(true) 63 | .build(); 64 | 65 | model = new MultiLayerNetwork(conf); 66 | model.init(); 67 | 68 | model.setListeners(Collections.singletonList( 69 | (IterationListener) new ScoreIterationListener())); 70 | 71 | while (iterator.hasNext()) { 72 | DataSet dataSet = iterator.next(); 73 | model.fit(new DataSet(dataSet.getFeatureMatrix(), 74 | dataSet.getFeatureMatrix())); 75 | } 76 | 77 | modelFile = new File("savedModel"); 78 | ModelSerializer.writeModel(model, modelFile, true); 79 | } catch (IOException ex) { 80 | ex.printStackTrace(); 81 | } 82 | } 83 | 84 | public void retrieveModel() { 85 | try { 86 | modelFile = new File("savedModel"); 87 | MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(modelFile); 88 | } catch (IOException ex) { 89 | ex.printStackTrace(); 90 | } 91 | } 92 | 93 | public static void main(String[] args) throws Exception { 94 | new DeepAutoEncoderExample(); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /Module 1/Java for Data Science/chapter 8-Deep learning/RegressionExample.java: -------------------------------------------------------------------------------- 1 | package packt.dl4jexamples; 2 | 3 | public class RegressionExample { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/data/keywords.txt: -------------------------------------------------------------------------------- 1 | adidas basketball shoes 2 | angry birds 3 | animal shelter 4 | apple cider 5 | apples 6 | aquarium fish 7 | auto auction 8 | avengers 9 | bananas 10 | batman 11 | battlefield 4 12 | bed and breakfast 13 | berlin 14 | black friday 15 | blogging 16 | boston 17 | brazil 18 | burger king 19 | buritos 20 | calculus 21 | car loans 22 | cauliflower 23 | chinese food 24 | chinese food history 25 | christmas tree 26 | citibank 27 | classic rock songs 28 | cocktail 29 | coconut oil 30 | cranberry sauce recipe 31 | cyber security 32 | data protection 33 | david guetta 34 | deep dish pizza 35 | deep fryer 36 | digital frame 37 | digital tv 38 | eggplant recipes 39 | electric dryer 40 | energy bar 41 | family fitness 42 | fashion 43 | fifa 44 | fifa world cup 45 | film camera 46 | fitbit flex 47 | flowers 48 | food poisoning 49 | food processor 50 | free audio books 51 | free cloud storage 52 | fryer 53 | funny cat pictures 54 | furniture 55 | game of thrones 56 | gaming pcs 57 | garlic bread recipe 58 | green card 59 | green tea 60 | hand cream 61 | harry potter 62 | herbal tea benefits 63 | high tech 64 | home automation 65 | home brewing 66 | homemade pancakes 67 | homemade pizza 68 | homemade salsa 69 | how to draw 70 | how to learn programming 71 | hp printers 72 | iit delhi 73 | imax 74 | immigration australia 75 | immigration canada 76 | immigration germany 77 | immigration usa 78 | india 79 | instant food 80 | instant noodles 81 | insurance 82 | intel core i5 83 | internet router 84 | iphone 85 | korean restaurant 86 | laptop 87 | lawn mower 88 | lime cookies 89 | low blood pressure 90 | mickey mouse 91 | microscopes 92 | microsoft mouse 93 | 
microsoft phone 94 | microwave 95 | microwave oven 96 | mini cooper 97 | mortgage 98 | new delhi 99 | new york times 100 | new york 101 | orange juice 102 | oranges 103 | organic bananas 104 | organic tomatoes 105 | outdoor security cameras 106 | physical therapy 107 | pineapples 108 | pit bull 109 | pizza sauce 110 | polish food 111 | potato soup 112 | printer 113 | random password generator 114 | seoul south korea 115 | smart house 116 | soft boiled egg 117 | sony xperia m 118 | sore throat 119 | speed dating 120 | sri lanka 121 | star trek 122 | steam cooker 123 | steve madden boots 124 | tax calculator 125 | tax return 126 | the hobbit extended edition 127 | thesaurus 128 | tomatoes 129 | top indian movies 130 | tv streaming 131 | ultrasound 132 | usc football 133 | used cars 134 | used laptops 135 | video camera 136 | video chat 137 | walkie talkies 138 | wedding dress 139 | windows surface pro 140 | wonder woman 141 | x men 142 | xbox -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/data/text.txt: -------------------------------------------------------------------------------- 1 | My dog also likes eating sausage. 2 | The motor accepts beside a surplus. 3 | Every capable slash succeeds with a worldwide blame. 4 | The continued task coughs around the guilty kiss. 5 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/data/words.txt: -------------------------------------------------------------------------------- 1 | pellentesque RB 2 | aliquam RB 3 | fringilla VB 4 | urna RB 5 | ornare NN 6 | primis NN 7 | Quisque VB 8 | Proin VB 9 | Proin NN 10 | Morbi NN 11 | risus RPR 12 | blandit VB 13 | ut VB 14 | fermentum NN 15 | neque VB 16 | lacinia RB 17 | ultrices NN 18 | consectetuer NN 19 | dapibus NN 20 | lectus RB 21 | nisi NN 22 | purus NN 23 | eu RB 24 | congue NN 25 | aliquet RB 26 | eget NN 27 | sociis NN 28 | ut VB 29 | morbi RPR 30 | et RPR 31 | magna VB 32 | non RB 33 | libero RPR 34 | elit NN 35 | felis RPR 36 | lorem RPR 37 | Nunc VB 38 | pellentesque RB 39 | consequat RB 40 | nunc VB 41 | libero NN 42 | felis RPR 43 | mauris NN 44 | montes RB 45 | ipsum RPR 46 | Proin RPR 47 | Maecenas RPR 48 | nec NN 49 | nunc RB 50 | eu RB 51 | semper RPR 52 | erat VB 53 | tincidunt NN 54 | nibh NN 55 | lectus RB 56 | gravida RB 57 | vulputate VB 58 | fringilla NN 59 | congue VB 60 | Aenean VB 61 | eu VB 62 | lorem RB 63 | dui NN 64 | eu RB 65 | ornare NN 66 | fringilla VB 67 | luctus NN 68 | vulputate RPR 69 | fermentum RPR 70 | libero NN 71 | sapien RPR 72 | cubilia NN 73 | venenatis RPR 74 | sollicitudin VB 75 | eros RB 76 | montes RPR 77 | mollis NN 78 | Nunc RPR 79 | a RPR 80 | sed VB 81 | odio RB 82 | ante VB 83 | sociis VB 84 | turpis RB 85 | est RB 86 | mauris RPR 87 | faucibus RPR 88 | habitant RPR 89 | Phasellus RB 90 | porttitor VB 91 | imperdiet RPR 92 | non VB 93 | nisl RPR 94 | mus RPR 95 | viverra RPR 96 | ut RPR 97 | dui RPR 98 | adipiscing NN 99 | mus RB 100 | quam RB -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter02/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-02-dataprocessing 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | 
http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | ${slf4j.version} 31 | 32 | 33 | ch.qos.logback 34 | logback-classic 35 | ${logback.version} 36 | 37 | 38 | ch.qos.logback 39 | logback-core 40 | ${logback.version} 41 | 42 | 43 | 44 | 45 | org.apache.commons 46 | commons-lang3 47 | 3.4 48 | 49 | 50 | commons-io 51 | commons-io 52 | 2.5 53 | 54 | 55 | com.google.guava 56 | guava 57 | 19.0 58 | 59 | 60 | 61 | org.jsoup 62 | jsoup 63 | 1.9.2 64 | 65 | 66 | com.fasterxml.jackson.jr 67 | jackson-jr-all 68 | 2.8.1 69 | 70 | 71 | com.jayway.jsonpath 72 | json-path 73 | 2.2.0 74 | 75 | 76 | 77 | org.apache.commons 78 | commons-collections4 79 | 4.1 80 | 81 | 82 | 83 | org.apache.commons 84 | commons-csv 85 | 1.4 86 | 87 | 88 | 89 | org.mapdb 90 | mapdb 91 | 3.0.1 92 | 93 | 94 | mysql 95 | mysql-connector-java 96 | 5.1.39 97 | 98 | 99 | 100 | joinery 101 | joinery-dataframe 102 | 1.7 103 | 104 | 105 | org.apache.poi 106 | poi 107 | 3.14 108 | 109 | 110 | 111 | com.aol.simplereact 112 | cyclops-react 113 | 1.0.0-RC4 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | org.apache.maven.plugins 122 | maven-compiler-plugin 123 | 3.5.1 124 | 125 | 1.8 126 | 1.8 127 | 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-surefire-plugin 132 | 2.19.1 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.eclipse.m2e 141 | lifecycle-mapping 142 | 1.0.0 143 | 144 | 145 | 146 | 147 | 148 | 149 | org.apache.maven.plugins 150 | maven-dependency-plugin 151 | [1.0.0,) 152 | 153 | copy-dependencies 154 | unpack 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter03/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-03-eda 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | org.slf4j 30 | slf4j-api 31 | ${slf4j.version} 32 | 33 | 34 | ch.qos.logback 35 | logback-classic 36 | ${logback.version} 37 | 38 | 39 | ch.qos.logback 40 | logback-core 41 | ${logback.version} 42 | 43 | 44 | 45 | 46 | org.apache.commons 47 | commons-lang3 48 | 3.4 49 | 50 | 51 | commons-io 52 | commons-io 53 | 2.5 54 | 55 | 56 | com.google.guava 57 | guava 58 | 19.0 59 | 60 | 61 | 62 | com.fasterxml.jackson.jr 63 | jackson-jr-all 64 | 2.8.1 65 | 66 | 67 | 68 | org.apache.commons 69 | commons-math3 70 | 3.6.1 71 | 72 | 73 | 74 | joinery 75 | joinery-dataframe 76 | 1.7 77 | 78 | 79 | org.apache.poi 80 | poi 81 | 3.14 82 | 83 | 84 | rhino 85 | js 86 | 1.7R2 87 | 88 | 89 | jline 90 | jline 91 | 2.14.2 92 | 93 | 94 | com.xeiam.xchart 95 | xchart 96 | 2.5.1 97 | 98 | 99 | 100 | com.github.haifengl 101 | smile-core 102 | 1.1.0 103 | 104 | 105 | com.github.haifengl 106 | smile-plot 107 | 1.1.0 108 | 109 | 110 | 111 | 112 | junit 113 | junit-dep 114 | 4.8.1 115 | test 116 | 117 | 118 | 119 | 120 | 121 | 122 | org.apache.maven.plugins 123 | maven-compiler-plugin 124 | 3.5.1 125 | 126 | 1.8 127 | 1.8 128 | 129 | 130 | 131 | org.apache.maven.plugins 132 | maven-surefire-plugin 133 | 2.19.1 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | org.eclipse.m2e 142 | lifecycle-mapping 143 | 1.0.0 144 | 145 | 146 | 147 | 148 | 149 | 150 | org.apache.maven.plugins 151 | maven-dependency-plugin 152 | [1.0.0,) 153 | 154 
| copy-dependencies 155 | unpack 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter03/src/main/java/chapter03/Data.java: -------------------------------------------------------------------------------- 1 | package chapter03; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Files; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.List; 8 | import java.util.stream.Collectors; 9 | import java.util.stream.Stream; 10 | 11 | import com.fasterxml.jackson.jr.ob.JSON; 12 | import com.google.common.base.Throwables; 13 | 14 | public class Data { 15 | 16 | public static List readRankedPages() throws IOException { 17 | Path path = Paths.get("./data/ranked-pages.json"); 18 | try (Stream lines = Files.lines(path)) { 19 | return lines.map(line -> parseJson(line)).collect(Collectors.toList()); 20 | } 21 | } 22 | 23 | public static RankedPage parseJson(String line) { 24 | try { 25 | return JSON.std.beanFrom(RankedPage.class, line); 26 | } catch (IOException e) { 27 | throw Throwables.propagate(e); 28 | } 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter04/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-04-supervised 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | java-ds 25 | https://raw.githubusercontent.com/alexeygrigorev/maven-repo/master/ 26 | 27 | 28 | 29 | 30 | 31 | 32 | org.slf4j 33 | slf4j-api 34 | ${slf4j.version} 35 | 36 | 37 | ch.qos.logback 38 | logback-classic 39 | ${logback.version} 40 | 41 | 42 | ch.qos.logback 43 | logback-core 44 | ${logback.version} 45 | 46 | 47 | 48 | 49 | org.apache.commons 50 | commons-lang3 51 | 3.4 52 | 53 | 54 | commons-io 55 | commons-io 56 | 2.5 57 | 58 | 59 | com.google.guava 60 | guava 61 | 19.0 62 | 63 | 64 | 65 | com.fasterxml.jackson.jr 66 | jackson-jr-all 67 | 2.8.1 68 | 69 | 70 | 71 | joinery 72 | joinery-dataframe 73 | 1.7 74 | 75 | 76 | org.apache.poi 77 | poi 78 | 3.14 79 | 80 | 81 | 82 | com.github.haifengl 83 | smile-core 84 | 1.2.0 85 | 86 | 87 | com.github.haifengl 88 | smile-plot 89 | 1.1.0 90 | 91 | 92 | 93 | com.edwardraff 94 | JSAT 95 | 0.0.5 96 | 97 | 98 | 99 | net.sourceforge 100 | javaml 101 | 0.1.7 102 | 103 | 104 | be.abeel 105 | ajt 106 | 2.9 107 | 108 | 109 | 110 | org.encog 111 | encog-core 112 | 3.3.0 113 | 114 | 115 | 116 | tw.edu.ntu.csie 117 | libsvm 118 | 3.17 119 | 120 | 121 | de.bwaldvogel 122 | liblinear 123 | 1.95 124 | 125 | 126 | 127 | 128 | junit 129 | junit-dep 130 | 4.8.1 131 | test 132 | 133 | 134 | 135 | 136 | 137 | 138 | org.apache.maven.plugins 139 | maven-compiler-plugin 140 | 3.5.1 141 | 142 | 1.8 143 | 1.8 144 | 145 | 146 | 147 | org.apache.maven.plugins 148 | maven-surefire-plugin 149 | 2.19.1 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | org.eclipse.m2e 158 | lifecycle-mapping 159 | 1.0.0 160 | 161 | 162 | 163 | 164 | 165 | 166 | org.apache.maven.plugins 167 | maven-dependency-plugin 168 | [1.0.0,) 169 | 170 | copy-dependencies 171 | unpack 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 
-------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter04/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter05/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-05-unsupervised 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | ${slf4j.version} 31 | 32 | 33 | ch.qos.logback 34 | logback-classic 35 | ${logback.version} 36 | 37 | 38 | ch.qos.logback 39 | logback-core 40 | ${logback.version} 41 | 42 | 43 | 44 | 45 | org.apache.commons 46 | commons-lang3 47 | 3.4 48 | 49 | 50 | commons-io 51 | commons-io 52 | 2.5 53 | 54 | 55 | org.apache.commons 56 | commons-math3 57 | 3.6.1 58 | 59 | 60 | com.google.guava 61 | guava 62 | 19.0 63 | 64 | 65 | 66 | joinery 67 | joinery-dataframe 68 | 1.7 69 | 70 | 71 | org.apache.poi 72 | poi 73 | 3.14 74 | 75 | 76 | 77 | com.github.haifengl 78 | smile-core 79 | 1.2.1 80 | 81 | 82 | com.github.haifengl 83 | smile-plot 84 | 1.2.0 85 | 86 | 87 | 88 | com.edwardraff 89 | JSAT 90 | 0.0.5 91 | 92 | 93 | 94 | com.googlecode.matrix-toolkits-java 95 | mtj 96 | 1.0.2 97 | 98 | 99 | 100 | com.aol.simplereact 101 | cyclops-react 102 | 1.0.0-FINAL 103 | 104 | 105 | 106 | 107 | junit 108 | junit-dep 109 | 4.8.1 110 | test 111 | 112 | 113 | 114 | 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-compiler-plugin 119 | 3.5.1 120 | 121 | 1.8 122 | 1.8 123 | 124 | 125 | 126 | org.apache.maven.plugins 127 | maven-surefire-plugin 128 | 2.19.1 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | org.eclipse.m2e 137 | lifecycle-mapping 138 | 1.0.0 139 | 140 | 141 | 142 | 143 | 144 | 145 | org.apache.maven.plugins 146 | maven-dependency-plugin 147 | [1.0.0,) 148 | 149 | copy-dependencies 150 | unpack 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter06/src/main/java/chapter06/cv/CV.java: -------------------------------------------------------------------------------- 1 | package chapter06.cv; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.Random; 7 | import java.util.stream.IntStream; 8 | 9 | import org.apache.commons.lang3.Validate; 10 | 11 | public class CV { 12 | 13 | public static List kfold(int length, int k, boolean shuffle, long seed) { 14 | Validate.isTrue(k < length); 15 | 16 | int[] indexes = IntStream.range(0, length).toArray(); 17 | if (shuffle) { 18 | shuffle(indexes, seed); 19 | } 20 | 21 | int[][] folds = prepareFolds(indexes, k); 22 | List result = new ArrayList<>(); 23 | 24 | for (int i = 0; i < k; i++) { 25 | int[] testIdx = folds[i]; 26 | int[] trainIdx = combineTrainFolds(folds, indexes.length, i); 27 | result.add(new IndexSplit(trainIdx, testIdx)); 28 | } 29 | 30 | return result; 31 | } 32 | 33 | public static IndexSplit trainTestSplit(int length, double 
testRatio, boolean shuffle, long seed) { 34 | Validate.isTrue(testRatio > 0.0 && testRatio < 1.0, "testRatio must be in (0, 1) interval"); 35 | 36 | int[] indexes = IntStream.range(0, length).toArray(); 37 | if (shuffle) { 38 | shuffle(indexes, seed); 39 | } 40 | 41 | int trainSize = (int) (indexes.length * (1 - testRatio)); 42 | 43 | int[] trainIndex = Arrays.copyOfRange(indexes, 0, trainSize); 44 | int[] testIndex = Arrays.copyOfRange(indexes, trainSize, indexes.length); 45 | 46 | return new IndexSplit(trainIndex, testIndex); 47 | } 48 | 49 | public static void shuffle(int[] indexes, long seed) { 50 | Random rnd = new Random(seed); 51 | shuffle(indexes, rnd); 52 | } 53 | 54 | public static void shuffle(int[] indexes, Random rnd) { 55 | for (int i = indexes.length - 1; i > 0; i--) { 56 | int index = rnd.nextInt(i + 1); 57 | 58 | int tmp = indexes[index]; 59 | indexes[index] = indexes[i]; 60 | indexes[i] = tmp; 61 | } 62 | } 63 | 64 | private static int[][] prepareFolds(int[] indexes, int k) { 65 | int[][] foldIndexes = new int[k][]; 66 | 67 | int step = indexes.length / k; 68 | int beginIndex = 0; 69 | 70 | for (int i = 0; i < k - 1; i++) { 71 | foldIndexes[i] = Arrays.copyOfRange(indexes, beginIndex, beginIndex + step); 72 | beginIndex = beginIndex + step; 73 | } 74 | 75 | foldIndexes[k - 1] = Arrays.copyOfRange(indexes, beginIndex, indexes.length); 76 | return foldIndexes; 77 | } 78 | 79 | private static int[] combineTrainFolds(int[][] folds, int totalSize, int excludeIndex) { 80 | int size = totalSize - folds[excludeIndex].length; 81 | int result[] = new int[size]; 82 | 83 | int start = 0; 84 | for (int i = 0; i < folds.length; i++) { 85 | if (i == excludeIndex) { 86 | continue; 87 | } 88 | int[] fold = folds[i]; 89 | System.arraycopy(fold, 0, result, start, fold.length); 90 | start = start + fold.length; 91 | } 92 | 93 | return result; 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter06/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/Metrics.java: -------------------------------------------------------------------------------- 1 | package chapter07; 2 | 3 | import java.util.Arrays; 4 | 5 | import org.apache.commons.lang3.Validate; 6 | 7 | import smile.validation.AUC; 8 | 9 | public class Metrics { 10 | 11 | public static double auc(double[] actual, double[] predicted) { 12 | Validate.isTrue(actual.length == predicted.length, "the lengths don't match"); 13 | int[] truth = Arrays.stream(actual).mapToInt(i -> (int) i).toArray(); 14 | double result = AUC.measure(truth, predicted); 15 | if (result < 0.5) { 16 | return 1 - result; 17 | } else { 18 | return result; 19 | } 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/TextUtils.java: -------------------------------------------------------------------------------- 1 | package chapter07; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | import java.util.Set; 6 | import java.util.regex.Pattern; 7 | import java.util.stream.Collectors; 8 | 9 | import 
com.google.common.collect.ImmutableSet; 10 | 11 | public class TextUtils { 12 | 13 | public static final Set EN_STOPWORDS = ImmutableSet.of("a", "an", "and", "are", "as", "at", "be", 14 | "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", 15 | "their", "then", "there", "these", "they", "this", "to", "was", "will", "with", "what", "which", "s", "m", "t"); 16 | 17 | public static List tokenize(String line) { 18 | Pattern pattern = Pattern.compile("\\W+"); 19 | String[] split = pattern.split(line.toLowerCase()); 20 | return Arrays.stream(split) 21 | .map(String::trim) 22 | .filter(s -> s.length() > 2) 23 | .collect(Collectors.toList()); 24 | } 25 | 26 | public static List tokenizeFilter(String line) { 27 | Pattern pattern = Pattern.compile("\\W+"); 28 | String[] split = pattern.split(line.toLowerCase()); 29 | return Arrays.stream(split) 30 | .map(String::trim) 31 | .filter(s -> s.length() > 2) 32 | .filter(s -> !isStopword(s)) 33 | .collect(Collectors.toList()); 34 | } 35 | 36 | public static boolean isStopword(String token) { 37 | return EN_STOPWORDS.contains(token); 38 | } 39 | 40 | public static List removeStopwords(List line) { 41 | return removeStopwords(line, EN_STOPWORDS); 42 | } 43 | 44 | public static List removeStopwords(List line, Set stopwords) { 45 | return line.stream().filter(token -> !stopwords.contains(token)).collect(Collectors.toList()); 46 | } 47 | } -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/cv/CV.java: -------------------------------------------------------------------------------- 1 | package chapter07.cv; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.Random; 7 | import java.util.stream.IntStream; 8 | 9 | import org.apache.commons.lang3.Validate; 10 | 11 | public class CV { 12 | 13 | public static List kfold(Dataset dataset, int k, boolean shuffle, long seed) { 14 | int length = dataset.length(); 15 | Validate.isTrue(k < length); 16 | 17 | int[] indexes = IntStream.range(0, length).toArray(); 18 | if (shuffle) { 19 | shuffle(indexes, seed); 20 | } 21 | 22 | int[][] folds = prepareFolds(indexes, k); 23 | List result = new ArrayList<>(); 24 | 25 | for (int i = 0; i < k; i++) { 26 | int[] testIdx = folds[i]; 27 | int[] trainIdx = combineTrainFolds(folds, indexes.length, i); 28 | result.add(Split.fromIndexes(dataset, trainIdx, testIdx)); 29 | } 30 | 31 | return result; 32 | } 33 | 34 | public static Split trainTestSplit(Dataset dataset, double testRatio, boolean shuffle, long seed) { 35 | Validate.isTrue(testRatio > 0.0 && testRatio < 1.0, "testRatio must be in (0, 1) interval"); 36 | 37 | int[] indexes = IntStream.range(0, dataset.length()).toArray(); 38 | if (shuffle) { 39 | shuffle(indexes, seed); 40 | } 41 | 42 | int trainSize = (int) (indexes.length * (1 - testRatio)); 43 | 44 | int[] trainIndex = Arrays.copyOfRange(indexes, 0, trainSize); 45 | int[] testIndex = Arrays.copyOfRange(indexes, trainSize, indexes.length); 46 | 47 | return Split.fromIndexes(dataset, trainIndex, testIndex); 48 | } 49 | 50 | public static void shuffle(int[] indexes, long seed) { 51 | Random rnd = new Random(seed); 52 | 53 | for (int i = indexes.length - 1; i > 0; i--) { 54 | int index = rnd.nextInt(i + 1); 55 | 56 | int tmp = indexes[index]; 57 | indexes[index] = indexes[i]; 58 | indexes[i] = tmp; 59 | } 60 | } 61 | 62 | private static int[][] 
prepareFolds(int[] indexes, int k) { 63 | int[][] foldIndexes = new int[k][]; 64 | 65 | int step = indexes.length / k; 66 | int beginIndex = 0; 67 | 68 | for (int i = 0; i < k - 1; i++) { 69 | foldIndexes[i] = Arrays.copyOfRange(indexes, beginIndex, beginIndex + step); 70 | beginIndex = beginIndex + step; 71 | } 72 | 73 | foldIndexes[k - 1] = Arrays.copyOfRange(indexes, beginIndex, indexes.length); 74 | return foldIndexes; 75 | } 76 | 77 | private static int[] combineTrainFolds(int[][] folds, int totalSize, int excludeIndex) { 78 | int size = totalSize - folds[excludeIndex].length; 79 | int result[] = new int[size]; 80 | 81 | int start = 0; 82 | for (int i = 0; i < folds.length; i++) { 83 | if (i == excludeIndex) { 84 | continue; 85 | } 86 | int[] fold = folds[i]; 87 | System.arraycopy(fold, 0, result, start, fold.length); 88 | start = start + fold.length; 89 | } 90 | 91 | return result; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/java/chapter07/cv/Split.java: -------------------------------------------------------------------------------- 1 | package chapter07.cv; 2 | 3 | import java.util.Objects; 4 | 5 | public class Split { 6 | 7 | private final Dataset train; 8 | private final Dataset test; 9 | 10 | public Split(Dataset train, Dataset test) { 11 | this.train = train; 12 | this.test = test; 13 | } 14 | 15 | public static Split fromIndexes(Dataset dataset, int[] trainIndex, int[] testIndex) { 16 | double[][] X = dataset.getX(); 17 | double[] y = dataset.getY(); 18 | 19 | int trainSize = trainIndex.length; 20 | 21 | double[][] trainXres = new double[trainSize][]; 22 | double[] trainYres = new double[trainSize]; 23 | for (int i = 0; i < trainSize; i++) { 24 | int idx = trainIndex[i]; 25 | trainXres[i] = X[idx]; 26 | trainYres[i] = y[idx]; 27 | } 28 | 29 | int testSize = testIndex.length; 30 | 31 | double[][] testXres = new double[testSize][]; 32 | double[] testYres = new double[testSize]; 33 | for (int i = 0; i < testSize; i++) { 34 | int idx = testIndex[i]; 35 | testXres[i] = X[idx]; 36 | testYres[i] = y[idx]; 37 | } 38 | 39 | Dataset train = new Dataset(trainXres, trainYres, dataset.getFeatureNames()); 40 | Dataset test = new Dataset(testXres, testYres, dataset.getFeatureNames()); 41 | return new Split(train, test); 42 | } 43 | 44 | public Dataset getTrain() { 45 | return train; 46 | } 47 | 48 | public Dataset getTest() { 49 | return test; 50 | } 51 | 52 | @Override 53 | public boolean equals(Object obj) { 54 | if (obj instanceof Split) { 55 | Split other = (Split) obj; 56 | return train.equals(other.train) && test.equals(test); 57 | } 58 | 59 | return false; 60 | } 61 | 62 | @Override 63 | public int hashCode() { 64 | return Objects.hash(train, test); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter07/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter08/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.alexeygrigorev.javads 6 | chapter-08-dl4j 7 | 0.0.1-SNAPSHOT 8 | 9 | 10 | 1.7.21 11 | 
1.1.7 12 | 13 | 14 | 15 | 16 | central 17 | http://repo1.maven.org/maven2 18 | 19 | 20 | bintray 21 | http://jcenter.bintray.com 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | ${slf4j.version} 31 | 32 | 33 | ch.qos.logback 34 | logback-classic 35 | ${logback.version} 36 | 37 | 38 | ch.qos.logback 39 | logback-core 40 | ${logback.version} 41 | 42 | 43 | 44 | 45 | org.apache.commons 46 | commons-lang3 47 | 3.4 48 | 49 | 50 | commons-io 51 | commons-io 52 | 2.5 53 | 54 | 55 | org.apache.commons 56 | commons-math3 57 | 3.6.1 58 | 59 | 60 | com.google.guava 61 | guava 62 | 19.0 63 | 64 | 65 | 66 | org.deeplearning4j 67 | deeplearning4j-core 68 | 0.7.1 69 | 70 | 71 | org.deeplearning4j 72 | deeplearning4j-ui_2.10 73 | 0.7.1 74 | 75 | 76 | org.nd4j 77 | nd4j-native-platform 78 | 0.7.1 79 | 80 | 87 | 88 | joinery 89 | joinery-dataframe 90 | 1.7 91 | 92 | 93 | org.apache.poi 94 | poi 95 | 3.14 96 | 97 | 98 | 99 | org.imgscalr 100 | imgscalr-lib 101 | 4.2 102 | 103 | 104 | 105 | 106 | junit 107 | junit-dep 108 | 4.8.1 109 | test 110 | 111 | 112 | 113 | 114 | 115 | 116 | org.apache.maven.plugins 117 | maven-compiler-plugin 118 | 3.5.1 119 | 120 | 1.8 121 | 1.8 122 | 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-surefire-plugin 127 | 2.19.1 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-dependency-plugin 132 | 133 | 134 | copy-dependencies 135 | prepare-package 136 | 137 | copy-dependencies 138 | 139 | 140 | libs 141 | false 142 | false 143 | true 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | org.eclipse.m2e 155 | lifecycle-mapping 156 | 1.0.0 157 | 158 | 159 | 160 | 161 | 162 | 163 | org.apache.maven.plugins 164 | maven-dependency-plugin 165 | [1.0.0,) 166 | 167 | copy-dependencies 168 | unpack 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter08/src/main/java/chapter08/Metrics.java: -------------------------------------------------------------------------------- 1 | package chapter08; 2 | 3 | import org.apache.commons.lang3.Validate; 4 | 5 | public class Metrics { 6 | 7 | public static double logLoss(double[] actual, double[] predicted) { 8 | return logLoss(actual, predicted, 1e-15); 9 | } 10 | 11 | public static double logLoss(double[] actual, double[] predicted, double eps) { 12 | Validate.isTrue(actual.length == predicted.length, "the lengths don't match"); 13 | int n = actual.length; 14 | double total = 0.0; 15 | 16 | for (int i = 0; i < n; i++) { 17 | double yi = actual[i]; 18 | double pi = predicted[i]; 19 | 20 | if (yi == 0.0) { 21 | double log = Math.log(Math.min(1 - pi, 1 - eps)); 22 | total = total + log; 23 | } else if (yi == 1.0) { 24 | double log = Math.log(Math.max(pi, eps)); 25 | total = total + log; 26 | } else { 27 | throw new IllegalArgumentException("unrecognized class " + yi); 28 | } 29 | } 30 | 31 | return -total / n; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter08/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Module 
2/MasteringJavaforDataScience_Code/Chapter09/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/Chapter10/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/ReadMe.txt: -------------------------------------------------------------------------------- 1 | Chapter 1 does not contain code files -------------------------------------------------------------------------------- /Module 2/MasteringJavaforDataScience_Code/SoftwareHardwareList.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Java-Data-Science-Made-Easy/29e2a5d0638c0ee8eb913c67bf549221bc4d0549/Module 2/MasteringJavaforDataScience_Code/SoftwareHardwareList.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## $5 Tech Unlocked 2021! 5 | [Buy and download this Course for only $5 on PacktPub.com](https://www.packtpub.com/product/java-data-science-made-easy/9781788475655) 6 | ----- 7 | *The $5 campaign runs from __December 15th 2020__ to __January 13th 2021.__* 8 | 9 | # Java-Data-Science-Made-Easy 10 | Code Repository for Java: Data Science Made Easy 11 | ### Download a free PDF 12 | 13 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
14 | https://packt.link/free-ebook/9781788475655
--------------------------------------------------------------------------------