├── doc ├── util │ ├── package-use.html │ ├── package-frame.html │ ├── package-tree.html │ ├── package-summary.html │ └── class-use │ │ └── Pair.html ├── package-list ├── resources │ └── inherit.gif ├── allclasses-noframe.html ├── allclasses-frame.html ├── io │ ├── package-frame.html │ ├── package-use.html │ ├── class-use │ │ └── TFReader.html │ ├── package-tree.html │ ├── package-summary.html │ └── TFReader.html ├── package-frame.html ├── overview-frame.html ├── index.html ├── stylesheet.css ├── deprecated-list.html ├── overview-tree.html ├── constant-values.html ├── package-use.html ├── class-use │ ├── Test.html │ ├── RunSpam.html │ └── NaiveBayes.html ├── index-files │ ├── index-1.html │ ├── index-5.html │ ├── index-3.html │ ├── index-2.html │ └── index-4.html ├── package-tree.html ├── package-summary.html ├── help-doc.html ├── RunSpam.html └── Test.html ├── bin ├── Test.class ├── test.class ├── RunSpam.class ├── TFReader.class ├── NaiveBayes.class ├── io │ └── TFReader.class └── util │ ├── Pair.class │ ├── EmailDataset.class │ ├── EmailMessage.class │ ├── TabelaLimiar.class │ └── ThresholdTable.class ├── src ├── io │ ├── TFReader.class │ └── TFReader.java ├── util │ ├── Pair.class │ ├── EmailDataset.class │ ├── EmailMessage.class │ ├── TabelaLimiar.class │ ├── TabelaLimiar.java │ ├── ThresholdTable.java │ ├── Pair.java │ ├── EmailMessage.java │ └── EmailDataset.java ├── test.java ├── Test.java ├── TFReader.java └── RunSpam.java ├── .classpath ├── .project └── .settings └── org.eclipse.jdt.core.prefs /doc/util/package-use.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/package-list: -------------------------------------------------------------------------------- 1 | 2 | io 3 | util 4 | -------------------------------------------------------------------------------- /bin/Test.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/Test.class -------------------------------------------------------------------------------- /bin/test.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/test.class -------------------------------------------------------------------------------- /bin/RunSpam.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/RunSpam.class -------------------------------------------------------------------------------- /bin/TFReader.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/TFReader.class -------------------------------------------------------------------------------- /bin/NaiveBayes.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/NaiveBayes.class -------------------------------------------------------------------------------- /bin/io/TFReader.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/io/TFReader.class -------------------------------------------------------------------------------- /bin/util/Pair.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/util/Pair.class -------------------------------------------------------------------------------- /src/io/TFReader.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/src/io/TFReader.class -------------------------------------------------------------------------------- /src/util/Pair.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/src/util/Pair.class -------------------------------------------------------------------------------- /doc/resources/inherit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/doc/resources/inherit.gif -------------------------------------------------------------------------------- /bin/util/EmailDataset.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/util/EmailDataset.class -------------------------------------------------------------------------------- /bin/util/EmailMessage.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/util/EmailMessage.class -------------------------------------------------------------------------------- /bin/util/TabelaLimiar.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/util/TabelaLimiar.class -------------------------------------------------------------------------------- /src/util/EmailDataset.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/src/util/EmailDataset.class 
-------------------------------------------------------------------------------- /src/util/EmailMessage.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/src/util/EmailMessage.class -------------------------------------------------------------------------------- /src/util/TabelaLimiar.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/src/util/TabelaLimiar.class -------------------------------------------------------------------------------- /bin/util/ThresholdTable.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidenunes/naive-bayes-spam-filter/HEAD/bin/util/ThresholdTable.class -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | AA 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | #Fri Jan 28 16:22:01 WET 2011 2 | eclipse.preferences.version=1 3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.6 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | 
org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.source=1.6 13 | -------------------------------------------------------------------------------- /src/util/TabelaLimiar.java: -------------------------------------------------------------------------------- 1 | package util; 2 | /** 3 | * 4 | */ 5 | public class TabelaLimiar { 6 | 7 | private int limiar; 8 | private int fp; 9 | private int fn; 10 | 11 | 12 | public TabelaLimiar(){} 13 | 14 | public TabelaLimiar(int l, int fp, int fn){ 15 | this.limiar = l; 16 | this.fp = fp; 17 | this.fn = fn; 18 | } 19 | 20 | public int getLimiar() { 21 | return limiar; 22 | } 23 | 24 | public void setLimiar(int limiar) { 25 | this.limiar = limiar; 26 | } 27 | 28 | public int getFp() { 29 | return fp; 30 | } 31 | 32 | public void setFp(int fp) { 33 | this.fp = fp; 34 | } 35 | 36 | public int getFn() { 37 | return fn; 38 | } 39 | 40 | public void setFn(int fn) { 41 | this.fn = fn; 42 | } 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/util/ThresholdTable.java: -------------------------------------------------------------------------------- 1 | package util; 2 | /** 3 | * 4 | */ 5 | public class ThresholdTable { 6 | 7 | private int limiar; 8 | private int fp; 9 | private int fn; 10 | 11 | 12 | public ThresholdTable(){} 13 | 14 | public ThresholdTable(int l, int fp, int fn){ 15 | this.limiar = l; 16 | this.fp = fp; 17 | this.fn = fn; 18 | } 19 | 20 | public int getLimiar() { 21 | return limiar; 22 | } 23 | 24 | public void setLimiar(int limiar) { 25 | this.limiar = limiar; 26 | } 27 | 28 | public int getFp() { 29 | return fp; 30 | } 31 | 32 | public void setFp(int fp) { 33 | this.fp = fp; 34 | } 35 | 36 | public int 
getFn() { 37 | return fn; 38 | } 39 | 40 | public void setFn(int fn) { 41 | this.fn = fn; 42 | } 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /doc/allclasses-noframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | All Classes 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | All Classes 20 |
21 | 22 | 23 | 24 | 27 | 28 |
NaiveBayes 25 |
26 |
29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /doc/allclasses-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | All Classes 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | All Classes 20 |
21 | 22 | 23 | 24 | 27 | 28 |
NaiveBayes 25 |
26 |
29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /doc/io/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | io 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | io 20 | 21 | 22 | 27 | 28 |
23 | Classes  24 | 25 |
26 | TFReader
29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /doc/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | <Unnamed> 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | <Unnamed> 20 | 21 | 22 | 29 | 30 |
23 | Classes  24 | 25 |
26 | NaiveBayes 27 |
28 | RunSpam
31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /doc/util/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | util 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | util 20 | 21 | 22 | 31 | 32 |
23 | Classes  24 | 25 |
26 | EmailDataset 27 |
28 | EmailMessage 29 |
30 | Pair
33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /doc/overview-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Overview List 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 23 | 24 |
22 |
25 | 26 | 27 | 28 | 40 | 41 |
All Classes 29 |

30 | 31 | Packages 32 |
33 | <unnamed package> 34 |
35 | io 36 |
37 | util 38 |
39 |

42 | 43 |

44 |   45 | 46 | 47 | -------------------------------------------------------------------------------- /doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Generated Documentation (Untitled) 8 | 9 | 20 | 22 | 23 | 24 | 25 | 26 | 27 | <H2> 28 | Frame Alert</H2> 29 | 30 | <P> 31 | This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. 32 | <BR> 33 | Link to<A HREF="NaiveBayes.html">Non-frame version.</A> 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /doc/stylesheet.css: -------------------------------------------------------------------------------- 1 | /* Javadoc style sheet */ 2 | 3 | /* Define colors, fonts and other style attributes here to override the defaults */ 4 | 5 | /* Page background color */ 6 | body { background-color: #FFFFFF; color:#000000 } 7 | 8 | /* Headings */ 9 | h1 { font-size: 145% } 10 | 11 | /* Table colors */ 12 | .TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */ 13 | .TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */ 14 | .TableRowColor { background: #FFFFFF; color:#000000 } /* White */ 15 | 16 | /* Font used in left-hand frame lists */ 17 | .FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 } 18 | .FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } 19 | .FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } 20 | 21 | /* Navigation bar fonts and colors */ 22 | .NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */ 23 | .NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */ 24 | .NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;} 25 | .NavBarFont1Rev { font-family: Arial, Helvetica, 
sans-serif; color:#FFFFFF;color:#FFFFFF;} 26 | 27 | .NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} 28 | .NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} 29 | 30 | -------------------------------------------------------------------------------- /src/util/Pair.java: -------------------------------------------------------------------------------- 1 | package util; 2 | /** 3 | * Class that represents a pair of object 4 | * for utility purposes 5 | * 6 | * @author davide 7 | * @author ainara 8 | * 9 | * @param type of the two elements in the pair 10 | */ 11 | public class Pair { 12 | private T first; 13 | private T second; 14 | 15 | /** 16 | * If you use this constructor 17 | * use the setFirst() and setSecond() method 18 | */ 19 | public Pair(){} 20 | 21 | /** 22 | * Creates a pair given two elements 23 | * 24 | * @param first T first element in the pair 25 | * @param second T second element in the pair 26 | */ 27 | public Pair(T first, T second){ 28 | this.first = first; 29 | this.second = second; 30 | } 31 | 32 | /** 33 | * Method that returns the first element of the pair 34 | * 35 | * @return first T - first element 36 | */ 37 | public T getFirst(){ 38 | return first; 39 | } 40 | 41 | /** 42 | * Method that returns the second element of the pair 43 | * 44 | * @return second T - second element 45 | */ 46 | public T getSecont(){ 47 | return second; 48 | } 49 | 50 | /** 51 | * Method that sets the first element of the pair 52 | * 53 | * (usefull if you construct empty pairs and instantiate them latter on) 54 | * 55 | * 56 | */ 57 | public void setFirst(T first){ 58 | this.first = first; 59 | } 60 | 61 | /** 62 | * Method that sets the second element of the pair 63 | * 64 | * (usefull if you construct empty pairs and instantiate them latter on) 65 | * 66 | * 67 | */ 68 | public void setSecond(T second){ 69 | this.second = second; 70 | } 71 | 72 | } 73 | 
-------------------------------------------------------------------------------- /src/test.java: -------------------------------------------------------------------------------- 1 | 2 | 3 | import io.TFReader; 4 | 5 | import java.io.FileNotFoundException; 6 | import java.util.ArrayList; 7 | import java.util.Collections; 8 | import java.util.HashMap; 9 | import java.util.LinkedHashMap; 10 | import java.util.List; 11 | 12 | import util.EmailDataset; 13 | import util.EmailMessage; 14 | 15 | 16 | public class test { 17 | 18 | /** 19 | * @param args 20 | * @throws FileNotFoundException 21 | */ 22 | public static void main(String[] args) throws FileNotFoundException { 23 | 24 | NaiveBayes nb = new NaiveBayes("labeled_train.tf", 20); 25 | // 26 | // TFReader reader = new TFReader("labeled_train.tf"); 27 | // 28 | // EmailDataset dataset = reader.read(); 29 | // EmailMessage msg = dataset.getMessages().get(5); 30 | // 31 | // int predicted = nb.classify(msg, 4); 32 | // System.out.println(predicted); 33 | 34 | nb.algoritmoEM("u0_eval.tf", "u1_eval.tf"); 35 | 36 | TFReader reader = new TFReader("u2_eval.tf"); 37 | 38 | 39 | nb.classifyAll(reader.read(), 4); 40 | 41 | 42 | 43 | } 44 | 45 | 46 | public static LinkedHashMap orderValues(LinkedHashMap map){ //de valor mais pequeno a mais grande 47 | LinkedHashMap newMap = new LinkedHashMap(); 48 | 49 | ArrayList values = new ArrayList(map.values()); 50 | Collections.sort(values); 51 | 52 | for(Double value : values){ 53 | for(Integer token : map.keySet()){ 54 | if(value == map.get(token)){ 55 | newMap.put(token, value); 56 | } 57 | 58 | } 59 | } 60 | 61 | return newMap; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/Test.java: -------------------------------------------------------------------------------- 1 | 2 | 3 | import io.TFReader; 4 | 5 | import java.io.FileNotFoundException; 6 | import java.util.ArrayList; 7 | import java.util.Collections; 8 | import 
java.util.HashMap; 9 | import java.util.Iterator; 10 | import java.util.LinkedHashMap; 11 | import java.util.List; 12 | 13 | import util.EmailDataset; 14 | import util.EmailMessage; 15 | 16 | 17 | public class Test { 18 | 19 | /** 20 | * @param args 21 | * @throws FileNotFoundException 22 | */ 23 | public static void main(String[] args) throws FileNotFoundException { 24 | 25 | NaiveBayes nb = new NaiveBayes("labeled_train.tf", 20, 30); 26 | // 27 | // TFReader reader = new TFReader("labeled_train.tf"); 28 | // 29 | // EmailDataset dataset = reader.read(); 30 | // EmailMessage msg = dataset.getMessages().get(5); 31 | // 32 | // int predicted = nb.classify(msg, 4); 33 | // System.out.println(predicted); 34 | 35 | nb.algoritmoEM("u0_eval.tf", "u1_eval.tf"); 36 | 37 | TFReader reader = new TFReader("u2_eval_lab.tf"); 38 | 39 | EmailDataset actual = reader.read(); 40 | EmailDataset predict = actual.clone(); 41 | nb.classifyAll(predict); 42 | 43 | List classifsActual = actual.getClassifications(); 44 | List classifsPredicted = predict.getClassifications(); //post classif 45 | 46 | Iterator it = classifsPredicted.iterator(); 47 | 48 | int count = 0; 49 | for(Integer c : classifsActual){ 50 | //System.out.print("c: "+c+" "); 51 | //System.out.println("c: "+it.next()+" "); 52 | if(c == it.next()) 53 | count++; 54 | } 55 | 56 | System.out.println("correct: "+count/(classifsActual.size()*1.0)); 57 | 58 | 59 | 60 | } 61 | 62 | 63 | public static LinkedHashMap orderValues(LinkedHashMap map){ //de valor mais pequeno a mais grande 64 | LinkedHashMap newMap = new LinkedHashMap(); 65 | 66 | ArrayList values = new ArrayList(map.values()); 67 | Collections.sort(values); 68 | 69 | for(Double value : values){ 70 | for(Integer token : map.keySet()){ 71 | if(value == map.get(token)){ 72 | newMap.put(token, value); 73 | } 74 | 75 | } 76 | } 77 | 78 | return newMap; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/io/TFReader.java: 
-------------------------------------------------------------------------------- 1 | package io; 2 | import java.io.FileNotFoundException; 3 | import java.io.FileReader; 4 | import java.util.HashMap; 5 | import java.util.LinkedList; 6 | import java.util.List; 7 | import java.util.Scanner; 8 | 9 | import util.EmailDataset; 10 | import util.EmailMessage; 11 | 12 | 13 | 14 | /** 15 | * Class used to create reader objects for .tf files 16 | * the files contain token information about email messages 17 | * 18 | * 19 | * @author davide 20 | * @author ainara 21 | */ 22 | public class TFReader { 23 | private String filename; 24 | 25 | /** 26 | * Constructor 27 | * 28 | * @param filename String the file to be read 29 | */ 30 | public TFReader(String filename){ 31 | this.filename = filename; 32 | 33 | } 34 | 35 | /** 36 | * Method used to read the tf file supplied in the constructor 37 | * and return an EmailDataset object containing all the 38 | * EmailMessages of the file 39 | * 40 | * @return messages EmailDataset - the messages loaded into a dataset 41 | * 42 | * @throws FileNotFoundException 43 | */ 44 | public EmailDataset read() throws FileNotFoundException{ 45 | //fist item spam second ham 46 | List messages = new LinkedList(); 47 | //read the file 48 | Scanner sc = new Scanner(new FileReader(filename)); 49 | 50 | HashMap currentHashMap = null; 51 | while(sc.hasNextLine()){ 52 | String line = sc.nextLine(); 53 | //System.out.println("current line:" + line); 54 | String[] pairs = line.split(" "); 55 | 56 | //System.out.println("line separated: "+Integer.parseInt(pairs[0])); 57 | 58 | int k = 0; 59 | int classification = Integer.parseInt(pairs[0]); 60 | if(classification == 1 || classification == -1) 61 | k = 1;//if classification is present start extracting tokens from index 1 of pairs 62 | //usefull as a flag to choose the EmailMessage constructor 63 | 64 | currentHashMap = new HashMap(); 65 | 66 | String[] integers = null; 67 | 68 | for(int i=1; i { 14 | private int 
classification; // 1 spam -1 ham 15 | private HashMap tokens; 16 | private boolean tagged; 17 | 18 | /** 19 | * Constructor for tagged message (classification information) 20 | * @param classification - 1 spam | -1 ham 21 | * @param tokens HashMap that maps a Token to a number of its occurences 22 | * token -> ocurrences 23 | */ 24 | public EmailMessage(int classification, HashMap tokens){ 25 | tagged = true; 26 | this.tokens = tokens; 27 | this.classification = classification; 28 | } 29 | 30 | /** 31 | * Constructor for untagged message (with no classification information) 32 | * @param tokens list of HashMap that maps a Token to a number of its occurences 33 | * token -> ocurrences 34 | */ 35 | public EmailMessage(HashMap tokens){ 36 | tagged = false; 37 | this.tokens = tokens; 38 | this.classification = 0; 39 | } 40 | 41 | /** 42 | * Method that returns a list of maps that map a token to its number of occurrences 43 | * 44 | * @return 45 | */ 46 | public HashMap getTokens(){ 47 | return this.tokens; 48 | } 49 | 50 | /** 51 | * Method that returns true if this is a spam message 52 | * @return true - spam message 53 | * false - non spam or untagged message 54 | */ 55 | public boolean isSpam(){ 56 | return (classification == 1); 57 | } 58 | 59 | /** 60 | * Method that returns true if the message has 61 | * information about its classification 62 | * @return true - tagged message 63 | */ 64 | public boolean isTagged(){ 65 | return tagged; 66 | } 67 | 68 | /** 69 | * Method that returns the classification of the message 70 | * 1 if it is spam, -1 if it is not spam 71 | * @return Integer 1 - spam 72 | * -1 - ham 73 | * 74 | * this.isSpam() -> this.getClassification() == 1 75 | * 76 | */ 77 | public int getClassification(){ 78 | return classification; 79 | } 80 | 81 | 82 | /** 83 | * Attribute a classification to this message 84 | * if the value of the class is correct 1 / -1 85 | * 86 | * @param c Integer 1 or -1 otherwise the message will not be classified 87 | */ 88 
| public void classify(int c){ 89 | if(c == 1 || c == -1){ 90 | this.classification = c; 91 | this.tagged = true; 92 | } 93 | } 94 | 95 | /** 96 | * clone operation for an email message 97 | * executes a clone over the token map 98 | * 99 | */ 100 | public EmailMessage clone(){ 101 | return new EmailMessage(getClassification(),new HashMap(tokens)); 102 | } 103 | 104 | 105 | /** 106 | * Returns a string representation of the email message object 107 | * 108 | */ 109 | public String toString(){ 110 | return this.tokens.toString(); 111 | } 112 | 113 | @Override 114 | public Iterator iterator() { 115 | return tokens.keySet().iterator(); 116 | } 117 | 118 | public boolean containsToken(int token){ 119 | return tokens.containsKey(tokens); 120 | } 121 | 122 | public void removeToken(int token){ 123 | tokens.remove(tokens); 124 | } 125 | 126 | 127 | public void filter(Collection tokenFilter){ 128 | for(Integer token : tokenFilter){ 129 | if(tokens.containsKey(token)) 130 | tokens.remove(token); 131 | } 132 | } 133 | 134 | 135 | 136 | 137 | } 138 | -------------------------------------------------------------------------------- /doc/deprecated-list.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Deprecated List 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |


29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 |
81 |
82 |

83 | Deprecated API

84 |
85 |
86 | Contents 88 | 89 |
90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 |
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/overview-tree.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Class Hierarchy 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 |
81 |
82 |

83 | Hierarchy For All Packages

84 |
85 |

86 | Class Hierarchy 87 |

88 | 92 |
93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 114 | 117 | 118 | 119 | 120 | 123 | 139 | 140 |
115 | 116 |
141 | 142 | 143 | 144 |
145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /src/TFReader.java: -------------------------------------------------------------------------------- 1 | import java.io.FileNotFoundException; 2 | import java.io.FileReader; 3 | import java.util.HashMap; 4 | import java.util.LinkedList; 5 | import java.util.List; 6 | import java.util.Scanner; 7 | 8 | 9 | 10 | 11 | public class TFReader { 12 | private String filename; 13 | private int numSpam; 14 | private int numHam; 15 | 16 | public TFReader(String filename){ 17 | this.filename = filename; 18 | 19 | } 20 | 21 | 22 | public List> trainRead() throws FileNotFoundException{ 23 | LinkedList> maps = new LinkedList>(); 24 | maps.add(new HashMap()); 25 | maps.add(new HashMap()); 26 | //fist item spam second ham 27 | 28 | //read the file 29 | Scanner sc = new Scanner(new FileReader(filename)); 30 | 31 | HashMap currentHashMap = null; 32 | while(sc.hasNextLine()){ 33 | String line = sc.nextLine(); 34 | //System.out.println("current line:" + line); 35 | String[] pairs = line.split(" "); 36 | 37 | //System.out.println("line separated: "+Integer.parseInt(pairs[0])); 38 | 39 | if(Integer.parseInt(pairs[0]) == 1){ 40 | currentHashMap = maps.getFirst(); 41 | numSpam++; 42 | } 43 | else if(Integer.parseInt(pairs[0]) == -1){ 44 | currentHashMap = maps.getLast(); 45 | numHam++; 46 | } 47 | 48 | String[] integers = null; 49 | for(int i=1; i> crossRead() throws FileNotFoundException{ 82 | LinkedList> maps = new LinkedList>(); 83 | maps.add(new HashMap()); 84 | maps.add(new HashMap()); 85 | //fist item spam second ham 86 | 87 | //read the file 88 | Scanner sc = new Scanner(new FileReader(filename)); 89 | 90 | HashMap currentHashMap = null; 91 | while(sc.hasNextLine()){ 92 | String line = sc.nextLine(); 93 | //System.out.println("current line:" + line); 94 | String[] pairs = line.split(" "); 95 | 96 | 97 | if(Math.random() > 0.5){//subtrain set 98 | 99 | 100 | 
if(Integer.parseInt(pairs[0]) == 1){ 101 | currentHashMap = maps.get(0); 102 | numSpam++; 103 | } 104 | else if(Integer.parseInt(pairs[0]) == -1){ 105 | currentHashMap = maps.get(1); 106 | numHam++; 107 | } 108 | 109 | String[] integers = null; 110 | for(int i=1; i(); 130 | maps.add(currentHashMap); 131 | 132 | String[] integers = null; 133 | for(int i=1; i> untaggedRead() throws FileNotFoundException{ 166 | LinkedList> maps = new LinkedList>(); 167 | 168 | //fist item spam second ham 169 | 170 | //read the file 171 | Scanner sc = new Scanner(new FileReader(filename)); 172 | 173 | HashMap currentHashMap = null; 174 | while(sc.hasNextLine()){ 175 | String line = sc.nextLine(); 176 | currentHashMap = new HashMap(); 177 | maps.add(currentHashMap); 178 | //System.out.println("current line:" + line); 179 | String[] pairs = line.split(" "); 180 | 181 | //System.out.println("line separated: "+Integer.parseInt(pairs[0])); 182 | 183 | 184 | 185 | String[] integers = null; 186 | for(int i=0; i 2 | 3 | 4 | 5 | 6 | 7 | Constant Field Values 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Constant Field Values

85 |
86 |
87 | Contents
    88 |
89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 113 | 116 | 117 | 118 | 119 | 122 | 138 | 139 |
114 | 115 |
140 | 141 | 142 | 143 |
144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /doc/package-use.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Uses of Package 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Uses of Package

85 |
86 | No usage of 87 |

88 |


89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 |
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/io/package-use.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Uses of Package io 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Uses of Package
io

85 |
86 | No usage of io 87 |

88 |


89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 |
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/class-use/Test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Uses of Class Test 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Uses of Class
Test

85 |
86 | No usage of Test 87 |

88 |


89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 |
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/class-use/RunSpam.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Uses of Class RunSpam 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Uses of Class
RunSpam

85 |
86 | No usage of RunSpam 87 |

88 |


89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 |
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/class-use/NaiveBayes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Uses of Class NaiveBayes 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Uses of Class
NaiveBayes

85 |
86 | No usage of NaiveBayes 87 |

88 |


89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 |
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/index-files/index-1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | A-Index 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 | A C G N T
81 |

82 | A

83 |
84 |
algoritmoEM(String, String) - 85 | Method in class NaiveBayes 86 |
Runs the EM algorithm on the current model 87 |
88 |
89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 110 | 113 | 114 | 115 | 116 | 119 | 135 | 136 |
111 | 112 |
137 | 138 | 139 | 140 | A C G N T
141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /doc/io/class-use/TFReader.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Uses of Class io.TFReader 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Uses of Class
io.TFReader

85 |
86 | No usage of io.TFReader 87 |

88 |


89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 |
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/index-files/index-5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | T-Index 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 | A C G N T
81 |

82 | T

83 |
84 |
threashold(String) - 85 | Static method in class NaiveBayes 86 |
Returns a list of Tables of false positive and false negative 87 | classifications to aid in the threshold choice 88 |
89 |
90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 111 | 114 | 115 | 116 | 117 | 120 | 136 | 137 |
112 | 113 |
138 | 139 | 140 | 141 | A C G N T
142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /doc/index-files/index-3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | G-Index 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 | A C G N T
81 |

82 | G

83 |
84 |
getLikehood() - 85 | Method in class NaiveBayes 86 |
Method used to calculate the likelihood between classified 87 | datasets with the expression: 88 | 89 | 90 |
91 |
92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 113 | 116 | 117 | 118 | 119 | 122 | 138 | 139 |
114 | 115 |
140 | 141 | 142 | 143 | A C G N T
144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /doc/package-tree.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Class Hierarchy 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Hierarchy For Package <Unnamed> 85 |

86 |
87 |
88 |
Package Hierarchies:
All Packages
89 |
90 |

91 | Class Hierarchy 92 |

93 | 97 |
98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 120 | 123 | 124 | 125 | 126 | 129 | 145 | 146 |
121 | 122 |
147 | 148 | 149 | 150 |
151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /doc/io/package-tree.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | io Class Hierarchy 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Hierarchy For Package io 85 |

86 |
87 |
88 |
Package Hierarchies:
All Packages
89 |
90 |

91 | Class Hierarchy 92 |

93 |
    94 |
  • java.lang.Object 96 |
97 |
98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 120 | 123 | 124 | 125 | 126 | 129 | 145 | 146 |
121 | 122 |
147 | 148 | 149 | 150 |
151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /doc/package-summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 41 | 44 | 45 | 46 | 47 | 50 | 66 | 67 |
42 | 43 |
68 | 69 | 70 | 71 |
72 |

73 | Package <Unnamed> 74 |

75 | 76 | 77 | 78 | 80 | 81 | 82 | 83 | 86 | 87 | 88 | 89 | 90 | 91 |
79 | Class Summary
NaiveBayesClass that represents a Naive Bayes Classifier 84 | this class constructs classifiers from given TF 85 | files using the TFReader
RunSpamMain class for testing purposes
92 |   93 | 94 |

95 |

96 |
97 |
98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 120 | 123 | 124 | 125 | 126 | 129 | 145 | 146 |
121 | 122 |
147 | 148 | 149 | 150 |
151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /doc/index-files/index-2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | C-Index 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 | A C G N T
81 |

82 | C

83 |
84 |
classify(EmailMessage, double) - 85 | Method in class NaiveBayes 86 |
Method that sets the classification value of the message 87 | according to the current model 88 |
classifyAll(EmailDataset) - 89 | Method in class NaiveBayes 90 |
Method used to classify a dataset of messages received 91 |
92 |
93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 114 | 117 | 118 | 119 | 120 | 123 | 139 | 140 |
115 | 116 |
141 | 142 | 143 | 144 | A C G N T
145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /doc/util/package-tree.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | util Class Hierarchy 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Hierarchy For Package util 85 |

86 |
87 |
88 |
Package Hierarchies:
All Packages
89 |
90 |

91 | Class Hierarchy 92 |

93 |
    94 |
  • java.lang.Object
      95 |
    • util.EmailDataset (implements java.lang.Cloneable, java.lang.Iterable<T>) 96 |
    • util.EmailMessage (implements java.lang.Iterable<T>) 97 |
    • util.Pair<T>
    98 |
99 |
100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 122 | 125 | 126 | 127 | 128 | 131 | 147 | 148 |
123 | 124 |
149 | 150 | 151 | 152 |
153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /doc/io/package-summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | io 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |

83 | Package io 84 |

85 | 86 | 87 | 88 | 90 | 91 | 92 | 93 | 95 | 96 |
89 | Class Summary
TFReaderClass used to create reader objects for .tf files 94 | the files contain token information about email messages
97 |   98 | 99 |

100 |

101 |
102 |
103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 125 | 128 | 129 | 130 | 131 | 134 | 150 | 151 |
126 | 127 |
152 | 153 | 154 | 155 |
156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /doc/index-files/index-4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | N-Index 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 | A C G N T
81 |

82 | N

83 |
84 |
NaiveBayes - Class in <Unnamed>
Class that represents a Naive Bayes Classifier 85 | this class constructs classifiers from given TF 86 | files using the TFReader
NaiveBayes(String, int, double) - 87 | Constructor for class NaiveBayes 88 |
Constructor 89 |
NaiveBayes(EmailDataset, int, int) - 90 | Constructor for class NaiveBayes 91 |
  92 |
93 |
94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 115 | 118 | 119 | 120 | 121 | 124 | 140 | 141 |
116 | 117 |
142 | 143 | 144 | 145 | A C G N T
146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /doc/util/package-summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | util 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |

83 | Package util 84 |

85 | 86 | 87 | 88 | 90 | 91 | 92 | 93 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 108 | 109 |
89 | Class Summary
EmailDatasetClass that represents a set of email messages 94 | usefull to be returned by the TFReader class 95 | 96 | encapsulates a list of Email Messages to ease the 97 | processing of muliple messages to generate usefull structures 98 | like HashMaps of the occurrency of all the tokens in a message set
EmailMessageClass that represents a line of data with a set of tokens and their
Pair<T>Class that represents a pair of object 107 | for utility purposes
110 |   111 | 112 |

113 |

114 |
115 |
116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 138 | 141 | 142 | 143 | 144 | 147 | 163 | 164 |
139 | 140 |
165 | 166 | 167 | 168 |
169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /src/RunSpam.java: -------------------------------------------------------------------------------- 1 | 2 | 3 | import io.TFReader; 4 | 5 | import java.io.FileNotFoundException; 6 | import java.util.Iterator; 7 | import java.util.LinkedHashMap; 8 | import java.util.List; 9 | import java.util.Scanner; 10 | 11 | import util.EmailDataset; 12 | import util.Pair; 13 | 14 | /** 15 | * Main class for testing purposes 16 | * 17 | * @author davide 18 | * @author ainara 19 | */ 20 | 21 | public class RunSpam { 22 | 23 | /** 24 | * prints a menu.... 25 | * @param args 26 | * @throws FileNotFoundException 27 | */ 28 | public static void main(String[] args) throws FileNotFoundException { 29 | int op = -1; 30 | Scanner scanner = new Scanner(System.in); 31 | do{ 32 | System.out.println("******* Welcome to the great spam classifier :D *******"); 33 | System.out.println("1.Train with labeled_train, EM with u0 and u2, classify u2"); 34 | System.out.println("2.Train with u0, EM with labeled_train and u2, classify u2"); 35 | System.out.println("3.Table thresholds"); 36 | System.out.println("4.Table thresholds with EM"); 37 | System.out.println("0.Exit"); 38 | op = scanner.nextInt(); 39 | 40 | 41 | switch(op){ 42 | case 0: 43 | System.out.println("Bye!!"); 44 | break; 45 | case 1: 46 | trainRunEMandClassify("labeled_train.tf", 47 | "u0_eval.tf", 48 | "u1_eval.tf", 49 | "u2_eval_lab.tf" 50 | ,30.0,0); 51 | break; 52 | case 2: 53 | trainRunEMandClassify("u0_eval_lab.tf", 54 | "labeled_train.tf", 55 | "u1_eval.tf", 56 | "u2_eval_lab.tf", 57 | 30.0,0); 58 | break; 59 | case 3: 60 | tableThreshold("labeled_train.tf"); 61 | break; 62 | 63 | case 4: 64 | tableThresholdsEM("u0_eval_lab.tf", 65 | "labeled_train.tf", 66 | "u1_eval.tf", 67 | "u2_eval_lab.tf" 68 | ); 69 | break; 70 | } 71 | 72 | }while(op != 0); 73 | 74 | 75 | 76 | 77 | 78 | 79 | } 80 | 81 | /** 82 | * Method used to test the 
model construction, EM algorithm and classification 83 | * @param file1Train 84 | * @param file2EM 85 | * @param file3EM 86 | * @param file4Class 87 | * @param threshold 88 | * @param significTokens 89 | * @throws FileNotFoundException 90 | */ 91 | private static void trainRunEMandClassify(String file1Train, 92 | String file2EM, 93 | String file3EM, 94 | String file4Class, 95 | double threshold, 96 | int significTokens) 97 | throws FileNotFoundException{ 98 | //create the basic naive bayes model 99 | NaiveBayes nb = new NaiveBayes(file1Train, threshold, significTokens); 100 | //refine the model 101 | nb.algoritmoEM(file2EM, file3EM, true); 102 | 103 | //classify the last file 104 | TFReader reader = new TFReader(file4Class); 105 | 106 | //check the correctness 107 | EmailDataset actual = reader.read(); 108 | EmailDataset predict = actual.clone(); 109 | nb.classifyAll(predict); 110 | printFPFN(actual.getClassifications(), predict.getClassifications()); 111 | 112 | 113 | 114 | } 115 | 116 | 117 | /** 118 | * Method to test the threshold list considering the EM method 119 | * @param file1Train 120 | * @param file2EM 121 | * @param file3EM 122 | * @param file4Class 123 | * @throws FileNotFoundException 124 | */ 125 | private static void tableThresholdsEM(String file1Train, 126 | String file2EM, 127 | String file3EM, 128 | String file4Class) 129 | throws FileNotFoundException{ 130 | NaiveBayes nb = null; 131 | for(int t=0; t<=30; t++){ 132 | //create the basic naive bayes model 133 | nb = new NaiveBayes(file1Train, t, 0); 134 | //refine the model 135 | nb.algoritmoEM(file2EM, file3EM, false); 136 | 137 | //classify the last file 138 | TFReader reader = new TFReader(file4Class); 139 | 140 | //check the correctness 141 | EmailDataset actual = reader.read(); 142 | EmailDataset predict = actual.clone(); 143 | nb.classifyAll(predict); 144 | printFPFN(actual.getClassifications(), predict.getClassifications()); 145 | System.out.println("T:"+t); 146 | } 147 | 148 | } 149 | 150 
| 151 | 152 | 153 | /** 154 | * Tables the threshols with the method presented (no enunciado) 155 | * @param filename 156 | * @throws FileNotFoundException 157 | */ 158 | private static void tableThreshold(String filename) throws FileNotFoundException{ 159 | LinkedHashMap> table = threashold(filename); 160 | for(Double t: table.keySet()){//print the table 161 | System.out.println("T: "+t+" FP: "+table.get(t).getFirst()+ 162 | " FN: "+table.get(t).getFirst()); 163 | 164 | } 165 | } 166 | 167 | 168 | /** 169 | * Returns a list of Tables of false positive and false negative 170 | * classifications to aid in the threshold choice 171 | * 172 | * @param filename - labelled data file to be used 173 | * @return List FP and FN table list 174 | * 175 | * @throws FileNotFoundException 176 | */ 177 | private static LinkedHashMap> threashold(String filename)throws FileNotFoundException{ 178 | LinkedHashMap> thresholds = new LinkedHashMap>(); 179 | 180 | //read file 181 | TFReader reader = new TFReader(filename); 182 | EmailDataset readData = reader.read(); 183 | Pair pair = readData.split(); 184 | 185 | EmailDataset train = pair.getFirst(); 186 | EmailDataset validation = pair.getSecont(); 187 | 188 | NaiveBayes model = null; 189 | 190 | //clasificaçao de emails com diferentes thresholdes 191 | for (int t=0; t<30;t++){ 192 | model = new NaiveBayes(train.clone(), t, 0);//create a new model 193 | 194 | //check the correctness 195 | EmailDataset predict = validation.clone(); 196 | 197 | model.classifyAll(predict); 198 | List classifsActual = validation.getClassifications(); 199 | List classifsPredicted = predict.getClassifications(); //post classif 200 | 201 | //System.out.println("actual:"+classifsActual); 202 | //System.out.println("predicted:"+classifsPredicted); 203 | 204 | Iterator it = classifsPredicted.iterator(); 205 | 206 | int fp = 0; 207 | int fn = 0; 208 | for(Integer act : classifsActual){//transverse the actual classifications 209 | int pred = it.next();//get 
predicted 210 | //System.out.println("act:"+act+" pred:"+pred); 211 | if(act == 1 && pred == -1){ 212 | fn++; 213 | }else if(act == -1 && pred == 1){ 214 | fp++; 215 | } 216 | 217 | 218 | } 219 | thresholds.put(new Double(t), new Pair(fp, fn)); 220 | 221 | 222 | 223 | } 224 | 225 | 226 | return thresholds; 227 | } 228 | 229 | 230 | /** 231 | * Prints the FP and FN comparing 2 classification lists 232 | * 233 | * @param ca - actual classification List 234 | * @param cp - predicted classification List 235 | */ 236 | private static void printFPFN(List ca, List cp){ 237 | List classifsActual = ca; 238 | List classifsPredicted = cp; 239 | 240 | Iterator it = classifsPredicted.iterator(); 241 | 242 | int correct = 0; 243 | int fp = 0; 244 | int fn = 0; 245 | for(Integer act : classifsActual){//transverse the actual classifications 246 | int pred = it.next();//get predicted 247 | if(act == pred) //correct classification 248 | correct++; 249 | else{ 250 | if(act == 1 && pred == -1) 251 | fn++; 252 | else if(act == -1 && pred == 1) 253 | fp++; 254 | 255 | } 256 | } 257 | 258 | System.out.print("correct: "+correct/(classifsActual.size()*1.0)); 259 | System.out.print(" FP:"+fp); 260 | System.out.println(" FN: "+fn); 261 | } 262 | 263 | } 264 | -------------------------------------------------------------------------------- /src/util/EmailDataset.java: -------------------------------------------------------------------------------- 1 | package util; 2 | import java.util.HashMap; 3 | import java.util.HashSet; 4 | import java.util.Iterator; 5 | import java.util.LinkedList; 6 | import java.util.List; 7 | 8 | 9 | /** 10 | * Class that represents a set of email messages 11 | * usefull to be returned by the TFReader class 12 | * 13 | * encapsulates a list of Email Messages to ease the 14 | * processing of muliple messages to generate usefull structures 15 | * like HashMaps of the occurrency of all the tokens in a message set 16 | * 17 | * 18 | * @author davide 19 | * @author ainara 
20 | * 21 | */ 22 | public class EmailDataset implements Cloneable, Iterable { 23 | private List messages; 24 | private int dictionaryDim; 25 | 26 | /** 27 | * Simple constructor recieves a set of email messages 28 | * @param messages List 29 | */ 30 | public EmailDataset(List messages){ 31 | this.messages = messages; 32 | dictionaryDim = calculateDictionaryDim(); 33 | } 34 | 35 | 36 | 37 | public EmailDataset() { 38 | this.messages = new LinkedList(); 39 | } 40 | 41 | 42 | 43 | public List getMessages(){ 44 | return messages; 45 | } 46 | 47 | /** 48 | * This method returns a pair of HashMaps that map the tokens 49 | * of the tagged messages to the total number of occurrencies 50 | * in all the messages of the dataset 51 | * 52 | * TOKEN -> TOTAL OCURR 53 | * 54 | * First - spam token ocurrencies 55 | * Second - ham token ocurrencies 56 | * 57 | * @return Pair 58 | */ 59 | public Pair> getTotalTokenOcurr(){ 60 | 61 | Pair> pair = new Pair>(); 62 | 63 | HashMap spamTable = new HashMap(); 64 | HashMap hamTable = new HashMap(); 65 | HashMap currentTable = null; //to select between both tables 66 | 67 | for(EmailMessage m : messages){ //foreach message in the dataset 68 | if(m.isTagged()){ //we can only separate tagged messages 69 | if(m.isSpam()) 70 | currentTable = spamTable; 71 | else 72 | currentTable = hamTable; 73 | 74 | //store the tokens in the current table 75 | HashMap tokens = m.getTokens(); 76 | for(Integer token : tokens.keySet()){ 77 | int currentOcurrencies = 0; 78 | if(currentTable.containsKey(token)) 79 | currentOcurrencies = currentTable.get(token); 80 | 81 | //update the current map token ocurr count 82 | //sums 0 if the tokens does not exist 83 | currentTable.put(token, (tokens.get(token) + currentOcurrencies)); 84 | }//end for each token in the current message 85 | 86 | }//end is tagged 87 | }//end for each message 88 | 89 | 90 | //chamar ao metodo para incluir todos os keys nas duas tabelas 91 | pair=completeKeys(spamTable, hamTable); 92 | return 
pair; 93 | } 94 | 95 | 96 | //TODO return desnecessário diria eu, ao alterar nas referências já fica tudo 97 | //alterado 98 | /** 99 | * Method that adds the keys missing in both 100 | * spam and ham tables 101 | * 102 | * @param spamTable - table with spam token occurrencies 103 | * @param hamTable - table with ham token occurrencies 104 | * @return ... 105 | */ 106 | private Pair>completeKeys(HashMap spamTable,HashMap hamTable ){ 107 | 108 | Pair> pair = new Pair>(); 109 | 110 | for(Integer key : spamTable.keySet()){//transverse spam table 111 | if (!hamTable.containsKey(key)){ 112 | hamTable.put(key, 0); 113 | } 114 | } 115 | for(Integer key : hamTable.keySet()){//transverse ham table 116 | if(!spamTable.containsKey(key)){ 117 | spamTable.put(key, 0); 118 | } 119 | } 120 | 121 | pair.setFirst(spamTable); 122 | pair.setSecond(hamTable); 123 | return pair; 124 | 125 | } 126 | 127 | /** 128 | * Method that returns the number of spam messages 129 | * present in this EmailDataset 130 | * 131 | * @return Integer number of spam message 132 | */ 133 | public int getNumSpam(){ 134 | int numSpam = 0; 135 | for(EmailMessage m: messages){ 136 | if(m.isTagged()){ 137 | if(m.isSpam()) numSpam++; 138 | } 139 | } 140 | return numSpam; 141 | } 142 | 143 | /** 144 | * Method that returns the number of ham messages 145 | * present in this EmailDataset 146 | * 147 | * @return Integer number of spam message 148 | */ 149 | public int getNumHam(){ 150 | int numHam = 0; 151 | for(EmailMessage m: messages){ 152 | if(m.isTagged()){ 153 | if(!m.isSpam()) numHam++; 154 | } 155 | } 156 | return numHam; 157 | } 158 | 159 | /** 160 | * Method that returns the number of the messages 161 | * in this email dataset 162 | * 163 | * @return Integer number of messages in this email dataset 164 | */ 165 | public int getNumMessages(){ 166 | return this.messages.size(); 167 | } 168 | 169 | /** 170 | * This method returns the dimension 171 | * of the dictionary present on this email dataset 172 | * -> 
173 | * NUMBER OF DISTINCT TOKENS in all the messages 174 | * 175 | * @return Integer - number of unique tokens 176 | */ 177 | public int calculateDictionaryDim(){ 178 | HashSet tokens = new HashSet(); 179 | for(EmailMessage m : messages){ 180 | tokens.addAll(m.getTokens().keySet()); 181 | } 182 | return tokens.size(); 183 | } 184 | 185 | public HashSet getDictionary(){ 186 | HashSet tokens = new HashSet(); 187 | for(EmailMessage m : messages){ 188 | tokens.addAll(m.getTokens().keySet()); 189 | } 190 | return tokens; 191 | } 192 | 193 | public int getDictionaryDim(){ 194 | return dictionaryDim; 195 | } 196 | 197 | //TODO test this 198 | /** 199 | * Method that implements the clone operation for this dataset 200 | * it creates a new dataset and clones all the entries 201 | * 202 | * @return clonedObject EmailDataset - cloned dataset to be returned 203 | */ 204 | public EmailDataset clone(){ 205 | List messages2 = new LinkedList(); 206 | for(EmailMessage m: messages){ 207 | messages2.add(m.clone()); 208 | } 209 | return new EmailDataset(messages2); 210 | } 211 | 212 | 213 | /** 214 | * Method that adds a list of messages to this dataset 215 | * 216 | * @param messageList List the list of messages to be 217 | * added to this dataset 218 | */ 219 | public void merge(EmailDataset dataset){ 220 | messages.addAll(dataset.getMessages()); 221 | } 222 | 223 | 224 | /** 225 | * Returns the string representation of the whole 226 | * dataset 227 | */ 228 | public String toString(){ 229 | StringBuilder sb = new StringBuilder(); 230 | for(EmailMessage m : messages){ 231 | sb.append(m.toString()); 232 | sb.append('\n'); 233 | } 234 | 235 | return sb.toString(); 236 | } 237 | 238 | /** 239 | * size forward method from List 240 | * 241 | * @return size Integer number of messages on the set 242 | */ 243 | public int size(){ 244 | return messages.size(); 245 | } 246 | 247 | 248 | 249 | 250 | @Override 251 | public Iterator iterator() { 252 | return messages.iterator(); 253 | } 254 | 
255 | public void add(EmailMessage msg){ 256 | messages.add(msg); 257 | } 258 | 259 | /** 260 | * Method used to splin the dataset 261 | * in a random fashion returning a pair of 262 | * datasets each one with the same size 263 | * 264 | * (Math.random() <0.5) 265 | * @return pair Pair - two datasets 266 | * that result from the split of the current one 267 | */ 268 | public Pair split(){ 269 | EmailDataset ds1 = new EmailDataset(); 270 | EmailDataset ds2 = new EmailDataset(); 271 | 272 | for(EmailMessage m : messages) 273 | if(Math.random() < 0.5) 274 | ds1.add(m.clone()); 275 | else 276 | ds2.add(m.clone()); 277 | 278 | return new Pair(ds1, ds2); 279 | } 280 | 281 | /** 282 | * Returns the list of classifications following the 283 | * same order of the messages stored 284 | * 285 | * @return classifications List 286 | */ 287 | public List getClassifications(){ 288 | List classifs = new LinkedList(); 289 | for(EmailMessage m: messages) 290 | classifs.add(m.getClassification()); 291 | return classifs; 292 | } 293 | 294 | 295 | 296 | 297 | } 298 | -------------------------------------------------------------------------------- /doc/util/class-use/Pair.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Uses of Class util.Pair 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 |
52 | 53 |
78 | 79 | 80 | 81 |
82 |
83 |

84 | Uses of Class
util.Pair

85 |
86 | 87 | 88 | 89 | 91 | 92 | 93 | 94 | 95 | 96 |
90 | Packages that use Pair
util  
97 |   98 |

99 | 100 | 101 | 102 | 104 | 105 |
103 | Uses of Pair in util
106 |   107 |

108 | 109 | 110 | 111 | 112 | 113 | 114 | 116 | 127 | 128 | 129 | 131 | 139 | 140 |
Methods in util that return Pair
115 |  Pair<java.util.HashMap<java.lang.Integer,java.lang.Integer>>EmailDataset.getTotalTokenOcurr() 117 | 118 |
119 |           This method returns a pair of HashMaps that map the tokens 120 | of the tagged messages to the total number of occurrencies 121 | in all the messages of the dataset 122 | 123 | TOKEN -> TOTAL OCURR 124 | 125 | First - spam token ocurrencies 126 | Second - ham token ocurrencies
130 |  Pair<EmailDataset>EmailDataset.split() 132 | 133 |
134 |           Method used to splin the dataset 135 | in a random fashion returning a pair of 136 | datasets each one with the same size 137 | 138 | (Math.random() <0.5)
141 |   142 |

143 |


144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 166 | 169 | 170 | 171 | 172 | 175 | 191 | 192 |
167 | 168 |
193 | 194 | 195 | 196 |
197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /doc/help-doc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | API Help 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 50 | 53 | 54 | 55 | 56 | 59 | 75 | 76 |
51 | 52 |
77 | 78 | 79 | 80 |
81 |
82 |

83 | How This API Document Is Organized

84 |
85 | This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.

86 | Package

87 |
88 | 89 |

90 | Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain four categories:

    91 |
  • Interfaces (italic)
  • Classes
  • Enums
  • Exceptions
  • Errors
  • Annotation Types
92 |
93 |

94 | Class/Interface

95 |
96 | 97 |

98 | Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    99 |
  • Class inheritance diagram
  • Direct Subclasses
  • All Known Subinterfaces
  • All Known Implementing Classes
  • Class/interface declaration
  • Class/interface description 100 |

    101 |

  • Nested Class Summary
  • Field Summary
  • Constructor Summary
  • Method Summary 102 |

    103 |

  • Field Detail
  • Constructor Detail
  • Method Detail
104 | Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
105 | 106 |

107 | Annotation Type

108 |
109 | 110 |

111 | Each annotation type has its own separate page with the following sections:

    112 |
  • Annotation Type declaration
  • Annotation Type description
  • Required Element Summary
  • Optional Element Summary
  • Element Detail
113 |
114 | 115 |

116 | Enum

117 |
118 | 119 |

120 | Each enum has its own separate page with the following sections:

    121 |
  • Enum declaration
  • Enum description
  • Enum Constant Summary
  • Enum Constant Detail
122 |
123 |

124 | Use

125 |
126 | Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.
127 |

128 | Tree (Class Hierarchy)

129 |
130 | There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.
    131 |
  • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
  • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
132 |
133 |

134 | Deprecated API

135 |
136 | The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
137 |

138 | Index

139 |
140 | The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
141 |

142 | Prev/Next

143 | These links take you to the next or previous class, interface, package, or related page.

144 | Frames/No Frames

145 | These links show and hide the HTML frames. All pages are available with or without frames. 146 |

147 |

148 | Serialized Form

149 | Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description. 150 |

151 |

152 | Constant Field Values

153 | The Constant Field Values page lists the static final fields and their values. 154 |

155 | 156 | 157 | This help file applies to API documentation generated using the standard doclet. 158 | 159 |
160 |


161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 182 | 185 | 186 | 187 | 188 | 191 | 207 | 208 |
183 | 184 |
209 | 210 | 211 | 212 |
213 | 214 | 215 | 216 | -------------------------------------------------------------------------------- /doc/RunSpam.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | RunSpam 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 | 78 | 80 | 82 | 83 |
52 | 53 |
84 | 85 | 86 | 87 |
88 | 89 |

90 | Class RunSpam

91 |
 92 | java.lang.Object
 93 |   extended by RunSpam
 94 | 
95 |
96 |
97 |
public class RunSpam
extends java.lang.Object
98 | 99 | 100 |

101 | Main class for testing purposes 102 |

103 | 104 |

105 |

106 |
Author:
107 |
davide, ainara
108 |
109 |
110 | 111 |

112 | 113 | 114 | 115 | 116 | 117 | 118 | 120 | 121 | 122 | 126 | 127 |
119 | Constructor Summary
RunSpam() 123 | 124 |
125 |            
128 |   129 | 130 | 131 | 132 | 133 | 134 | 136 | 137 | 138 | 140 | 144 | 145 |
135 | Method Summary
139 | static voidmain(java.lang.String[] args) 141 | 142 |
143 |           Prints a menu...
146 |   147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 |
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
155 |   156 |

157 | 158 | 159 | 160 | 161 | 162 | 163 | 165 | 166 |
164 | Constructor Detail
167 | 168 |

169 | RunSpam

170 |
171 | public RunSpam()
172 |
173 |
174 | 175 | 176 | 177 | 178 | 179 | 180 | 182 | 183 |
181 | Method Detail
184 | 185 |

186 | main

187 |
188 | public static void main(java.lang.String[] args)
189 |                  throws java.io.FileNotFoundException
190 |
191 |
Prints a menu... 192 |

193 |

194 |
Parameters:
args - 195 |
Throws: 196 |
java.io.FileNotFoundException
197 |
198 |
199 | 200 |
201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 223 | 226 | 227 | 228 | 229 | 232 | 248 | 249 | 250 | 252 | 254 | 255 |
224 | 225 |
256 | 257 | 258 | 259 |
260 | 261 | 262 | 263 | -------------------------------------------------------------------------------- /doc/Test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Test 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 | 78 | 80 | 82 | 83 |
52 | 53 |
84 | 85 | 86 | 87 |
88 | 89 |

90 | Class Test

91 |
 92 | java.lang.Object
 93 |   extended by Test
 94 | 
95 |
96 |
97 |
public class Test
extends java.lang.Object
98 | 99 | 100 |

101 |


102 | 103 |

104 | 105 | 106 | 107 | 108 | 109 | 110 | 112 | 113 | 114 | 118 | 119 |
111 | Constructor Summary
Test() 115 | 116 |
117 |            
120 |   121 | 122 | 123 | 124 | 125 | 126 | 128 | 129 | 130 | 132 | 136 | 137 | 138 | 140 | 144 | 145 |
127 | Method Summary
131 | static voidmain(java.lang.String[] args) 133 | 134 |
135 |            
139 | static java.util.LinkedHashMap<java.lang.Integer,java.lang.Double>orderValues(java.util.LinkedHashMap<java.lang.Integer,java.lang.Double> map) 141 | 142 |
143 |            
146 |   147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 |
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
155 |   156 |

157 | 158 | 159 | 160 | 161 | 162 | 163 | 165 | 166 |
164 | Constructor Detail
167 | 168 |

169 | Test

170 |
171 | public Test()
172 |
173 |
174 | 175 | 176 | 177 | 178 | 179 | 180 | 182 | 183 |
181 | Method Detail
184 | 185 |

186 | main

187 |
188 | public static void main(java.lang.String[] args)
189 |                  throws java.io.FileNotFoundException
190 |
191 |
192 |
Parameters:
args - 193 |
Throws: 194 |
java.io.FileNotFoundException
195 |
196 |
197 |
198 | 199 |

200 | orderValues

201 |
202 | public static java.util.LinkedHashMap<java.lang.Integer,java.lang.Double> orderValues(java.util.LinkedHashMap<java.lang.Integer,java.lang.Double> map)
203 |
204 |
205 |
206 |
207 |
208 | 209 |
210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 232 | 235 | 236 | 237 | 238 | 241 | 257 | 258 | 259 | 261 | 263 | 264 |
233 | 234 |
265 | 266 | 267 | 268 |
269 | 270 | 271 | 272 | -------------------------------------------------------------------------------- /doc/io/TFReader.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | TFReader 8 | 9 | 10 | 11 | 12 | 13 | 14 | 22 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 51 | 54 | 55 | 56 | 57 | 60 | 76 | 77 | 78 | 80 | 82 | 83 |
52 | 53 |
84 | 85 | 86 | 87 |
88 | 89 |

90 | 91 | io 92 |
93 | Class TFReader

94 |
 95 | java.lang.Object
 96 |   extended by io.TFReader
 97 | 
98 |
99 |
100 |
public class TFReader
extends java.lang.Object
101 | 102 | 103 |

104 | Class used to create reader objects for .tf files. 105 | The files contain token information about email messages. 106 |

107 | 108 |

109 |

110 |
Author:
111 |
davide, ainara
112 |
113 |
114 | 115 |

116 | 117 | 118 | 119 | 120 | 121 | 122 | 124 | 125 | 126 | 130 | 131 |
123 | Constructor Summary
TFReader(java.lang.String filename) 127 | 128 |
129 |           Constructor
132 |   133 | 134 | 135 | 136 | 137 | 138 | 140 | 141 | 142 | 144 | 150 | 151 |
139 | Method Summary
143 |  EmailDatasetread() 145 | 146 |
147 |           Method used to read the tf file supplied in the constructor 148 | and return an EmailDataset object containing all the 149 | EmailMessages of the file
152 |   153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 |
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
161 |   162 |

163 | 164 | 165 | 166 | 167 | 168 | 169 | 171 | 172 |
170 | Constructor Detail
173 | 174 |

175 | TFReader

176 |
177 | public TFReader(java.lang.String filename)
178 |
179 |
Constructor 180 |

181 |

182 |
Parameters:
filename - String the file to be read
183 |
184 | 185 | 186 | 187 | 188 | 189 | 190 | 192 | 193 |
191 | Method Detail
194 | 195 |

196 | read

197 |
198 | public EmailDataset read()
199 |                   throws java.io.FileNotFoundException
200 |
201 |
Method used to read the tf file supplied in the constructor 202 | and return an EmailDataset object containing all the 203 | EmailMessages of the file 204 |

205 |

206 | 207 |
Returns:
messages EmailDataset - the messages loaded into a dataset 208 |
Throws: 209 |
java.io.FileNotFoundException
210 |
211 |
212 | 213 |
214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 236 | 239 | 240 | 241 | 242 | 245 | 261 | 262 | 263 | 265 | 267 | 268 |
237 | 238 |
269 | 270 | 271 | 272 |
273 | 274 | 275 | 276 | --------------------------------------------------------------------------------