├── .gitignore ├── out └── production │ └── LeafeeWordCloud │ └── LeafeeWordCloud │ ├── result.txt │ ├── Word.class │ ├── ChainList.class │ ├── MyFunction.class │ ├── MyOutput.class │ ├── ChainList$Node.class │ ├── LeafeeWordCloud.class │ └── LeafeeWordCloud$1.class ├── LeafeeWordCloud.iml ├── src └── main │ └── java │ ├── META-INF │ └── MANIFEST.MF │ └── LeafeeWordCloud │ ├── Word.java │ ├── ChainList.java │ └── LeafeeWordCloud.java └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | out/ 3 | target/ 4 | -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/result.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LeafeeWordCloud.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/main/java/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Main-Class: LeafeeWordCloud.LeafeeWordCloud 3 | 4 | -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/Word.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/Word.class -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList.class -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/MyFunction.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/MyFunction.class -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/MyOutput.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/MyOutput.class -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList$Node.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList$Node.class -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud.class -------------------------------------------------------------------------------- /out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud$1.class -------------------------------------------------------------------------------- /src/main/java/LeafeeWordCloud/Word.java: -------------------------------------------------------------------------------- 1 | package LeafeeWordCloud; 2 | 3 | class Word { 4 | private String str; 5 | private int count; 6 | 7 | public Word(Word w) { 8 | this(w.str, w.count); 9 | } 10 | public Word(String str) { 11 | this(str, 1); 12 | } 13 | public Word(String str, int count) { 14 | this.str = str; 15 | this.count = count; 16 | } 17 | public boolean equals(Word w) { 18 | return this.str.equals(w.str); 19 | } 20 | public boolean equals(String str) { 21 | return this.str.equals(str); 22 | } 23 | public boolean isBigger(Word w) {return this.count > w.count; } 24 | public String toString() { return this.str; } 25 | public void count() { 26 | ++this.count; 27 | } 28 | public int getCount() { 29 | return this.count; 30 | } 31 | public void setCount(int count) { this.count = count; } 32 | public String getString() { 33 | return this.str; 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | Leafee 8 | LeafeeWordCloud 9 | 0.8-SNAPSHOT 10 | 11 | 12 | 1.6 13 | 1.6 14 | 15 | 16 | 17 | 18 | 19 | org.apache.maven.plugins 20 | maven-jar-plugin 21 | 22 | 23 | 24 | true 25 | LeafeeWordCloud.LeafeeWordCloud 26 | 27 | 28 | 29 | 30 | 31 | org.apache.maven.plugins 32 | maven-dependency-plugin 33 | 34 | 35 | copy-dependencies 36 | prepare-package 37 | 38 | copy-dependencies 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | com.kennycason 50 | kumo-core 51 | 1.17 52 | 53 | 54 | com.kennycason 55 | kumo-tokenizers 56 | 1.17 57 | 58 | 59 | org.slf4j 60 | slf4j-jdk14 61 | 1.8.0-alpha2 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /src/main/java/LeafeeWordCloud/ChainList.java: -------------------------------------------------------------------------------- 1 | package LeafeeWordCloud; 2 | 3 | import java.util.List; 4 | import java.util.ArrayList; 5 | import java.util.Comparator; 6 | 7 | class ChainList { 8 | private class Node { 9 | public Word item; 10 | public Node next; 11 | public Node(Word item, Node next) { 12 | this.item = new Word(item); 13 | this.next = next; 14 | } 15 | } 16 | private Node head; 17 | 18 | public ChainList() { 19 | this.head = null; 20 | } 21 | public void insertBack(Word item) { 22 | if (head == null) { 23 | head = new Node(item, null); 24 | } 25 | else { 26 | Node temp = head; 27 | while (temp.next != null) 28 | temp = temp.next; 29 | temp.next = new Node(item, null); 30 | } 31 | } 32 | public Word find(String item) { 33 | Node temp = head; 34 | while (temp != null && !temp.item.equals(item)) 35 | temp = temp.next; 36 | return temp == null ? null : temp.item; 37 | } 38 | public boolean remove(String str) { 39 | // if empty chainList; 40 | if (head == null) 41 | return false; 42 | 43 | // if the first node; 44 | if (head.item.equals(str)) { 45 | head = head.next; 46 | return true; 47 | } 48 | else { 49 | // find the Node which before the destination 50 | // and delete by using Node.next 51 | // if it is the last node, it will be also judged by using temp.next.item 52 | Node temp = head; 53 | while (temp.next != null && !temp.next.item.equals(str)) 54 | temp = temp.next; 55 | if (temp.next == null) 56 | return false; 57 | else { 58 | // Leafee: always want to delete something 59 | temp.next = temp.next.next; 60 | return true; 61 | } 62 | } 63 | } 64 | public List toList() { 65 | if (head == null) 66 | return null; 67 | ArrayList arrList = new ArrayList(); 68 | for (Node i = head; i != null; i = i.next) 69 | arrList.add(i.item); 70 | 71 | // // for some reason, the sort doesn't work 72 | // arrList.sort(new Comparator() { 73 | // public int compare(Word o1, Word o2) { 74 | // return o1.getString().compareTo(o2.getString()); 75 | // } 76 | // }); 77 | 78 | return arrList; 79 | } 80 | } 81 | 82 | -------------------------------------------------------------------------------- /src/main/java/LeafeeWordCloud/LeafeeWordCloud.java: -------------------------------------------------------------------------------- 1 | package LeafeeWordCloud; 2 | 3 | import java.awt.*; 4 | import java.io.*; 5 | import java.util.Arrays; 6 | import java.util.Scanner; 7 | import java.util.List; 8 | import java.util.ArrayList; 9 | 10 | import com.kennycason.kumo.CollisionMode; 11 | import com.kennycason.kumo.WordCloud; 12 | import com.kennycason.kumo.WordFrequency; 13 | import com.kennycason.kumo.bg.CircleBackground; 14 | import com.kennycason.kumo.bg.PixelBoundryBackground; 15 | import com.kennycason.kumo.font.FontWeight; 16 | import com.kennycason.kumo.font.KumoFont; 17 | import com.kennycason.kumo.font.scale.SqrtFontScalar; 18 | import com.kennycason.kumo.nlp.FrequencyAnalyzer; 19 | import com.kennycason.kumo.nlp.tokenizers.ChineseWordTokenizer; 20 | import com.kennycason.kumo.palette.ColorPalette; 21 | 22 | public class LeafeeWordCloud{ 23 | static ChainList wordList = new ChainList(); 24 | static Scanner input = new Scanner(System.in); 25 | 26 | 27 | // --in-file --stop-words-file -stop-words-handle 28 | // --word-amount --in-picture 29 | // --out-picture --picture-width --picture-height 30 | // --font-name 31 | public static void main(String[] args) { 32 | String inFile = null, stopWordsFile = null; 33 | String inPicture = null, outPicture = "output.png"; 34 | String fontName = "微软雅黑", charset = "utf-8"; 35 | int wordAmount = 600, pictureWidth = 600, pictureHeight = 600; 36 | boolean stopWordsHandle = false, help = false; 37 | for (int i = 0; i < args.length; ++i) { 38 | if (args[i].equals("--in-file")) 39 | inFile = args[++i]; 40 | else if (args[i].equals("--stop-words-file")) 41 | stopWordsFile = args[++i]; 42 | else if (args[i].equals("--stop-words-handle")) 43 | stopWordsHandle = true; 44 | else if (args[i].equals("--word-amount")) 45 | wordAmount = Integer.parseInt(args[++i]); 46 | else if (args[i].equals("--in-picture")) 47 | inPicture = args[++i]; 48 | else if (args[i].equals("--out-picture")) 49 | outPicture = args[++i]; 50 | else if (args[i].equals("--picture-width")) 51 | pictureWidth = Integer.parseInt(args[++i]); 52 | else if (args[i].equals("--picture-height")) 53 | pictureHeight = Integer.parseInt(args[++i]); 54 | else if (args[i].equals("--font-name")) 55 | fontName = args[++i]; 56 | else if (args[i].equals("--charset")) 57 | charset = args[++i]; 58 | else if (args[i].equals("--help")) 59 | help = true; 60 | else { 61 | System.out.println("unrecognized option:" + args[i]); 62 | System.exit(-3); 63 | } 64 | } 65 | if (help == true) 66 | printHelp(); 67 | if (inFile == null){ 68 | System.out.println("in-file is required"); 69 | System.exit(-1); 70 | } 71 | 72 | Word temp = null; 73 | // read the content file 74 | for (String s : readFile(openFile(inFile, charset))) { 75 | if ((temp = wordList.find(s)) != null) 76 | temp.count(); 77 | else 78 | wordList.insertBack(new Word(s)); 79 | } 80 | 81 | 82 | // remove the tab stop, or it will throw "width could less equal than 0" 83 | // because the tab will not be displayed on an picture; 84 | wordList.remove("\t"); 85 | 86 | if (stopWordsHandle) { 87 | // remove the stop words 88 | String stopWord = null; 89 | String removeMessage = null; 90 | // input the stop words by hand 91 | System.out.println("Please input the stop words you want to remove from the wordCloud,\n" + 92 | "input three equal sign(===) to terminate input:"); 93 | for (stopWord = input.next().trim(); !stopWord.equals("==="); ) { 94 | removeMessage = wordList.remove(stopWord) ? "Succeed!" : "This word is not contained by wordList."; 95 | System.out.println(removeMessage); 96 | stopWord = input.next().trim(); 97 | } 98 | } 99 | 100 | 101 | if (stopWordsFile != null) 102 | for (String si : readFile(openFile(stopWordsFile, charset))) 103 | wordList.remove(si); 104 | 105 | 106 | // translate from Word which I built, to WordFrequency the kumo made 107 | List sortedWords = wordList.toList(); 108 | List wfList = new ArrayList(); 109 | for (Word x : sortedWords) 110 | wfList.add(new WordFrequency(x.toString(), x.getCount())); 111 | 112 | // create FrequencyAnalyzer, which can load from file, List, webPage and etc; 113 | // It also can filter the wordAmount most frequent words, the shortest word must longer than 2 114 | // Leafee: and Chinese Tokenizer, which waste me lot of time to solve the dependency 115 | final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); 116 | frequencyAnalyzer.setWordFrequenciesToReturn(wordAmount); 117 | frequencyAnalyzer.setMinWordLength(3); 118 | frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer()); 119 | List wordFrequencies = null; 120 | 121 | // read WordFrequencies form the list I created 122 | try { 123 | wordFrequencies = frequencyAnalyzer.loadWordFrequencies(wfList); 124 | } catch (Exception e) { 125 | e.printStackTrace(); 126 | } 127 | 128 | // draw picture with wordFrequencies and output to a file; 129 | final Dimension dimension = new Dimension(pictureWidth, pictureHeight); 130 | final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT); 131 | try { 132 | if (inPicture == null) 133 | wordCloud.setBackground(new CircleBackground((pictureHeight < pictureWidth ? pictureHeight : pictureWidth) / 2)); 134 | else 135 | wordCloud.setBackground(new PixelBoundryBackground(new File(inPicture))); 136 | wordCloud.setPadding(2); 137 | wordCloud.setColorPalette(new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), 138 | new Color(0x9A8CF5), new Color(0x806EF5))); 139 | wordCloud.setFontScalar(new SqrtFontScalar(12, 45)); 140 | wordCloud.setKumoFont(new KumoFont(fontName, FontWeight.BOLD)); 141 | // wordCloud.build(wfList); // if I use my List, it will create 4500+ words, without filter 142 | wordCloud.build(wordFrequencies); 143 | wordCloud.writeToFile(outPicture); 144 | } catch (IOException e) { 145 | e.printStackTrace(); 146 | System.exit(-2); 147 | } 148 | } 149 | 150 | private static BufferedReader openFile(String path, String charset) { 151 | File file = null; 152 | BufferedReader bufferedreader = null; 153 | // open the file. The BufferedReader is used to read file by line 154 | try { 155 | // read the file path, in case that the program running in a different computer 156 | file = new File(path); 157 | FileInputStream fileInputStream = new FileInputStream(file); 158 | InputStreamReader inputstreamreader = new InputStreamReader(fileInputStream, charset); 159 | bufferedreader = new BufferedReader(inputstreamreader); 160 | } catch (Exception e) { 161 | e.printStackTrace(); 162 | System.exit(-1); 163 | } 164 | return bufferedreader; 165 | } 166 | 167 | private static List readFile(BufferedReader bufferedreader) { 168 | // read file and add the words into wordList 169 | List list = new ArrayList(); 170 | try { 171 | while (bufferedreader.ready()) { 172 | String[] temp = bufferedreader.readLine().split(" "); 173 | list.addAll(Arrays.asList(temp)); 174 | } 175 | bufferedreader.close(); 176 | } catch (IOException e) { 177 | e.printStackTrace(); 178 | System.exit(-1); 179 | } 180 | return list; 181 | } 182 | 183 | private static void printHelp() { 184 | System.out.println("\t--in-file pathOfContentFileName * read the content file from this path"); 185 | System.out.println("\t--stop-words-file pathOfStopWordFile read the stop words from there"); 186 | System.out.println("\t--stop-words-handle if you want press stop words yourself, add this parameter without value"); 187 | System.out.println("\t--word-amount number the amount of words will be displayed on picture"); 188 | System.out.println("\t--in-picture path read the background picture"); 189 | System.out.println("\t--out-picture path output the picture to path"); 190 | System.out.println("\t--picture-width number width of output picture"); 191 | System.out.println("\t--picture-height number height of output picture"); 192 | System.out.println("\t--font-name fontName the font you want on picture"); 193 | System.out.println("\t--charset charsetName change the charset used to read file"); 194 | System.out.println("\t--help print this menu\n"); 195 | System.out.println(" the argument with * is required"); 196 | System.out.println(" if there are spaces in some arguments, use quote around them"); 197 | System.out.println(" example: java -jar JAR --in-file inputFile.txt"); 198 | System.out.println(" java -jar JAR --in-file inputFile.txt --stop-words-file StopWords.txt --out-picture result.png"); 199 | System.out.println(" java -jar JAR --in-file inputFile.txt --stop-words-file StopWords.txt --in-picture in.png --picture-width 640 --picture-height 640 --out-picture out.png --font-name \"YaHei Consolas Hybrid\"\n"); 200 | System.out.println(" default values :"); 201 | System.out.println(" stop-words-file = null"); 202 | System.out.println(" word-amount = 600"); 203 | System.out.println(" in-picture = null"); 204 | System.out.println(" out-picture = out.png"); 205 | System.out.println(" picture-width = 600"); 206 | System.out.println(" picture-height = 600"); 207 | System.out.println(" font-name = 微软雅黑"); 208 | System.out.println(" charset = utf-8"); 209 | System.exit(0); 210 | } 211 | } --------------------------------------------------------------------------------