├── .gitignore
├── out
└── production
│ └── LeafeeWordCloud
│ └── LeafeeWordCloud
│ ├── result.txt
│ ├── Word.class
│ ├── ChainList.class
│ ├── MyFunction.class
│ ├── MyOutput.class
│ ├── ChainList$Node.class
│ ├── LeafeeWordCloud.class
│ └── LeafeeWordCloud$1.class
├── LeafeeWordCloud.iml
├── src
└── main
│ └── java
│ ├── META-INF
│ └── MANIFEST.MF
│ └── LeafeeWordCloud
│ ├── Word.java
│ ├── ChainList.java
│ └── LeafeeWordCloud.java
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | out/
3 | target/
4 |
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/result.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/LeafeeWordCloud.iml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/main/java/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Main-Class: LeafeeWordCloud.LeafeeWordCloud
3 |
4 |
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/Word.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/Word.class
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList.class
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/MyFunction.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/MyFunction.class
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/MyOutput.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/MyOutput.class
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList$Node.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/ChainList$Node.class
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud.class
--------------------------------------------------------------------------------
/out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leafee98/LeafeeWordCloud/master/out/production/LeafeeWordCloud/LeafeeWordCloud/LeafeeWordCloud$1.class
--------------------------------------------------------------------------------
/src/main/java/LeafeeWordCloud/Word.java:
--------------------------------------------------------------------------------
1 | package LeafeeWordCloud;
2 |
3 | class Word {
4 | private String str;
5 | private int count;
6 |
7 | public Word(Word w) {
8 | this(w.str, w.count);
9 | }
10 | public Word(String str) {
11 | this(str, 1);
12 | }
13 | public Word(String str, int count) {
14 | this.str = str;
15 | this.count = count;
16 | }
17 | public boolean equals(Word w) {
18 | return this.str.equals(w.str);
19 | }
20 | public boolean equals(String str) {
21 | return this.str.equals(str);
22 | }
23 | public boolean isBigger(Word w) {return this.count > w.count; }
24 | public String toString() { return this.str; }
25 | public void count() {
26 | ++this.count;
27 | }
28 | public int getCount() {
29 | return this.count;
30 | }
31 | public void setCount(int count) { this.count = count; }
32 | public String getString() {
33 | return this.str;
34 | }
35 | }
36 |
37 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | Leafee
8 | LeafeeWordCloud
9 | 0.8-SNAPSHOT
10 |
11 |
12 | 1.6
13 | 1.6
14 |
15 |
16 |
17 |
18 |
19 | org.apache.maven.plugins
20 | maven-jar-plugin
21 |
22 |
23 |
24 | true
25 | LeafeeWordCloud.LeafeeWordCloud
26 |
27 |
28 |
29 |
30 |
31 | org.apache.maven.plugins
32 | maven-dependency-plugin
33 |
34 |
35 | copy-dependencies
36 | prepare-package
37 |
38 | copy-dependencies
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 | com.kennycason
50 | kumo-core
51 | 1.17
52 |
53 |
54 | com.kennycason
55 | kumo-tokenizers
56 | 1.17
57 |
58 |
59 | org.slf4j
60 | slf4j-jdk14
61 | 1.8.0-alpha2
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/src/main/java/LeafeeWordCloud/ChainList.java:
--------------------------------------------------------------------------------
1 | package LeafeeWordCloud;
2 |
3 | import java.util.List;
4 | import java.util.ArrayList;
5 | import java.util.Comparator;
6 |
7 | class ChainList {
8 | private class Node {
9 | public Word item;
10 | public Node next;
11 | public Node(Word item, Node next) {
12 | this.item = new Word(item);
13 | this.next = next;
14 | }
15 | }
16 | private Node head;
17 |
18 | public ChainList() {
19 | this.head = null;
20 | }
21 | public void insertBack(Word item) {
22 | if (head == null) {
23 | head = new Node(item, null);
24 | }
25 | else {
26 | Node temp = head;
27 | while (temp.next != null)
28 | temp = temp.next;
29 | temp.next = new Node(item, null);
30 | }
31 | }
32 | public Word find(String item) {
33 | Node temp = head;
34 | while (temp != null && !temp.item.equals(item))
35 | temp = temp.next;
36 | return temp == null ? null : temp.item;
37 | }
38 | public boolean remove(String str) {
39 | // if empty chainList;
40 | if (head == null)
41 | return false;
42 |
43 | // if the first node;
44 | if (head.item.equals(str)) {
45 | head = head.next;
46 | return true;
47 | }
48 | else {
49 | // find the Node which before the destination
50 | // and delete by using Node.next
51 | // if it is the last node, it will be also judged by using temp.next.item
52 | Node temp = head;
53 | while (temp.next != null && !temp.next.item.equals(str))
54 | temp = temp.next;
55 | if (temp.next == null)
56 | return false;
57 | else {
58 | // Leafee: always want to delete something
59 | temp.next = temp.next.next;
60 | return true;
61 | }
62 | }
63 | }
64 | public List toList() {
65 | if (head == null)
66 | return null;
67 | ArrayList arrList = new ArrayList();
68 | for (Node i = head; i != null; i = i.next)
69 | arrList.add(i.item);
70 |
71 | // // for some reason, the sort doesn't work
72 | // arrList.sort(new Comparator() {
73 | // public int compare(Word o1, Word o2) {
74 | // return o1.getString().compareTo(o2.getString());
75 | // }
76 | // });
77 |
78 | return arrList;
79 | }
80 | }
81 |
82 |
--------------------------------------------------------------------------------
/src/main/java/LeafeeWordCloud/LeafeeWordCloud.java:
--------------------------------------------------------------------------------
1 | package LeafeeWordCloud;
2 |
3 | import java.awt.*;
4 | import java.io.*;
5 | import java.util.Arrays;
6 | import java.util.Scanner;
7 | import java.util.List;
8 | import java.util.ArrayList;
9 |
10 | import com.kennycason.kumo.CollisionMode;
11 | import com.kennycason.kumo.WordCloud;
12 | import com.kennycason.kumo.WordFrequency;
13 | import com.kennycason.kumo.bg.CircleBackground;
14 | import com.kennycason.kumo.bg.PixelBoundryBackground;
15 | import com.kennycason.kumo.font.FontWeight;
16 | import com.kennycason.kumo.font.KumoFont;
17 | import com.kennycason.kumo.font.scale.SqrtFontScalar;
18 | import com.kennycason.kumo.nlp.FrequencyAnalyzer;
19 | import com.kennycason.kumo.nlp.tokenizers.ChineseWordTokenizer;
20 | import com.kennycason.kumo.palette.ColorPalette;
21 |
22 | public class LeafeeWordCloud{
23 | static ChainList wordList = new ChainList();
24 | static Scanner input = new Scanner(System.in);
25 |
26 |
27 | // --in-file --stop-words-file --stop-words-handle
28 | // --word-amount --in-picture
29 | // --out-picture --picture-width --picture-height
30 | // --font-name --charset --help
31 | public static void main(String[] args) {
32 | String inFile = null, stopWordsFile = null;
33 | String inPicture = null, outPicture = "output.png";
34 | String fontName = "微软雅黑", charset = "utf-8";
35 | int wordAmount = 600, pictureWidth = 600, pictureHeight = 600;
36 | boolean stopWordsHandle = false, help = false;
37 | for (int i = 0; i < args.length; ++i) {
38 | if (args[i].equals("--in-file"))
39 | inFile = args[++i];
40 | else if (args[i].equals("--stop-words-file"))
41 | stopWordsFile = args[++i];
42 | else if (args[i].equals("--stop-words-handle"))
43 | stopWordsHandle = true;
44 | else if (args[i].equals("--word-amount"))
45 | wordAmount = Integer.parseInt(args[++i]);
46 | else if (args[i].equals("--in-picture"))
47 | inPicture = args[++i];
48 | else if (args[i].equals("--out-picture"))
49 | outPicture = args[++i];
50 | else if (args[i].equals("--picture-width"))
51 | pictureWidth = Integer.parseInt(args[++i]);
52 | else if (args[i].equals("--picture-height"))
53 | pictureHeight = Integer.parseInt(args[++i]);
54 | else if (args[i].equals("--font-name"))
55 | fontName = args[++i];
56 | else if (args[i].equals("--charset"))
57 | charset = args[++i];
58 | else if (args[i].equals("--help"))
59 | help = true;
60 | else {
61 | System.out.println("unrecognized option:" + args[i]);
62 | System.exit(-3);
63 | }
64 | }
65 | if (help == true)
66 | printHelp();
67 | if (inFile == null){
68 | System.out.println("in-file is required");
69 | System.exit(-1);
70 | }
71 |
72 | Word temp = null;
73 | // read the content file
74 | for (String s : readFile(openFile(inFile, charset))) {
75 | if ((temp = wordList.find(s)) != null)
76 | temp.count();
77 | else
78 | wordList.insertBack(new Word(s));
79 | }
80 |
81 |
82 | // remove the tab stop, or it will throw "width could less equal than 0"
83 | // because the tab will not be displayed on an picture;
84 | wordList.remove("\t");
85 |
86 | if (stopWordsHandle) {
87 | // remove the stop words
88 | String stopWord = null;
89 | String removeMessage = null;
90 | // input the stop words by hand
91 | System.out.println("Please input the stop words you want to remove from the wordCloud,\n" +
92 | "input three equal sign(===) to terminate input:");
93 | for (stopWord = input.next().trim(); !stopWord.equals("==="); ) {
94 | removeMessage = wordList.remove(stopWord) ? "Succeed!" : "This word is not contained by wordList.";
95 | System.out.println(removeMessage);
96 | stopWord = input.next().trim();
97 | }
98 | }
99 |
100 |
101 | if (stopWordsFile != null)
102 | for (String si : readFile(openFile(stopWordsFile, charset)))
103 | wordList.remove(si);
104 |
105 |
106 | // translate from Word which I built, to WordFrequency the kumo made
107 | List sortedWords = wordList.toList();
108 | List wfList = new ArrayList();
109 | for (Word x : sortedWords)
110 | wfList.add(new WordFrequency(x.toString(), x.getCount()));
111 |
112 | // create FrequencyAnalyzer, which can load from file, List, webPage and etc;
113 | // It also can filter the wordAmount most frequent words, the shortest word must longer than 2
114 | // Leafee: and Chinese Tokenizer, which waste me lot of time to solve the dependency
115 | final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
116 | frequencyAnalyzer.setWordFrequenciesToReturn(wordAmount);
117 | frequencyAnalyzer.setMinWordLength(3);
118 | frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
119 | List wordFrequencies = null;
120 |
121 | // read WordFrequencies form the list I created
122 | try {
123 | wordFrequencies = frequencyAnalyzer.loadWordFrequencies(wfList);
124 | } catch (Exception e) {
125 | e.printStackTrace();
126 | }
127 |
128 | // draw picture with wordFrequencies and output to a file;
129 | final Dimension dimension = new Dimension(pictureWidth, pictureHeight);
130 | final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
131 | try {
132 | if (inPicture == null)
133 | wordCloud.setBackground(new CircleBackground((pictureHeight < pictureWidth ? pictureHeight : pictureWidth) / 2));
134 | else
135 | wordCloud.setBackground(new PixelBoundryBackground(new File(inPicture)));
136 | wordCloud.setPadding(2);
137 | wordCloud.setColorPalette(new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA),
138 | new Color(0x9A8CF5), new Color(0x806EF5)));
139 | wordCloud.setFontScalar(new SqrtFontScalar(12, 45));
140 | wordCloud.setKumoFont(new KumoFont(fontName, FontWeight.BOLD));
141 | // wordCloud.build(wfList); // if I use my List, it will create 4500+ words, without filter
142 | wordCloud.build(wordFrequencies);
143 | wordCloud.writeToFile(outPicture);
144 | } catch (IOException e) {
145 | e.printStackTrace();
146 | System.exit(-2);
147 | }
148 | }
149 |
150 | private static BufferedReader openFile(String path, String charset) {
151 | File file = null;
152 | BufferedReader bufferedreader = null;
153 | // open the file. The BufferedReader is used to read file by line
154 | try {
155 | // read the file path, in case that the program running in a different computer
156 | file = new File(path);
157 | FileInputStream fileInputStream = new FileInputStream(file);
158 | InputStreamReader inputstreamreader = new InputStreamReader(fileInputStream, charset);
159 | bufferedreader = new BufferedReader(inputstreamreader);
160 | } catch (Exception e) {
161 | e.printStackTrace();
162 | System.exit(-1);
163 | }
164 | return bufferedreader;
165 | }
166 |
167 | private static List readFile(BufferedReader bufferedreader) {
168 | // read file and add the words into wordList
169 | List list = new ArrayList();
170 | try {
171 | while (bufferedreader.ready()) {
172 | String[] temp = bufferedreader.readLine().split(" ");
173 | list.addAll(Arrays.asList(temp));
174 | }
175 | bufferedreader.close();
176 | } catch (IOException e) {
177 | e.printStackTrace();
178 | System.exit(-1);
179 | }
180 | return list;
181 | }
182 |
183 | private static void printHelp() {
184 | System.out.println("\t--in-file pathOfContentFileName * read the content file from this path");
185 | System.out.println("\t--stop-words-file pathOfStopWordFile read the stop words from there");
186 | System.out.println("\t--stop-words-handle if you want press stop words yourself, add this parameter without value");
187 | System.out.println("\t--word-amount number the amount of words will be displayed on picture");
188 | System.out.println("\t--in-picture path read the background picture");
189 | System.out.println("\t--out-picture path output the picture to path");
190 | System.out.println("\t--picture-width number width of output picture");
191 | System.out.println("\t--picture-height number height of output picture");
192 | System.out.println("\t--font-name fontName the font you want on picture");
193 | System.out.println("\t--charset charsetName change the charset used to read file");
194 | System.out.println("\t--help print this menu\n");
195 | System.out.println(" the argument with * is required");
196 | System.out.println(" if there are spaces in some arguments, use quote around them");
197 | System.out.println(" example: java -jar JAR --in-file inputFile.txt");
198 | System.out.println(" java -jar JAR --in-file inputFile.txt --stop-words-file StopWords.txt --out-picture result.png");
199 | System.out.println(" java -jar JAR --in-file inputFile.txt --stop-words-file StopWords.txt --in-picture in.png --picture-width 640 --picture-height 640 --out-picture out.png --font-name \"YaHei Consolas Hybrid\"\n");
200 | System.out.println(" default values :");
201 | System.out.println(" stop-words-file = null");
202 | System.out.println(" word-amount = 600");
203 | System.out.println(" in-picture = null");
204 | System.out.println(" out-picture = out.png");
205 | System.out.println(" picture-width = 600");
206 | System.out.println(" picture-height = 600");
207 | System.out.println(" font-name = 微软雅黑");
208 | System.out.println(" charset = utf-8");
209 | System.exit(0);
210 | }
211 | }
--------------------------------------------------------------------------------