├── README.md ├── .gitignore ├── src ├── test │ └── java │ │ └── com │ │ └── technobium │ │ └── AppTest.java └── main │ └── java │ └── com │ └── technobium │ └── OpenNLPCategorizer.java ├── pom.xml └── input └── tweets.txt /README.md: -------------------------------------------------------------------------------- 1 | # opennlp-categorizer 2 | Apache OpenNLP document categorizer demo. Code explained here: 3 | http://technobium.com/sentiment-analysis-using-opennlp-document-categorizer/ 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | -------------------------------------------------------------------------------- /src/test/java/com/technobium/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.technobium; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.technobium 5 | opennlp-categorizer 6 | jar 7 | 1.0-SNAPSHOT 8 | opennlp-categorizer 9 | http://maven.apache.org 10 | 11 | 12 | junit 13 | junit 14 | 3.8.1 15 | test 16 | 17 | 18 | org.slf4j 19 | slf4j-simple 20 | 1.7.7 21 | 22 | 23 | org.apache.opennlp 24 | opennlp-tools 25 | 1.5.3 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/main/java/com/technobium/OpenNLPCategorizer.java: -------------------------------------------------------------------------------- 1 | package com.technobium; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | 7 | import opennlp.tools.doccat.DoccatModel; 8 | import opennlp.tools.doccat.DocumentCategorizerME; 9 | import opennlp.tools.doccat.DocumentSampleStream; 10 | import opennlp.tools.util.ObjectStream; 11 | import opennlp.tools.util.PlainTextByLineStream; 12 | 13 | public class OpenNLPCategorizer { 14 | DoccatModel model; 15 | 16 | public static void main(String[] args) { 17 | OpenNLPCategorizer twitterCategorizer = new OpenNLPCategorizer(); 18 | twitterCategorizer.trainModel(); 19 | twitterCategorizer.classifyNewTweet("Have a nice day!"); 20 | } 21 | 22 | public void trainModel() { 23 | InputStream dataIn = null; 24 | try 
{ 25 | dataIn = new FileInputStream("input/tweets.txt"); 26 | ObjectStream lineStream = new PlainTextByLineStream(dataIn, "UTF-8"); 27 | ObjectStream sampleStream = new DocumentSampleStream(lineStream); 28 | // Specifies the minimum number of times a feature must be seen 29 | int cutoff = 2; 30 | int trainingIterations = 30; 31 | model = DocumentCategorizerME.train("en", sampleStream, cutoff, 32 | trainingIterations); 33 | } catch (IOException e) { 34 | e.printStackTrace(); 35 | } finally { 36 | if (dataIn != null) { 37 | try { 38 | dataIn.close(); 39 | } catch (IOException e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | } 44 | } 45 | 46 | public void classifyNewTweet(String tweet) { 47 | DocumentCategorizerME myCategorizer = new DocumentCategorizerME(model); 48 | double[] outcomes = myCategorizer.categorize(tweet); 49 | String category = myCategorizer.getBestCategory(outcomes); 50 | 51 | if (category.equalsIgnoreCase("1")) { 52 | System.out.println("The tweet is positive :) "); 53 | } else { 54 | System.out.println("The tweet is negative :( "); 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /input/tweets.txt: -------------------------------------------------------------------------------- 1 | 1 Watching a nice movie 2 | 0 The painting is ugly, will return it tomorrow... 3 | 1 One of the best soccer games, worth seeing it 4 | 1 Very tasty, not only for vegetarians 5 | 1 Super party! 6 | 0 Too early to travel..need a coffee 7 | 0 Damn..the train is late again... 8 | 0 Bad news, my flight just got cancelled. 9 | 1 Happy birthday mr. president 10 | 1 Just watch it. Respect. 11 | 1 Wonderful sunset. 12 | 1 Bravo, first title in 2014! 13 | 0 Had a bad evening, need urgently a beer. 
14 | 0 I put on weight again 15 | 1 On today's show we met Angela, a woman with an amazing story 16 | 1 I fell in love again 17 | 0 I lost my keys 18 | 1 On a trip to Iceland 19 | 1 Happy in Berlin 20 | 0 I hate Mondays 21 | 1 Love the new book I reveived for Christmas 22 | 0 He killed our good mood 23 | 1 I am in good spirits again 24 | 1 This guy creates the most awesome pics ever 25 | 0 The dark side of a selfie. 26 | 1 Cool! John is back! 27 | 1 Many rooms and many hopes for new residents 28 | 0 False hopes for the people attending the meeting 29 | 1 I set my new year's resolution 30 | 0 The ugliest car ever! 31 | 0 Feeling bored 32 | 0 Need urgently a pause 33 | 1 Nice to see Ana made it 34 | 1 My dream came true 35 | 0 I didn't see that one coming 36 | 0 Sorry mate, there is no more room for you 37 | 0 Who could have possibly done this? 38 | 1 I won the challenge 39 | 0 I feel bad for what I did 40 | 1 I had a great time tonight 41 | 1 It was a lot of fun 42 | 1 Thank you Molly making this possible 43 | 0 I just did a big mistake 44 | 1 I love it!! 45 | 0 I never loved so hard in my life 46 | 0 I hate you Mike!! 47 | 0 I hate to say goodbye 48 | 1 Lovely! 49 | 1 Like and share if you feel the same 50 | 0 Never try this at home 51 | 0 Don't spoil it! 52 | 1 I love rock and roll 53 | 0 The more I hear you, the more annoyed I get 54 | 1 Finnaly passed my exam! 55 | 1 Lovely kittens 56 | 0 I just lost my appetite 57 | 0 Sad end for this movie 58 | 0 Lonely, I am so lonely 59 | 1 Beautiful morning 60 | 1 She is amazing 61 | 1 Enjoying some time with my friends 62 | 1 Special thanks to Marty 63 | 1 Thanks God I left on time 64 | 1 Greateful for a wonderful meal 65 | 1 So happy to be home 66 | 0 Hate to wait on a long queue 67 | 0 No cab available 68 | 0 Electricity outage, this is a nightmare 69 | 0 Nobody to ask about directions 70 | 1 Great game! 71 | 1 Nice trip 72 | 1 I just received a pretty flower 73 | 1 Excellent idea 74 | 1 Got a new watch. 
Feeling happy 75 | 0 I feel sick 76 | 0 I am very tired 77 | 1 Such a good taste 78 | 0 Such a bad taste 79 | 1 Enjoying brunch 80 | 0 I don't recommend this restaurant 81 | 1 Thank you mom for supporting me 82 | 0 I will never ever call you again 83 | 0 I just got kicked out of the contest 84 | 1 Smiling 85 | 0 Big pain to see my team loosing 86 | 0 Bitter defeat tonight 87 | 0 My bike was stollen 88 | 1 Great to see you! 89 | 0 I lost every hope for seeing him again 90 | 1 Nice dress! 91 | 1 Stop wasting my time 92 | 1 I have a great idea 93 | 1 Excited to go to the pub 94 | 1 Feeling proud 95 | 1 Cute bunnies 96 | 0 Cold winter ahead 97 | 0 Hopless struggle.. 98 | 0 Ugly hat 99 | 1 Big hug and lots of love 100 | 1 I hope you have a wonderful celebration --------------------------------------------------------------------------------