├── _config.yml ├── benchmark ├── benchmark.bash ├── exec_time.ods ├── .~lock.exec_time.ods# ├── dataset │ └── .~lock.twitter-test-gold-B.tsv# ├── nltk.bash ├── nltk_scikit_ngram.py ├── nltk_scikit_ngram_liu.py └── nltk_scikit_liu_vader.py ├── img ├── logo.png └── tweetToSparseOptions.png ├── docs ├── img │ ├── logo.png │ └── tweetToSparseOptions.png ├── shortBio.bib.txt ├── videos.md └── install.md ├── doc ├── element-list ├── package-list ├── test │ ├── package-frame.html │ ├── package-tree.html │ └── package-summary.html ├── script.js ├── weka │ ├── core │ │ ├── tokenizers │ │ │ └── package-frame.html │ │ └── converters │ │ │ └── package-frame.html │ └── filters │ │ ├── supervised │ │ └── attribute │ │ │ └── package-frame.html │ │ └── unsupervised │ │ └── attribute │ │ └── package-frame.html ├── overview-frame.html ├── affective │ └── core │ │ └── package-frame.html ├── index.html ├── deprecated-list.html ├── overview-summary.html ├── allclasses-noframe.html └── constant-values.html ├── GenericPropertiesCreator.props ├── ISSUE_TEMPLATE.md ├── CONTRIBUTING.md ├── GUIEditors.props ├── mkdocs.yml ├── src ├── main │ ├── main.iml │ └── java │ │ ├── weka │ │ ├── core │ │ │ ├── converters │ │ │ │ ├── TweetCollectionToArff.java │ │ │ │ ├── HumanCodedToArff.java │ │ │ │ ├── SemEvalToArff.java │ │ │ │ └── NRCAffectToArff.java │ │ │ └── tokenizers │ │ │ │ └── TweetNLPTokenizer.java │ │ └── filters │ │ │ └── unsupervised │ │ │ └── attribute │ │ │ └── TweetToWordListCountFeatureVector.java │ │ └── affective │ │ └── core │ │ ├── LexiconEvaluator.java │ │ ├── SentiStrengthEvaluator.java │ │ ├── EmbeddingHandler.java │ │ ├── NegationEvaluator.java │ │ ├── PolarityLexiconEvaluator.java │ │ ├── IntensityLexiconEvaluator.java │ │ ├── SWN3LexiconEvaluator.java │ │ └── CSVEmbeddingHandler.java └── test │ ├── java │ └── weka │ │ ├── AllPackageTests.java │ │ └── filters │ │ ├── unsupervised │ │ └── attribute │ │ │ ├── PTCMTest.java │ │ │ ├── TweetCentroidTest.java │ │ │ ├── 
ASATest.java │ │ │ ├── LabelWordVectorsTest.java │ │ │ ├── LexiconDistantSupervisionTest.java │ │ │ ├── TweetNLPPOSTaggerTest.java │ │ │ ├── TweetToSparseFeatureVectorTest.java │ │ │ ├── TweetToEmbeddingsFeatureVectorTest.java │ │ │ ├── TweetToSentiStrengthFeatureVectorTest.java │ │ │ ├── TweetToWordListCountFeatureVectorTest.java │ │ │ └── TweetToInputLexiconFeatureVectorTest.java │ │ └── supervised │ │ └── attribute │ │ └── PMILexiconExpanderTest.java │ └── resources │ └── wekarefs │ └── weka │ └── filters │ └── unsupervised │ └── attribute │ └── LexiconDistantSupervisionTest.ref ├── .gitignore ├── Description.props ├── wekarefs └── weka │ └── filters │ ├── supervised │ └── attribute │ │ └── PMILexiconExpanderTest.ref │ └── unsupervised │ └── attribute │ └── LexiconDistantSupervisionTest.ref ├── CODE_OF_CONDUCT.md └── README.md /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /benchmark/benchmark.bash: -------------------------------------------------------------------------------- 1 | bash nltk.bash 2 | bash affectivetweets.bash 3 | -------------------------------------------------------------------------------- /img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/img/logo.png -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/docs/img/logo.png -------------------------------------------------------------------------------- /benchmark/exec_time.ods: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/benchmark/exec_time.ods -------------------------------------------------------------------------------- /benchmark/.~lock.exec_time.ods#: -------------------------------------------------------------------------------- 1 | ,felipe,felipe-HP-Notebook,17.04.2019 13:50,file:///home/felipe/.config/libreoffice/4; -------------------------------------------------------------------------------- /img/tweetToSparseOptions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/img/tweetToSparseOptions.png -------------------------------------------------------------------------------- /benchmark/dataset/.~lock.twitter-test-gold-B.tsv#: -------------------------------------------------------------------------------- 1 | ,fbravoma,neural,13.02.2019 15:57,file:///home/fbravoma/.config/libreoffice/4; -------------------------------------------------------------------------------- /docs/img/tweetToSparseOptions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/docs/img/tweetToSparseOptions.png -------------------------------------------------------------------------------- /doc/element-list: -------------------------------------------------------------------------------- 1 | affective.core 2 | weka.core.converters 3 | weka.core.tokenizers 4 | weka.filters.supervised.attribute 5 | weka.filters.unsupervised.attribute 6 | -------------------------------------------------------------------------------- /doc/package-list: -------------------------------------------------------------------------------- 1 | affective.core 2 | weka.core.converters 3 | weka.core.tokenizers 4 | weka.filters.supervised.attribute 5 | weka.filters.unsupervised.attribute 6 | 
-------------------------------------------------------------------------------- /GenericPropertiesCreator.props: -------------------------------------------------------------------------------- 1 | affective.core.ArffLexiconEvaluator =\ affective.core 2 | affective.core.ArffLexiconWordLabeller =\ affective.core 3 | affective.core.EmbeddingHandler =\ affective.core -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | #### Expected behavior: 4 | 5 | #### Encountered behavior: 6 | 7 | 8 | ### Additional Information 9 | 10 | - Weka version: 11 | - AffectiveTweets version: 12 | - Operating System: 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## How to Contribute 2 | 3 | New contributors are more than welcome. If you want to contribute just fork the project and send a [pull request](https://help.github.com/articles/about-pull-requests/) with your changes. 4 | 5 | More details about how to contribute are given [here](https://affectivetweets.cms.waikato.ac.nz/contribute/). 
-------------------------------------------------------------------------------- /GUIEditors.props: -------------------------------------------------------------------------------- 1 | affective.core.ArffLexiconEvaluator=weka.gui.GenericObjectEditor 2 | affective.core.ArffLexiconEvaluator[]=weka.gui.GenericArrayEditor 3 | affective.core.ArffLexiconWordLabeller=weka.gui.GenericObjectEditor 4 | affective.core.ArffLexiconWordLabeller[]=weka.gui.GenericArrayEditor 5 | affective.core.EmbeddingHandler=weka.gui.GenericObjectEditor -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: AffectiveTweets 2 | theme: readthedocs 3 | repo_url: https://github.com/felipebravom/AffectiveTweets 4 | site_description: AffectiveTweets Documentation 5 | nav: 6 | - Home: index.md 7 | - Installation: install.md 8 | - Examples: examples.md 9 | - Videos: videos.md 10 | - Contributing: contribute.md 11 | - Benchmark: benchmark.md 12 | -------------------------------------------------------------------------------- /src/main/main.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.class 3 | 4 | # Mobile Tools for Java (J2ME) 5 | .mtj.tmp/ 6 | 7 | # Package Files # 8 | *.war 9 | *.ear 10 | *.jar 11 | 12 | # zip files # 13 | *.zip 14 | 15 | # Eclipse files 16 | .classpath 17 | .project 18 | .settings/ 19 | bin/ 20 | 21 | # IntelliJ 22 | .idea/ 23 | *.iml 24 | 25 | # Project resources 26 | resources/ 27 | lexicons/ 28 | dist/ 29 | data/ 30 | lib/ 31 | 32 | #site 33 | site/ 34 | 35 | # scripts # 36 | *.sh 37 | 38 | # virtual machine crash logs, see 
http://www.java.com/en/download/help/error_hotspot.xml 39 | hs_err_pid* 40 | -------------------------------------------------------------------------------- /benchmark/nltk.bash: -------------------------------------------------------------------------------- 1 | echo "Linear model using n-grams (n=1,2,3,4)." 2 | start=`date +%s` 3 | python nltk_scikit_ngram.py 4 | end=`date +%s` 5 | runtime=$((end-start)) 6 | echo time = $runtime 7 | 8 | 9 | echo "Linear model using n-grams + Bing Liu's Lexicon" 10 | start=`date +%s` 11 | python nltk_scikit_ngram_liu.py 12 | end=`date +%s` 13 | runtime=$((end-start)) 14 | echo time = $runtime 15 | 16 | 17 | echo "Linear model using Bing Liu's Lexicon + Vader" 18 | start=`date +%s` 19 | python nltk_scikit_liu_vader.py 20 | end=`date +%s` 21 | runtime=$((end-start)) 22 | echo time = $runtime 23 | 24 | 25 | echo "Linear model using n-grams + Bing Liu's Lexicon + Vader" 26 | start=`date +%s` 27 | python nltk_scikit_ngram_liu_vader.py 28 | end=`date +%s` 29 | runtime=$((end-start)) 30 | echo time = $runtime 31 | -------------------------------------------------------------------------------- /docs/shortBio.bib.txt: -------------------------------------------------------------------------------- 1 | @article{JMLR:v20:18-450, 2 | author = {Felipe Bravo-Marquez and Eibe Frank and Bernhard Pfahringer and Saif M. 
Mohammad}, 3 | title = {{AffectiveTweets}: a {W}eka Package for Analyzing Affect in Tweets}, 4 | journal = {Journal of Machine Learning Research}, 5 | year = {2019}, 6 | volume = {20}, 7 | number = {92}, 8 | pages = {1-6}, 9 | url = {http://jmlr.org/papers/v20/18-450.html} 10 | } 11 | 12 | 13 | @InProceedings{MohammadB17, 14 | Title = {Emotion Intensities in Tweets}, 15 | Author = {Saif Mohammad and Felipe Bravo{-}Marquez}, 16 | Booktitle = {Proceedings of the 6th Joint Conference on Lexical and Computational Semantics, *SEM @ACM 2017, Vancouver, Canada, August 3-4, 2017}, 17 | Year = {2017}, 18 | Pages = {65--77} 19 | } 20 | 21 | -------------------------------------------------------------------------------- /doc/test/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | test 7 | 8 | 9 | 10 | 11 | 12 |

test

13 |
14 |

Classes

15 | 19 |
20 | 21 | 22 | -------------------------------------------------------------------------------- /doc/script.js: -------------------------------------------------------------------------------- 1 | function show(type) 2 | { 3 | count = 0; 4 | for (var key in methods) { 5 | var row = document.getElementById(key); 6 | if ((methods[key] & type) != 0) { 7 | row.style.display = ''; 8 | row.className = (count++ % 2) ? rowColor : altColor; 9 | } 10 | else 11 | row.style.display = 'none'; 12 | } 13 | updateTabs(type); 14 | } 15 | 16 | function updateTabs(type) 17 | { 18 | for (var value in tabs) { 19 | var sNode = document.getElementById(tabs[value][0]); 20 | var spanNode = sNode.firstChild; 21 | if (value == type) { 22 | sNode.className = activeTableTab; 23 | spanNode.innerHTML = tabs[value][1]; 24 | } 25 | else { 26 | sNode.className = tableTab; 27 | spanNode.innerHTML = "" + tabs[value][1] + ""; 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /docs/videos.md: -------------------------------------------------------------------------------- 1 | The demo videos from below show how to use AffectiveTweets with the WEKA GUI. 2 | 3 | ### Video 1: Training sentiment classification models for tweets 4 | 5 | [![IMAGE ALT TEXT](http://img.youtube.com/vi/mApoMfDSG0A/0.jpg)](http://www.youtube.com/watch?v=mApoMfDSG0A "Training sentiment classification models for tweets") 6 | 7 | Click [here](https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets1.mp4) to download the video in mp4 format. 8 | 9 | 10 | ### Video 2: Creating lexicons for Twitter sentiment analysis 11 | 12 | [![IMAGE ALT TEXT](http://img.youtube.com/vi/FnLnpIa230w/0.jpg)](http://www.youtube.com/watch?v=FnLnpIa230w "Creating lexicons for Twitter Sentiment Analysis") 13 | 14 | Click [here](https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets2.mp4) to download the video in mp4 format. 
15 | 16 | 17 | ### Video 3: Twitter sentiment classification with distant supervision 18 | 19 | [![IMAGE ALT TEXT](http://img.youtube.com/vi/fCHRSadvI4g/0.jpg)](http://www.youtube.com/watch?v=fCHRSadvI4g "Twitter Sentiment Classification with Distant Supervision") 20 | 21 | Click [here](https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets3.mp4) to download the video in mp4 format. -------------------------------------------------------------------------------- /Description.props: -------------------------------------------------------------------------------- 1 | # Description file for AffectiveTweets 2 | 3 | 4 | # Package name (required) 5 | PackageName=AffectiveTweets 6 | 7 | # Version (required) 8 | Version=1.0.2 9 | 10 | #Date 11 | Date=2019-02-20 12 | 13 | # Title (required) 14 | Title=Text Filters for Analyzing Sentiment and Emotions of Tweets 15 | 16 | Category=Text classification 17 | 18 | # Author (required) 19 | Author=felipebravom 20 | 21 | # Maintainer (required) 22 | Maintainer=felipebravom 23 | 24 | # License (required) 25 | License=GPL 3.0 26 | 27 | # Description (required) 28 | Description= Text filters for analyzing emotion and sentiment of tweets. Sample tweets annotated by sentiment and emotions can be found in ${WEKA_HOME}/packages/AffectiveTweets/data. A file with some pre-trained word embeddings can be found in ${WEKA_HOME}/packages/AffectiveTweets/resources/w2v.twitter.edinburgh.100d.csv.gz. However, we recommend using the embeddings located in: https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.0/w2v.twitter.edinburgh10M.400d.csv.gz for better performance. 29 | 30 | # Package URL for obtaining the package archive (required) 31 | PackageURL=https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets1.0.2.zip 32 | 33 | 34 | # URL for further information 35 | URL=https://affectivetweets.cms.waikato.ac.nz/ 36 | 37 | # Related to other packages? 
38 | #Related= 39 | 40 | # Dependencies 41 | Depends=weka (>=3.8.1) 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/main/java/weka/core/converters/TweetCollectionToArff.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This program is free software: you can redistribute it and/or modify 3 | * it under the terms of the GNU General Public License as published by 4 | * the Free Software Foundation, either version 3 of the License, or 5 | * (at your option) any later version. 6 | * 7 | * This program is distributed in the hope that it will be useful, 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | * GNU General Public License for more details. 11 | * 12 | * You should have received a copy of the GNU General Public License 13 | * along with this program. If not, see . 14 | */ 15 | 16 | /* 17 | * TweetCollectionToArff.java 18 | * Copyright (C) 1999-2016 University of Waikato, Hamilton, New Zealand 19 | * 20 | */ 21 | 22 | 23 | package weka.core.converters; 24 | 25 | import weka.core.Instances; 26 | 27 | 28 | /** 29 | * Builds an arff dataset from a collection of tweets in a given file. 30 | * @author Felipe Bravo-Marquez (fjb11 at students.waikato.ac.nz) 31 | * @version 1.0 32 | */ 33 | 34 | public abstract class TweetCollectionToArff { 35 | 36 | 37 | /** 38 | * Creates the dataset. 
39 | * @param collectionPath the file with the input collection 40 | * @return the Instances weka object 41 | * @throws Exception if something goes wrong 42 | */ 43 | public abstract Instances createDataset(String collectionPath) throws Exception; 44 | } 45 | -------------------------------------------------------------------------------- /doc/weka/core/tokenizers/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | weka.core.tokenizers 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 | 20 | 21 |

weka.core.tokenizers

22 |
23 |

Classes

24 | 27 |
28 | 29 | 30 | -------------------------------------------------------------------------------- /doc/weka/filters/supervised/attribute/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | weka.filters.supervised.attribute 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 | 20 | 21 |

weka.filters.supervised.attribute

22 |
23 |

Classes

24 | 27 |
28 | 29 | 30 | -------------------------------------------------------------------------------- /doc/weka/core/converters/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | weka.core.converters 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 | 20 | 21 |

weka.core.converters

22 |
23 |

Classes

24 | 30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /doc/overview-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Overview List 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 | 20 | 21 |
22 | 25 |
26 | 36 |

 

37 | 38 | 39 | -------------------------------------------------------------------------------- /benchmark/nltk_scikit_ngram.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # Authors: Felipe Bravo-Marquez 15 | 16 | 17 | import pandas as pd 18 | from nltk.tokenize import TweetTokenizer 19 | from nltk.sentiment.util import mark_negation 20 | 21 | from sklearn.feature_extraction.text import CountVectorizer 22 | from sklearn.linear_model import LogisticRegression 23 | from sklearn.pipeline import Pipeline 24 | from sklearn.metrics import confusion_matrix, cohen_kappa_score, classification_report 25 | 26 | 27 | # load training and testing datasets as a pandas dataframe 28 | train_data = pd.read_csv("dataset/twitter-train-B.txt", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet")) 29 | test_data = pd.read_csv("dataset/twitter-test-gold-B.tsv", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet")) 30 | 31 | # replace objective-OR-neutral and objective to neutral 32 | train_data.sent = train_data.sent.replace(['objective-OR-neutral','objective'],['neutral','neutral']) 33 | 34 | # use a Twitter-specific tokenizer 35 | tokenizer = TweetTokenizer(preserve_case=False, reduce_len=True) 36 | 37 | 38 | 39 | 40 | ################################################# 41 | 
# 42 | # Train a linear model using n-gram features 43 | # 44 | ################################################## 45 | vectorizer = CountVectorizer(tokenizer = tokenizer.tokenize, preprocessor = mark_negation, ngram_range=(1,4)) 46 | log_mod = LogisticRegression(solver='liblinear',multi_class='ovr') 47 | text_clf = Pipeline([('vect', vectorizer), ('clf', log_mod)]) 48 | 49 | text_clf.fit(train_data.tweet, train_data.sent) 50 | 51 | predicted = text_clf.predict(test_data.tweet) 52 | 53 | conf = confusion_matrix(test_data.sent, predicted) 54 | kappa = cohen_kappa_score(test_data.sent, predicted) 55 | class_rep = classification_report(test_data.sent, predicted) 56 | 57 | 58 | 59 | 60 | print('Confusion Matrix for Logistic Regression + ngram features:') 61 | print(conf) 62 | print('Classification Report') 63 | print(class_rep) 64 | print('kappa:'+str(kappa)) 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /src/main/java/affective/core/LexiconEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This program is free software: you can redistribute it and/or modify 3 | * it under the terms of the GNU General Public License as published by 4 | * the Free Software Foundation, either version 3 of the License, or 5 | * (at your option) any later version. 6 | * 7 | * This program is distributed in the hope that it will be useful, 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | * GNU General Public License for more details. 11 | * 12 | * You should have received a copy of the GNU General Public License 13 | * along with this program. If not, see . 
14 | */ 15 | 16 | /* 17 | * LexiconEvaluator.java 18 | * Copyright (C) 2018 University of Waikato, Hamilton, New Zealand 19 | */ 20 | 21 | package affective.core; 22 | 23 | import java.io.IOException; 24 | import java.io.Serializable; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | /** 29 | * 30 | * This abstract class provides the structure for all classes calculating attributes from lexicons. 31 | * 32 | * 33 | * 34 | * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz) 35 | * @version $Revision: 1 $ 36 | */ 37 | public abstract class LexiconEvaluator implements Serializable { 38 | 39 | /** for serialization */ 40 | private static final long serialVersionUID = 1L; 41 | 42 | /** The lexicon file */ 43 | protected String path; 44 | 45 | /** The name of the lexicon */ 46 | protected String name; 47 | 48 | /** A list with all the features provided by the lexicon evaluator */ 49 | protected List featureNames; 50 | 51 | 52 | /** 53 | * initializes the Object 54 | * 55 | * @param path the file with the lexicon 56 | * @param name the prefix for all the attributes calculated from this lexicon 57 | */ 58 | public LexiconEvaluator(String path,String name){ 59 | this.path=path; 60 | this.name=name; 61 | } 62 | 63 | /** 64 | * initializes the dictionary 65 | * @throws IOException in case of wrong file 66 | */ 67 | public abstract void processDict() throws IOException; 68 | 69 | 70 | /** 71 | * Calculates lexicon-based feature values from a list of tokens 72 | * @param tokens a tokenized tweet 73 | * @return a mapping between attribute names and their scores 74 | */ 75 | public abstract Map evaluateTweet(List tokens); 76 | 77 | /** 78 | * Gets the feature names 79 | * 80 | * @return the feature names. 
81 | */ 82 | public List getFeatureNames() { 83 | return featureNames; 84 | } 85 | 86 | 87 | 88 | } 89 | -------------------------------------------------------------------------------- /wekarefs/weka/filters/supervised/attribute/PMILexiconExpanderTest.ref: -------------------------------------------------------------------------------- 1 | @relation 'Twitter Sentiment Analysis Sentiment140 Test: Dataset. More info at: http://help.sentiment140.com/-weka.filters.supervised.attribute.PMILexiconExpander-minFreq10-negClassValuenegative-posClassValuepositive-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-I1-U-tokenizerweka.core.tokenizers.TweetNLPTokenizer' 2 | 3 | @attribute WORD_NAME string 4 | @attribute PMI-SO numeric 5 | 6 | @data 7 | !,0.894939 8 | !!,0.960034 9 | !!!,0.960034 10 | '\"',0.281962 11 | &,0.80803 12 | '\'',-0.039966 13 | (,0.152679 14 | ),0.696999 15 | *,3.129959 16 | ',',-0.361895 17 | -,1.474607 18 | .,-0.268908 19 | ..,-0.933051 20 | ...,0.474607 21 | :,0.483596 22 | :(,-2.624929 23 | :),1.375071 24 | '?',-1.728022 25 | a,-0.066933 26 | about,-0.817574 27 | aig,-3.209891 28 | all,-0.887963 29 | am,0.544996 30 | and,-0.539537 31 | api,-0.847321 32 | are,-0.817574 33 | as,-0.262359 34 | at,0.597464 35 | at&t,-3.499398 36 | awesome,2.660473 37 | back,-0.232611 38 | be,-0.19197 39 | best,1.960034 40 | but,0.307957 41 | by,0.322604 42 | cable,-3.499398 43 | can,-1.624929 44 | damn,-0.847321 45 | day,-0.887963 46 | dentist,-1.624929 47 | do,-0.039966 48 | down,-1.455004 49 | exam,-0.847321 50 | for,0.388877 51 | from,0.696999 52 | get,-0.402536 53 | gladwell,0.767389 54 | gm,-4.127429 55 | go,0.097537 56 | going,-0.525393 57 | good,1.223068 58 | google,1.419465 59 | got,0.490548 60 | great,1.725568 61 | has,-0.039966 62 | hate,-3.287894 63 | have,0.429519 64 | he,-0.039966 65 | his,2.281962 66 | i,-0.167346 67 | 'i\'m',0.375071 68 | if,-0.303001 69 | in,0.496086 70 | internet,-3.209891 71 | iphone,-0.624929 72 | 
is,-0.155444 73 | it,0.834503 74 | 'it\'s',-1.155444 75 | its,0.223068 76 | just,0.120498 77 | kindle2,2.866924 78 | last,0.767389 79 | latex,-0.847321 80 | lebron,1.544996 81 | like,-0.402536 82 | lol,0.767389 83 | love,2.818015 84 | malcolm,0.960034 85 | me,-1.320074 86 | more,-1.624929 87 | much,0.80803 88 | museum,1.338545 89 | my,0.606397 90 | never,-0.624929 91 | new,1.696999 92 | night,1.281962 93 | nike,1.44546 94 | no,-0.039966 95 | not,-1.209891 96 | now,-1.624929 97 | obama,0.638105 98 | of,-0.50563 99 | on,-0.542467 100 | one,2.129959 101 | only,-0.847321 102 | out,-1.303001 103 | phone,-2.039966 104 | really,0.152679 105 | rt,0.960034 106 | see,0.80803 107 | so,-0.209891 108 | still,-1.624929 109 | than,-0.624929 110 | that,-2.17747 111 | the,-0.010219 112 | they,-2.262359 113 | this,-0.624929 114 | time,-2.965966 115 | to,-0.119401 116 | today,-0.209891 117 | too,-0.303001 118 | twitter,-0.718038 119 | u,-0.847321 120 | up,-1.361895 121 | using,1.182426 122 | very,1.960034 123 | want,0.638105 124 | warner,-4.994163 125 | was,0.059569 126 | we,-0.776932 127 | what,-0.624929 128 | will,0.281962 129 | with,0.622999 130 | you,-0.499398 131 | your,0.960034 132 | -------------------------------------------------------------------------------- /src/test/java/weka/AllPackageTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This program is free software: you can redistribute it and/or modify 3 | * it under the terms of the GNU General Public License as published by 4 | * the Free Software Foundation, either version 3 of the License, or 5 | * (at your option) any later version. 6 | * 7 | * This program is distributed in the hope that it will be useful, 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | * GNU General Public License for more details. 
11 | * 12 | * You should have received a copy of the GNU General Public License 13 | * along with this program. If not, see . 14 | */ 15 | 16 | package weka; 17 | 18 | import junit.framework.Test; 19 | import junit.framework.TestSuite; 20 | 21 | import weka.filters.unsupervised.attribute.ASATest; 22 | import weka.filters.unsupervised.attribute.PTCMTest; 23 | import weka.filters.unsupervised.attribute.LexiconDistantSupervisionTest; 24 | import weka.filters.unsupervised.attribute.TweetNLPPOSTaggerTest; 25 | import weka.filters.supervised.attribute.PMILexiconExpanderTest; 26 | import weka.filters.unsupervised.attribute.TweetCentroidTest; 27 | import weka.filters.unsupervised.attribute.LabelWordVectorsTest; 28 | 29 | import weka.filters.unsupervised.attribute.TweetToSentiStrengthFeatureVectorTest; 30 | import weka.filters.unsupervised.attribute.TweetToSparseFeatureVectorTest; 31 | import weka.filters.unsupervised.attribute.TweetToLexiconFeatureVectorTest; 32 | import weka.filters.unsupervised.attribute.TweetToInputLexiconFeatureVectorTest; 33 | import weka.filters.unsupervised.attribute.TweetToEmbeddingsFeatureVectorTest; 34 | import weka.filters.unsupervised.attribute.TweetToWordListCountFeatureVectorTest; 35 | 36 | /** 37 | * Test class for all tests in this directory. Run from the command line with: 38 | *

39 | * java weka.AllPackageTests 40 | * 41 | * @author Eibe Frank 42 | * @version $Revision: 10160 $ 43 | */ 44 | public class AllPackageTests extends TestSuite { 45 | 46 | public static Test suite() { 47 | TestSuite suite = new TestSuite(); 48 | 49 | suite.addTestSuite(ASATest.class); 50 | suite.addTestSuite(PTCMTest.class); 51 | suite.addTestSuite(LexiconDistantSupervisionTest.class); 52 | 53 | suite.addTestSuite(PMILexiconExpanderTest.class); 54 | suite.addTestSuite(TweetCentroidTest.class); 55 | suite.addTestSuite(LabelWordVectorsTest.class); 56 | 57 | suite.addTestSuite(TweetToSparseFeatureVectorTest.class); 58 | suite.addTestSuite(TweetToLexiconFeatureVectorTest.class); 59 | suite.addTestSuite(TweetToInputLexiconFeatureVectorTest.class); 60 | suite.addTestSuite(TweetToSentiStrengthFeatureVectorTest.class); 61 | suite.addTestSuite(TweetToEmbeddingsFeatureVectorTest.class); 62 | suite.addTestSuite(TweetNLPPOSTaggerTest.class); 63 | suite.addTestSuite(TweetToWordListCountFeatureVectorTest.class); 64 | 65 | 66 | return suite; 67 | } 68 | 69 | public static void main(String[] args) { 70 | 71 | junit.textui.TestRunner.run(suite()); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /doc/affective/core/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | affective.core 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 | 20 | 21 |

affective.core

22 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Generated Documentation (Untitled) 7 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | <noscript> 69 | <div>JavaScript is disabled on your browser.</div> 70 | </noscript> 71 | <h2>Frame Alert</h2> 72 | <p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | 2 | ## Installing Weka 3 | Download the latest stable [version](https://waikato.github.io/weka-wiki/downloading_weka/) of Weka. 4 | 5 | ## Installing AffectiveTweets 6 | 7 | Install AffectiveTweets1.0.2 using the [WekaPackageManager](https://waikato.github.io/weka-wiki/packages/manager/): 8 | 9 | ```bash 10 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package AffectiveTweets 11 | ``` 12 | 13 | Make sure you have installed version 1.0.2 to run the [examples](examples). 14 | 15 | If you have problems with the Weka package repository, install the package as follows: 16 | 17 | ```bash 18 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets1.0.2.zip 19 | ``` 20 | 21 | ## Building AffectiveTweets 22 | You can also build the package from the GitHub version using the project’s [build_package.xml](https://github.com/felipebravom/AffectiveTweets/blob/master/build_package.xml) file with the [ant](http://ant.apache.org/) build tool. 
This is very useful if you want to modify the code or [contribute](../contribute) a new feature. 23 | 24 | 25 | 26 | ```bash 27 | # clone the repository 28 | git clone https://github.com/felipebravom/AffectiveTweets.git 29 | cd AffectiveTweets 30 | 31 | # Download additional files 32 | wget https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.1/extra.zip 33 | unzip extra.zip 34 | 35 | # Build the package using Apache Ant 36 | ant -f build_package.xml make_package 37 | 38 | # Install the built package 39 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package dist/AffectiveTweets.zip 40 | 41 | 42 | ``` 43 | 44 | ## Testing AffectiveTweets 45 | The software can be tested using JUnit test cases. Either the package must be installed or the JVM must be started in the AffectiveTweets directory. 46 | 47 | ```bash 48 | # run all tests 49 | ant -f build_package.xml run_tests_all 50 | ``` 51 | 52 | 53 | 54 | ## Other Useful Packages 55 | 56 | We recommend installing other useful packages for classification, regression and evaluation: 57 | 58 | * [LibLINEAR](https://www.csie.ntu.edu.tw/~cjlin/liblinear/): This package is required for running the [examples](examples). 59 | ```bash 60 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package LibLINEAR 61 | ``` 62 | 63 | * [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) 64 | 65 | ```bash 66 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package LibSVM 67 | ``` 68 | 69 | * [RankCorrelation](https://github.com/felipebravom/RankCorrelation) 70 | 71 | ```bash 72 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package RankCorrelation 73 | ``` 74 | 75 | * [Snowball-stemmers](https://github.com/fracpete/snowball-stemmers-weka-package): This package allows using the Porter stemmer as well as other Snowball stemmers.
76 | ```bash 77 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package snowball-stemmers 78 | ``` 79 | 80 | 81 | 82 | 83 | * The [WekaDeepLearning4j](https://deeplearning.cms.waikato.ac.nz/) package can be installed for training deep neural networks and word embeddings. 84 | 85 | 86 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to 
take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at fbravoma@waikato.ac.nz. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /src/main/java/affective/core/SentiStrengthEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This program is free software: you can redistribute it and/or modify 3 | * it under the terms of the GNU General Public License as published by 4 | * the Free Software Foundation, either version 3 of the License, or 5 | * (at your option) any later version. 6 | * 7 | * This program is distributed in the hope that it will be useful, 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | * GNU General Public License for more details. 11 | * 12 | * You should have received a copy of the GNU General Public License 13 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 14 | */ 15 | 16 | /* 17 | * SentiStrengthEvaluator.java 18 | * Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand 19 | * 20 | */ 21 | 22 | 23 | 24 | package affective.core; 25 | 26 | 27 | import java.io.IOException; 28 | import java.util.ArrayList; 29 | import java.util.HashMap; 30 | import java.util.List; 31 | import java.util.Map; 32 | 33 | import uk.ac.wlv.sentistrength.SentiStrength; 34 | 35 | 36 | /** 37 | * 38 | * This class is used for evaluating a tweet with SentiStrength. 39 | * It produces two features per tweet, a positive score and a negative score, 40 | * whose names are the lexicon name given to the constructor followed by 41 | * "-posScore" and "-negScore". 42 | * @author Felipe Bravo-Marquez (fjb11@students.waikato.ac.nz) 43 | * @version $Revision: 1 $ 44 | */ 45 | public class SentiStrengthEvaluator extends LexiconEvaluator { 46 | 47 | 48 | /** For serialization.
*/ 49 | private static final long serialVersionUID = -2094228012480778199L; 50 | 51 | /** The SentiStrength object. It is transient, so it is not written out when this evaluator is serialized; it is only assigned in processDict(). NOTE(review): presumably processDict() has to be called again after deserialization, before evaluateTweet() is used - confirm against the callers. */ 52 | protected transient SentiStrength sentiStrength; 53 | 54 | 55 | /** 56 | * Initializes the object and registers the names of the two features it produces (name+"-posScore" and name+"-negScore"). 57 | * 58 | * @param file the file with the lexicon 59 | * @param name the prefix for all the attributes calculated from this lexicon 60 | */ 61 | public SentiStrengthEvaluator(String file,String name) { 62 | super(file,name); 63 | 64 | 65 | 66 | this.featureNames=new ArrayList(); 67 | this.featureNames.add(name+"-posScore"); 68 | this.featureNames.add(name+"-negScore"); 69 | 70 | } 71 | 72 | 73 | /* (non-Javadoc) Creates the SentiStrength object and initialises it with the arguments "sentidata", this.path and "trinary". 74 | * @see affective.core.LexiconEvaluator#processDict() 75 | */ 76 | @Override 77 | public void processDict() throws IOException { 78 | this.sentiStrength = new SentiStrength(); 79 | String sentiParams[] = {"sentidata", this.path, "trinary"}; 80 | this.sentiStrength.initialise(sentiParams); 81 | } 82 | 83 | 84 | 85 | /* (non-Javadoc) Joins the tokens with "+" into one string, passes it to SentiStrength and returns a map holding the first two space-separated values of the result under name+"-posScore" and name+"-negScore". 86 | * @see affective.core.LexiconEvaluator#evaluateTweet(java.util.List) 87 | */ 88 | @Override 89 | public Map evaluateTweet(List tokens) { 90 | 91 | Map strengthScores = new HashMap(); 92 | /* Build the input string: the tokens in order, separated by "+" (no trailing "+"). */ 93 | String sentence = ""; 94 | for (int i = 0; i < tokens.size(); i++) { 95 | sentence += tokens.get(i); 96 | if (i < tokens.size() - 1) { 97 | sentence += "+"; 98 | } 99 | } 100 | /* sentiStrength is only assigned in processDict(), so that method must have run before this call. */ 101 | String result = sentiStrength.computeSentimentScores(sentence); 102 | /* The result is split on single spaces; values[0] is stored as the positive score and values[1] as the negative score. NOTE(review): no check that the result holds at least two numeric fields. */ 103 | String[] values = result.split(" "); 104 | strengthScores.put(name+"-posScore", Double.parseDouble(values[0])); 105 | strengthScores.put(name+"-negScore", Double.parseDouble(values[1])); 106 | 107 | return strengthScores; 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /doc/weka/filters/unsupervised/attribute/package-frame.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | weka.filters.unsupervised.attribute 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 17 | 18 | 19 | 20 | 21 |

weka.filters.unsupervised.attribute

22 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/main/java/affective/core/EmbeddingHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This program is free software: you can redistribute it and/or modify 3 | * it under the terms of the GNU General Public License as published by 4 | * the Free Software Foundation, either version 3 of the License, or 5 | * (at your option) any later version. 6 | * 7 | * This program is distributed in the hope that it will be useful, 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | * GNU General Public License for more details. 11 | * 12 | * You should have received a copy of the GNU General Public License 13 | * along with this program. If not, see . 14 | */ 15 | 16 | /* 17 | * EmbeddingHandler.java 18 | * Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand 19 | * 20 | */ 21 | 22 | 23 | 24 | package affective.core; 25 | 26 | import it.unimi.dsi.fastutil.doubles.AbstractDoubleList; 27 | import it.unimi.dsi.fastutil.objects.Object2ObjectMap; 28 | import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap; 29 | 30 | import java.io.Serializable; 31 | import java.util.Enumeration; 32 | 33 | import weka.core.Option; 34 | import weka.core.OptionHandler; 35 | 36 | 37 | 38 | 39 | /** 40 | * 41 | * This abstract class is used for handling word vector or embeddings. 42 | * 43 | * 44 | * 45 | * 46 | * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz) 47 | * @version $Revision: 1 $ 48 | */ 49 | public abstract class EmbeddingHandler implements Serializable, OptionHandler { 50 | 51 | 52 | /** For serialization, **/ 53 | private static final long serialVersionUID = -2789278587499972963L; 54 | 55 | 56 | /** Mapping between words and their vectors. 
*/ 57 | protected Object2ObjectMap wordMap=new Object2ObjectOpenHashMap(); 58 | 59 | /** Number of dimensions of the embeddings. */ 60 | protected int dimensions; 61 | 62 | 63 | 64 | /* (non-Javadoc) 65 | * @see weka.filters.Filter#listOptions() 66 | */ 67 | public Enumeration