├── _config.yml
├── benchmark
    ├── benchmark.bash
    ├── exec_time.ods
    ├── .~lock.exec_time.ods#
    ├── dataset
    │   └── .~lock.twitter-test-gold-B.tsv#
    ├── nltk.bash
    ├── nltk_scikit_ngram.py
    ├── nltk_scikit_ngram_liu.py
    └── nltk_scikit_liu_vader.py
├── img
    ├── logo.png
    └── tweetToSparseOptions.png
├── docs
    ├── img
    │   ├── logo.png
    │   └── tweetToSparseOptions.png
    ├── shortBio.bib.txt
    ├── videos.md
    └── install.md
├── doc
    ├── element-list
    ├── package-list
    ├── test
    │   ├── package-frame.html
    │   ├── package-tree.html
    │   └── package-summary.html
    ├── script.js
    ├── weka
    │   ├── core
    │   │   ├── tokenizers
    │   │   │   └── package-frame.html
    │   │   └── converters
    │   │   │   └── package-frame.html
    │   └── filters
    │   │   ├── supervised
    │   │       └── attribute
    │   │       │   └── package-frame.html
    │   │   └── unsupervised
    │   │       └── attribute
    │   │           └── package-frame.html
    ├── overview-frame.html
    ├── affective
    │   └── core
    │   │   └── package-frame.html
    ├── index.html
    ├── deprecated-list.html
    ├── overview-summary.html
    ├── allclasses-noframe.html
    └── constant-values.html
├── GenericPropertiesCreator.props
├── ISSUE_TEMPLATE.md
├── CONTRIBUTING.md
├── GUIEditors.props
├── mkdocs.yml
├── src
    ├── main
    │   ├── main.iml
    │   └── java
    │   │   ├── weka
    │   │       ├── core
    │   │       │   ├── converters
    │   │       │   │   ├── TweetCollectionToArff.java
    │   │       │   │   ├── HumanCodedToArff.java
    │   │       │   │   ├── SemEvalToArff.java
    │   │       │   │   └── NRCAffectToArff.java
    │   │       │   └── tokenizers
    │   │       │   │   └── TweetNLPTokenizer.java
    │   │       └── filters
    │   │       │   └── unsupervised
    │   │       │       └── attribute
    │   │       │           └── TweetToWordListCountFeatureVector.java
    │   │   └── affective
    │   │       └── core
    │   │           ├── LexiconEvaluator.java
    │   │           ├── SentiStrengthEvaluator.java
    │   │           ├── EmbeddingHandler.java
    │   │           ├── NegationEvaluator.java
    │   │           ├── PolarityLexiconEvaluator.java
    │   │           ├── IntensityLexiconEvaluator.java
    │   │           ├── SWN3LexiconEvaluator.java
    │   │           └── CSVEmbeddingHandler.java
    └── test
    │   ├── java
    │       └── weka
    │       │   ├── AllPackageTests.java
    │       │   └── filters
    │       │       ├── unsupervised
    │       │           └── attribute
    │       │           │   ├── PTCMTest.java
    │       │           │   ├── TweetCentroidTest.java
    │       │           │   ├── ASATest.java
    │       │           │   ├── LabelWordVectorsTest.java
    │       │           │   ├── LexiconDistantSupervisionTest.java
    │       │           │   ├── TweetNLPPOSTaggerTest.java
    │       │           │   ├── TweetToSparseFeatureVectorTest.java
    │       │           │   ├── TweetToEmbeddingsFeatureVectorTest.java
    │       │           │   ├── TweetToSentiStrengthFeatureVectorTest.java
    │       │           │   ├── TweetToWordListCountFeatureVectorTest.java
    │       │           │   └── TweetToInputLexiconFeatureVectorTest.java
    │       │       └── supervised
    │       │           └── attribute
    │       │               └── PMILexiconExpanderTest.java
    │   └── resources
    │       └── wekarefs
    │           └── weka
    │               └── filters
    │                   └── unsupervised
    │                       └── attribute
    │                           └── LexiconDistantSupervisionTest.ref
├── .gitignore
├── Description.props
├── wekarefs
    └── weka
    │   └── filters
    │       ├── supervised
    │           └── attribute
    │           │   └── PMILexiconExpanderTest.ref
    │       └── unsupervised
    │           └── attribute
    │               └── LexiconDistantSupervisionTest.ref
├── CODE_OF_CONDUCT.md
└── README.md


/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman


--------------------------------------------------------------------------------
/benchmark/benchmark.bash:
--------------------------------------------------------------------------------
1 | # Run the benchmark suites one after the other, in this order.
2 | for suite in nltk.bash affectivetweets.bash; do bash "$suite"; done
3 | 


--------------------------------------------------------------------------------
/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/img/logo.png


--------------------------------------------------------------------------------
/docs/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/docs/img/logo.png


--------------------------------------------------------------------------------
/benchmark/exec_time.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/benchmark/exec_time.ods


--------------------------------------------------------------------------------
/benchmark/.~lock.exec_time.ods#:
--------------------------------------------------------------------------------
1 | ,felipe,felipe-HP-Notebook,17.04.2019 13:50,file:///home/felipe/.config/libreoffice/4;


--------------------------------------------------------------------------------
/img/tweetToSparseOptions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/img/tweetToSparseOptions.png


--------------------------------------------------------------------------------
/benchmark/dataset/.~lock.twitter-test-gold-B.tsv#:
--------------------------------------------------------------------------------
1 | ,fbravoma,neural,13.02.2019 15:57,file:///home/fbravoma/.config/libreoffice/4;


--------------------------------------------------------------------------------
/docs/img/tweetToSparseOptions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/felipebravom/AffectiveTweets/HEAD/docs/img/tweetToSparseOptions.png


--------------------------------------------------------------------------------
/doc/element-list:
--------------------------------------------------------------------------------
1 | affective.core
2 | weka.core.converters
3 | weka.core.tokenizers
4 | weka.filters.supervised.attribute
5 | weka.filters.unsupervised.attribute
6 | 


--------------------------------------------------------------------------------
/doc/package-list:
--------------------------------------------------------------------------------
1 | affective.core
2 | weka.core.converters
3 | weka.core.tokenizers
4 | weka.filters.supervised.attribute
5 | weka.filters.unsupervised.attribute
6 | 


--------------------------------------------------------------------------------
/GenericPropertiesCreator.props:
--------------------------------------------------------------------------------
1 | affective.core.ArffLexiconEvaluator =\ affective.core
2 | affective.core.ArffLexiconWordLabeller =\ affective.core 
3 | affective.core.EmbeddingHandler =\ affective.core 


--------------------------------------------------------------------------------
/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ### Description
 2 | 
 3 | #### Expected behavior:
 4 | 
 5 | #### Encountered behavior:
 6 | 
 7 | 
 8 | ### Additional Information
 9 | 
10 | - Weka version:
11 | - AffectiveTweets version:
12 | - Operating System:
13 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## How to Contribute
2 | 
3 | New contributors are more than welcome. If you want to contribute, just fork the project and send a [pull request](https://help.github.com/articles/about-pull-requests/) with your changes.
4 | 
5 | More details about how to contribute are given [here](https://affectivetweets.cms.waikato.ac.nz/contribute/).


--------------------------------------------------------------------------------
/GUIEditors.props:
--------------------------------------------------------------------------------
1 | affective.core.ArffLexiconEvaluator=weka.gui.GenericObjectEditor
2 | affective.core.ArffLexiconEvaluator[]=weka.gui.GenericArrayEditor
3 | affective.core.ArffLexiconWordLabeller=weka.gui.GenericObjectEditor
4 | affective.core.ArffLexiconWordLabeller[]=weka.gui.GenericArrayEditor
5 | affective.core.EmbeddingHandler=weka.gui.GenericObjectEditor


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: AffectiveTweets
 2 | theme: readthedocs  
 3 | repo_url: https://github.com/felipebravom/AffectiveTweets
 4 | site_description: AffectiveTweets Documentation
 5 | nav:
 6 |     - Home: index.md
 7 |     - Installation: install.md
 8 |     - Examples: examples.md
 9 |     - Videos: videos.md
10 |     - Contributing: contribute.md
11 |     - Benchmark: benchmark.md
12 | 


--------------------------------------------------------------------------------
/src/main/main.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="JAVA_MODULE" version="4">
 3 |   <component name="NewModuleRootManager" inherit-compiler-output="true">
 4 |     <exclude-output />
 5 |     <content url="file://$MODULE_DIR$">
 6 |       <sourceFolder url="file://$MODULE_DIR$/java" isTestSource="false" />
 7 |     </content>
 8 |     <orderEntry type="inheritedJdk" />
 9 |     <orderEntry type="sourceFolder" forTests="false" />
10 |     <orderEntry type="library" name="lib" level="project" />
11 |   </component>
12 | </module>


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | *.class
 3 | 
 4 | # Mobile Tools for Java (J2ME)
 5 | .mtj.tmp/
 6 | 
 7 | # Package Files #
 8 | *.war
 9 | *.ear
10 | *.jar
11 | 
12 | # zip files #
13 | *.zip
14 | 
15 | # Eclipse files
16 | .classpath
17 | .project
18 | .settings/
19 | bin/
20 | 
21 | # IntelliJ
22 | .idea/
23 | *.iml
24 | 
25 | # Project resources
26 | resources/
27 | lexicons/
28 | dist/
29 | data/
30 | lib/
31 | 
32 | #site
33 | site/
34 | 
35 | # scripts #
36 | *.sh
37 | 
38 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
39 | hs_err_pid*
40 | 


--------------------------------------------------------------------------------
/benchmark/nltk.bash:
--------------------------------------------------------------------------------
 1 | echo "Linear model using  n-grams  (n=1,2,3,4)." 
 2 | start=`date +%s`
 3 | python nltk_scikit_ngram.py 
 4 | end=`date +%s`
 5 | runtime=$((end-start))
 6 | echo time = $runtime
 7 | 
 8 | 
 9 | echo "Linear model using  n-grams + Bing Liu's Lexicon" 
10 | start=`date +%s`
11 | python nltk_scikit_ngram_liu.py 
12 | end=`date +%s`
13 | runtime=$((end-start))
14 | echo time = $runtime
15 | 
16 | 
17 | echo "Linear model using Bing Liu's Lexicon + Vader" 
18 | start=`date +%s`
19 | python nltk_scikit_liu_vader.py 
20 | end=`date +%s`
21 | runtime=$((end-start))
22 | echo time = $runtime
23 | 
24 | 
25 | echo "Linear model using n+grams + Bing Liu's Lexicon + Vader" 
26 | start=`date +%s`
27 | python nltk_scikit_ngram_liu_vader.py 
28 | end=`date +%s`
29 | runtime=$((end-start))
30 | echo time = $runtime
31 | 


--------------------------------------------------------------------------------
/docs/shortBio.bib.txt:
--------------------------------------------------------------------------------
 1 | @article{JMLR:v20:18-450,
 2 |   author  = {Felipe Bravo-Marquez and Eibe Frank and Bernhard Pfahringer and Saif M. Mohammad},
 3 |   title   = {{AffectiveTweets}:  a {W}eka Package for Analyzing Affect in Tweets},
 4 |   journal = {Journal of Machine Learning Research},
 5 |   year    = {2019},
 6 |   volume  = {20},
 7 |   number  = {92},
 8 |   pages   = {1-6},
 9 |   url     = {http://jmlr.org/papers/v20/18-450.html}
10 | }
11 | 
12 | 
13 | @InProceedings{MohammadB17,
14 |   Title                    = {Emotion Intensities in Tweets},
15 |   Author                   = {Saif Mohammad and Felipe Bravo{-}Marquez},
16 |   Booktitle                = {Proceedings of the 6th Joint Conference on Lexical and Computational Semantics, *SEM @ACL 2017, Vancouver, Canada, August 3-4, 2017},
17 |   Year                     = {2017},
18 |   Pages                    = {65--77}
19 | }
20 | 
21 | 


--------------------------------------------------------------------------------
/doc/test/package-frame.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (1.8.0_191) on Mon Feb 11 14:22:47 NZDT 2019 -->
 6 | <title>test</title>
 7 | <meta name="date" content="2019-02-11">
 8 | <link rel="stylesheet" type="text/css" href="../stylesheet.css" title="Style">
 9 | <script type="text/javascript" src="../script.js"></script>
10 | </head>
11 | <body>
12 | <h1 class="bar"><a href="../test/package-summary.html" target="classFrame">test</a></h1>
13 | <div class="indexContainer">
14 | <h2 title="Classes">Classes</h2>
15 | <ul title="Classes">
16 | <li><a href="AffectiveTest.html" title="class in test" target="classFrame">AffectiveTest</a></li>
17 | <li><a href="AffectiveTestRunner.html" title="class in test" target="classFrame">AffectiveTestRunner</a></li>
18 | </ul>
19 | </div>
20 | </body>
21 | </html>
22 | 


--------------------------------------------------------------------------------
/doc/script.js:
--------------------------------------------------------------------------------
 1 | function show(type)
 2 | {
 3 |     count = 0;
 4 |     for (var key in methods) {
 5 |         var row = document.getElementById(key);
 6 |         if ((methods[key] &  type) != 0) {
 7 |             row.style.display = '';
 8 |             row.className = (count++ % 2) ? rowColor : altColor;
 9 |         }
10 |         else
11 |             row.style.display = 'none';
12 |     }
13 |     updateTabs(type);
14 | }
15 | 
16 | function updateTabs(type)
17 | {
18 |     for (var value in tabs) {
19 |         var sNode = document.getElementById(tabs[value][0]);
20 |         var spanNode = sNode.firstChild;
21 |         if (value == type) {
22 |             sNode.className = activeTableTab;
23 |             spanNode.innerHTML = tabs[value][1];
24 |         }
25 |         else {
26 |             sNode.className = tableTab;
27 |             spanNode.innerHTML = "<a href=\"javascript:show("+ value + ");\">" + tabs[value][1] + "</a>";
28 |         }
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/docs/videos.md:
--------------------------------------------------------------------------------
 1 | The demo videos below show how to use AffectiveTweets with the WEKA GUI.
 2 | 
 3 | ### Video 1:  Training sentiment classification models for tweets
 4 | 
 5 | [![Video 1: Training sentiment classification models for tweets](http://img.youtube.com/vi/mApoMfDSG0A/0.jpg)](http://www.youtube.com/watch?v=mApoMfDSG0A "Training sentiment classification models for tweets")
 6 | 
 7 | Click [here](https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets1.mp4) to download the video in mp4 format.
 8 | 
 9 | 
10 | ### Video 2: Creating lexicons for Twitter sentiment analysis
11 | 
12 | [![Video 2: Creating lexicons for Twitter sentiment analysis](http://img.youtube.com/vi/FnLnpIa230w/0.jpg)](http://www.youtube.com/watch?v=FnLnpIa230w "Creating lexicons for Twitter Sentiment Analysis")
13 | 
14 | Click [here](https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets2.mp4) to download the video in mp4 format.
15 | 
16 | 
17 | ### Video 3: Twitter sentiment classification with distant supervision
18 | 
19 | [![Video 3: Twitter sentiment classification with distant supervision](http://img.youtube.com/vi/fCHRSadvI4g/0.jpg)](http://www.youtube.com/watch?v=fCHRSadvI4g "Twitter Sentiment Classification with Distant Supervision")
20 | 
21 | Click [here](https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets3.mp4) to download the video in mp4 format.


--------------------------------------------------------------------------------
/Description.props:
--------------------------------------------------------------------------------
 1 | # Description file for AffectiveTweets
 2 | 
 3 | 
 4 | # Package name (required)
 5 | PackageName=AffectiveTweets
 6 | 
 7 | # Version (required)
 8 | Version=1.0.2
 9 | 
10 | #Date
11 | Date=2019-02-20
12 | 
13 | # Title (required)
14 | Title=Text Filters for Analyzing Sentiment and Emotions of Tweets    
15 | 
16 | Category=Text classification
17 | 
18 | # Author (required)
19 | Author=felipebravom <fbravoma@waikato.ac.nz>
20 | 
21 | # Maintainer (required)
22 | Maintainer=felipebravom <fbravoma@waikato.ac.nz>
23 | 
24 | # License (required)
25 | License=GPL 3.0
26 | 
27 | # Description (required)
28 | Description= Text filters for analyzing emotion and sentiment of tweets. Sample tweets annotated by sentiment and emotions can be found in ${WEKA_HOME}/packages/AffectiveTweets/data. A file with some pre-trained word embeddings can be found in ${WEKA_HOME}/packages/AffectiveTweets/resources/w2v.twitter.edinburgh.100d.csv.gz. However, we recommend using the embeddings located in: https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.0/w2v.twitter.edinburgh10M.400d.csv.gz for better performance.
29 | 
30 | # Package URL for obtaining the package archive (required)
31 | PackageURL=https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets1.0.2.zip
32 | 
33 | 
34 | # URL for further information
35 | URL=https://affectivetweets.cms.waikato.ac.nz/
36 | 
37 | # Related to other packages?
38 | #Related=
39 | 
40 | # Dependencies
41 | Depends=weka (>=3.8.1)
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/src/main/java/weka/core/converters/TweetCollectionToArff.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *   This program is free software: you can redistribute it and/or modify
 3 |  *   it under the terms of the GNU General Public License as published by
 4 |  *   the Free Software Foundation, either version 3 of the License, or
 5 |  *   (at your option) any later version.
 6 |  *
 7 |  *   This program is distributed in the hope that it will be useful,
 8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |  *   GNU General Public License for more details.
11 |  *
12 |  *   You should have received a copy of the GNU General Public License
13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
14 |  */
15 | 
16 | /*
17 |  *    TweetCollectionToArff.java
18 |  *    Copyright (C) 1999-2016 University of Waikato, Hamilton, New Zealand
19 |  *
20 |  */
21 | 
22 | 
23 | package weka.core.converters;
24 | 
25 | import weka.core.Instances;
26 | 
27 | 
28 | /**
29 |  * Abstract base for classes that build an arff dataset (a Weka
30 |  * {@link Instances} object) from a collection of tweets in a given file.
31 |  * @author Felipe Bravo-Marquez (fjb11 at students.waikato.ac.nz)
32 |  * @version 1.0
33 |  */
34 | public abstract class TweetCollectionToArff {
35 | 
36 | 	/**
37 | 	 * Creates the dataset from the given tweet collection.
38 | 	 *
39 | 	 * @param collectionPath the file with the input collection
40 | 	 * @return the Instances weka object
41 | 	 * @throws Exception if something goes wrong
42 | 	 */
43 | 	public abstract Instances createDataset(String collectionPath) throws Exception;
44 | }
45 | 


--------------------------------------------------------------------------------
/doc/weka/core/tokenizers/package-frame.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (10.0.2) on Thu Feb 21 10:45:39 NZDT 2019 -->
 6 | <title>weka.core.tokenizers</title>
 7 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 8 | <meta name="date" content="2019-02-21">
 9 | <link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
10 | <link rel="stylesheet" type="text/css" href="../../../jquery/jquery-ui.css" title="Style">
11 | <script type="text/javascript" src="../../../script.js"></script>
12 | <script type="text/javascript" src="../../../jquery/jszip/dist/jszip.min.js"></script>
13 | <script type="text/javascript" src="../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
14 | <!--[if IE]>
15 | <script type="text/javascript" src="../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
16 | <![endif]-->
17 | <script type="text/javascript" src="../../../jquery/jquery-1.10.2.js"></script>
18 | <script type="text/javascript" src="../../../jquery/jquery-ui.js"></script>
19 | </head>
20 | <body>
21 | <h1 class="bar"><a href="../../../weka/core/tokenizers/package-summary.html" target="classFrame">weka.core.tokenizers</a></h1>
22 | <div class="indexContainer">
23 | <h2 title="Classes">Classes</h2>
24 | <ul title="Classes">
25 | <li><a href="TweetNLPTokenizer.html" title="class in weka.core.tokenizers" target="classFrame">TweetNLPTokenizer</a></li>
26 | </ul>
27 | </div>
28 | </body>
29 | </html>
30 | 


--------------------------------------------------------------------------------
/doc/weka/filters/supervised/attribute/package-frame.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (10.0.2) on Thu Feb 21 10:45:39 NZDT 2019 -->
 6 | <title>weka.filters.supervised.attribute</title>
 7 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 8 | <meta name="date" content="2019-02-21">
 9 | <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
10 | <link rel="stylesheet" type="text/css" href="../../../../jquery/jquery-ui.css" title="Style">
11 | <script type="text/javascript" src="../../../../script.js"></script>
12 | <script type="text/javascript" src="../../../../jquery/jszip/dist/jszip.min.js"></script>
13 | <script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
14 | <!--[if IE]>
15 | <script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
16 | <![endif]-->
17 | <script type="text/javascript" src="../../../../jquery/jquery-1.10.2.js"></script>
18 | <script type="text/javascript" src="../../../../jquery/jquery-ui.js"></script>
19 | </head>
20 | <body>
21 | <h1 class="bar"><a href="../../../../weka/filters/supervised/attribute/package-summary.html" target="classFrame">weka.filters.supervised.attribute</a></h1>
22 | <div class="indexContainer">
23 | <h2 title="Classes">Classes</h2>
24 | <ul title="Classes">
25 | <li><a href="PMILexiconExpander.html" title="class in weka.filters.supervised.attribute" target="classFrame">PMILexiconExpander</a></li>
26 | </ul>
27 | </div>
28 | </body>
29 | </html>
30 | 


--------------------------------------------------------------------------------
/doc/weka/core/converters/package-frame.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (10.0.2) on Thu Feb 21 10:45:39 NZDT 2019 -->
 6 | <title>weka.core.converters</title>
 7 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 8 | <meta name="date" content="2019-02-21">
 9 | <link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
10 | <link rel="stylesheet" type="text/css" href="../../../jquery/jquery-ui.css" title="Style">
11 | <script type="text/javascript" src="../../../script.js"></script>
12 | <script type="text/javascript" src="../../../jquery/jszip/dist/jszip.min.js"></script>
13 | <script type="text/javascript" src="../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
14 | <!--[if IE]>
15 | <script type="text/javascript" src="../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
16 | <![endif]-->
17 | <script type="text/javascript" src="../../../jquery/jquery-1.10.2.js"></script>
18 | <script type="text/javascript" src="../../../jquery/jquery-ui.js"></script>
19 | </head>
20 | <body>
21 | <h1 class="bar"><a href="../../../weka/core/converters/package-summary.html" target="classFrame">weka.core.converters</a></h1>
22 | <div class="indexContainer">
23 | <h2 title="Classes">Classes</h2>
24 | <ul title="Classes">
25 | <li><a href="HumanCodedToArff.html" title="class in weka.core.converters" target="classFrame">HumanCodedToArff</a></li>
26 | <li><a href="NRCAffectToArff.html" title="class in weka.core.converters" target="classFrame">NRCAffectToArff</a></li>
27 | <li><a href="SemEvalToArff.html" title="class in weka.core.converters" target="classFrame">SemEvalToArff</a></li>
28 | <li><a href="TweetCollectionToArff.html" title="class in weka.core.converters" target="classFrame">TweetCollectionToArff</a></li>
29 | </ul>
30 | </div>
31 | </body>
32 | </html>
33 | 


--------------------------------------------------------------------------------
/doc/overview-frame.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (10.0.2) on Thu Feb 21 10:45:39 NZDT 2019 -->
 6 | <title>Overview List</title>
 7 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 8 | <meta name="date" content="2019-02-21">
 9 | <link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
10 | <link rel="stylesheet" type="text/css" href="jquery/jquery-ui.css" title="Style">
11 | <script type="text/javascript" src="script.js"></script>
12 | <script type="text/javascript" src="jquery/jszip/dist/jszip.min.js"></script>
13 | <script type="text/javascript" src="jquery/jszip-utils/dist/jszip-utils.min.js"></script>
14 | <!--[if IE]>
15 | <script type="text/javascript" src="jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
16 | <![endif]-->
17 | <script type="text/javascript" src="jquery/jquery-1.10.2.js"></script>
18 | <script type="text/javascript" src="jquery/jquery-ui.js"></script>
19 | </head>
20 | <body>
21 | <div class="indexNav">
22 | <ul>
23 | <li><a href="allclasses-frame.html" target="packageFrame">All&nbsp;Classes</a></li>
24 | </ul>
25 | </div>
26 | <div class="indexContainer">
27 | <h2 title="Packages">Packages</h2>
28 | <ul title="Packages">
29 | <li><a href="affective/core/package-frame.html" target="packageFrame">affective.core</a></li>
30 | <li><a href="weka/core/converters/package-frame.html" target="packageFrame">weka.core.converters</a></li>
31 | <li><a href="weka/core/tokenizers/package-frame.html" target="packageFrame">weka.core.tokenizers</a></li>
32 | <li><a href="weka/filters/supervised/attribute/package-frame.html" target="packageFrame">weka.filters.supervised.attribute</a></li>
33 | <li><a href="weka/filters/unsupervised/attribute/package-frame.html" target="packageFrame">weka.filters.unsupervised.attribute</a></li>
34 | </ul>
35 | </div>
36 | <p>&nbsp;</p>
37 | </body>
38 | </html>
39 | 


--------------------------------------------------------------------------------
/benchmark/nltk_scikit_ngram.py:
--------------------------------------------------------------------------------
 1 | # This program is free software: you can redistribute it and/or modify
 2 | # it under the terms of the GNU General Public License as published by
 3 | # the Free Software Foundation, either version 3 of the License, or
 4 | # (at your option) any later version.
 5 | #
 6 | # This program is distributed in the hope that it will be useful,
 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 | # GNU General Public License for more details.
10 | #
11 | # You should have received a copy of the GNU General Public License
12 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
13 | 
14 | # Authors: Felipe Bravo-Marquez
15 | 
16 | 	
17 | import pandas as pd       
18 | from nltk.tokenize import TweetTokenizer
19 | from nltk.sentiment.util import  mark_negation
20 | 
21 | from sklearn.feature_extraction.text import CountVectorizer  
22 | from sklearn.linear_model import LogisticRegression
23 | from sklearn.pipeline import Pipeline
24 | from sklearn.metrics import confusion_matrix, cohen_kappa_score, classification_report
25 | 
26 | 
27 | # load training and testing datasets as a pandas dataframe
28 | train_data = pd.read_csv("dataset/twitter-train-B.txt", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
29 | test_data = pd.read_csv("dataset/twitter-test-gold-B.tsv", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
30 | 
31 | # replace objective-OR-neutral and objective to neutral
32 | train_data.sent = train_data.sent.replace(['objective-OR-neutral','objective'],['neutral','neutral'])
33 | 
34 | # use a Twitter-specific tokenizer
35 | tokenizer = TweetTokenizer(preserve_case=False, reduce_len=True)
36 | 
37 | 
38 | 
39 | 
40 | #################################################
41 | #
42 | #  Train a linear model using n-gram features
43 | #  
44 | ##################################################
45 | vectorizer = CountVectorizer(tokenizer = tokenizer.tokenize, preprocessor = mark_negation, ngram_range=(1,4))  
46 | log_mod = LogisticRegression(solver='liblinear',multi_class='ovr')  
47 | text_clf = Pipeline([('vect', vectorizer), ('clf', log_mod)])
48 | 
49 | text_clf.fit(train_data.tweet, train_data.sent)
50 | 
51 | predicted = text_clf.predict(test_data.tweet)
52 | 
53 | conf = confusion_matrix(test_data.sent, predicted)
54 | kappa = cohen_kappa_score(test_data.sent, predicted) 
55 | class_rep = classification_report(test_data.sent, predicted)
56 | 
57 | 
58 | 
59 | 
60 | print('Confusion Matrix for Logistic Regression + ngram features:')
61 | print(conf)
62 | print('Classification Report')
63 | print(class_rep)
64 | print('kappa:'+str(kappa))
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/LexiconEvaluator.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *   This program is free software: you can redistribute it and/or modify
 3 |  *   it under the terms of the GNU General Public License as published by
 4 |  *   the Free Software Foundation, either version 3 of the License, or
 5 |  *   (at your option) any later version.
 6 |  *
 7 |  *   This program is distributed in the hope that it will be useful,
 8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |  *   GNU General Public License for more details.
11 |  *
12 |  *   You should have received a copy of the GNU General Public License
13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
14 |  */
15 | 
16 | /*
17 |  *    LexiconEvaluator.java
18 |  *    Copyright (C) 2018 University of Waikato, Hamilton, New Zealand
19 |  */
20 | 
21 | package affective.core;
22 | 
23 | import java.io.IOException;
24 | import java.io.Serializable;
25 | import java.util.List;
26 | import java.util.Map;
27 | 
28 | /**
29 |  *  <!-- globalinfo-start --> 
30 |  *  This abstract class provides the structure for all classes calculating attributes from lexicons.
31 |  * <!-- globalinfo-end -->
32 |  * 
33 |  * 
34 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
35 |  * @version $Revision: 1 $
36 |  */
37 | public abstract class LexiconEvaluator implements Serializable {
38 | 
39 | 	/** for serialization */
40 | 	private static final long serialVersionUID = 1L;
41 | 	
42 | 	/** The lexicon file */
43 | 	protected String path;
44 | 	
45 | 	/** The name of the lexicon */
46 | 	protected String name; 
47 | 	
48 | 	/** A list with all the features provided by the lexicon evaluator */
49 | 	protected List<String> featureNames; 
50 | 	
51 | 	
52 | 	/**
53 | 	 * initializes the Object
54 | 	 * 
55 | 	 * @param path the file with the lexicon
56 | 	 * @param name the prefix for all the attributes calculated from this lexicon
57 | 	 */	
58 | 	public LexiconEvaluator(String path,String name){
59 | 		this.path=path;
60 | 		this.name=name;
61 | 	}
62 | 	
63 | 	/**
64 | 	 * initializes the dictionary
65 | 	 * @throws IOException in case of wrong file
66 | 	 */	
67 | 	public abstract void processDict()  throws IOException;
68 | 	
69 | 
70 | 	/**
71 | 	 * Calculates lexicon-based feature values from a list of tokens
72 | 	 * @param tokens a tokenized tweet
73 | 	 * @return a mapping between attribute names and their scores
74 | 	 */	
75 | 	public abstract Map<String,Double> evaluateTweet(List<String> tokens);
76 | 
77 | 	/**
78 | 	 * Gets the feature names
79 | 	 * 
80 | 	 * @return the feature names.
81 | 	 */	
82 | 	public List<String> getFeatureNames() {
83 | 		return featureNames;
84 | 	}
85 | 
86 | 	
87 | 	
88 | }
89 | 


--------------------------------------------------------------------------------
/wekarefs/weka/filters/supervised/attribute/PMILexiconExpanderTest.ref:
--------------------------------------------------------------------------------
  1 | @relation 'Twitter Sentiment Analysis Sentiment140 Test: Dataset. More info at: http://help.sentiment140.com/-weka.filters.supervised.attribute.PMILexiconExpander-minFreq10-negClassValuenegative-posClassValuepositive-stemmerweka.core.stemmers.NullStemmer-stopwords-handlerweka.core.stopwords.Null-I1-U-tokenizerweka.core.tokenizers.TweetNLPTokenizer'
  2 | 
  3 | @attribute WORD_NAME string
  4 | @attribute PMI-SO numeric
  5 | 
  6 | @data
  7 | !,0.894939
  8 | !!,0.960034
  9 | !!!,0.960034
 10 | '\"',0.281962
 11 | &,0.80803
 12 | '\'',-0.039966
 13 | (,0.152679
 14 | ),0.696999
 15 | *,3.129959
 16 | ',',-0.361895
 17 | -,1.474607
 18 | .,-0.268908
 19 | ..,-0.933051
 20 | ...,0.474607
 21 | :,0.483596
 22 | :(,-2.624929
 23 | :),1.375071
 24 | '?',-1.728022
 25 | a,-0.066933
 26 | about,-0.817574
 27 | aig,-3.209891
 28 | all,-0.887963
 29 | am,0.544996
 30 | and,-0.539537
 31 | api,-0.847321
 32 | are,-0.817574
 33 | as,-0.262359
 34 | at,0.597464
 35 | at&t,-3.499398
 36 | awesome,2.660473
 37 | back,-0.232611
 38 | be,-0.19197
 39 | best,1.960034
 40 | but,0.307957
 41 | by,0.322604
 42 | cable,-3.499398
 43 | can,-1.624929
 44 | damn,-0.847321
 45 | day,-0.887963
 46 | dentist,-1.624929
 47 | do,-0.039966
 48 | down,-1.455004
 49 | exam,-0.847321
 50 | for,0.388877
 51 | from,0.696999
 52 | get,-0.402536
 53 | gladwell,0.767389
 54 | gm,-4.127429
 55 | go,0.097537
 56 | going,-0.525393
 57 | good,1.223068
 58 | google,1.419465
 59 | got,0.490548
 60 | great,1.725568
 61 | has,-0.039966
 62 | hate,-3.287894
 63 | have,0.429519
 64 | he,-0.039966
 65 | his,2.281962
 66 | i,-0.167346
 67 | 'i\'m',0.375071
 68 | if,-0.303001
 69 | in,0.496086
 70 | internet,-3.209891
 71 | iphone,-0.624929
 72 | is,-0.155444
 73 | it,0.834503
 74 | 'it\'s',-1.155444
 75 | its,0.223068
 76 | just,0.120498
 77 | kindle2,2.866924
 78 | last,0.767389
 79 | latex,-0.847321
 80 | lebron,1.544996
 81 | like,-0.402536
 82 | lol,0.767389
 83 | love,2.818015
 84 | malcolm,0.960034
 85 | me,-1.320074
 86 | more,-1.624929
 87 | much,0.80803
 88 | museum,1.338545
 89 | my,0.606397
 90 | never,-0.624929
 91 | new,1.696999
 92 | night,1.281962
 93 | nike,1.44546
 94 | no,-0.039966
 95 | not,-1.209891
 96 | now,-1.624929
 97 | obama,0.638105
 98 | of,-0.50563
 99 | on,-0.542467
100 | one,2.129959
101 | only,-0.847321
102 | out,-1.303001
103 | phone,-2.039966
104 | really,0.152679
105 | rt,0.960034
106 | see,0.80803
107 | so,-0.209891
108 | still,-1.624929
109 | than,-0.624929
110 | that,-2.17747
111 | the,-0.010219
112 | they,-2.262359
113 | this,-0.624929
114 | time,-2.965966
115 | to,-0.119401
116 | today,-0.209891
117 | too,-0.303001
118 | twitter,-0.718038
119 | u,-0.847321
120 | up,-1.361895
121 | using,1.182426
122 | very,1.960034
123 | want,0.638105
124 | warner,-4.994163
125 | was,0.059569
126 | we,-0.776932
127 | what,-0.624929
128 | will,0.281962
129 | with,0.622999
130 | you,-0.499398
131 | your,0.960034
132 | 


--------------------------------------------------------------------------------
/src/test/java/weka/AllPackageTests.java:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *   This program is free software: you can redistribute it and/or modify
 3 |  *   it under the terms of the GNU General Public License as published by
 4 |  *   the Free Software Foundation, either version 3 of the License, or
 5 |  *   (at your option) any later version.
 6 |  *
 7 |  *   This program is distributed in the hope that it will be useful,
 8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |  *   GNU General Public License for more details.
11 |  *
12 |  *   You should have received a copy of the GNU General Public License
13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
14 |  */
15 | 
16 | package weka;
17 | 
18 | import junit.framework.Test;
19 | import junit.framework.TestSuite;
20 | 
21 | import weka.filters.unsupervised.attribute.ASATest;
22 | import weka.filters.unsupervised.attribute.PTCMTest;
23 | import weka.filters.unsupervised.attribute.LexiconDistantSupervisionTest;
24 | import weka.filters.unsupervised.attribute.TweetNLPPOSTaggerTest;
25 | import weka.filters.supervised.attribute.PMILexiconExpanderTest;
26 | import weka.filters.unsupervised.attribute.TweetCentroidTest;
27 | import weka.filters.unsupervised.attribute.LabelWordVectorsTest;
28 | 
29 | import weka.filters.unsupervised.attribute.TweetToSentiStrengthFeatureVectorTest;
30 | import weka.filters.unsupervised.attribute.TweetToSparseFeatureVectorTest;
31 | import weka.filters.unsupervised.attribute.TweetToLexiconFeatureVectorTest;
32 | import weka.filters.unsupervised.attribute.TweetToInputLexiconFeatureVectorTest;
33 | import weka.filters.unsupervised.attribute.TweetToEmbeddingsFeatureVectorTest;
34 | import weka.filters.unsupervised.attribute.TweetToWordListCountFeatureVectorTest;
35 | 
36 | /**
37 |  * Test class for all tests in this directory. Run from the command line with:
38 |  * <p>
39 |  * java weka.AllPackageTests
40 |  * 
41 |  * @author Eibe Frank
42 |  * @version $Revision: 10160 $
43 |  */
44 | public class AllPackageTests extends TestSuite {
45 | 
46 |   public static Test suite() {
47 |     TestSuite suite = new TestSuite();
48 | 
49 |     suite.addTestSuite(ASATest.class);
50 |     suite.addTestSuite(PTCMTest.class);
51 |     suite.addTestSuite(LexiconDistantSupervisionTest.class);
52 | 
53 |     suite.addTestSuite(PMILexiconExpanderTest.class);
54 |     suite.addTestSuite(TweetCentroidTest.class);
55 |     suite.addTestSuite(LabelWordVectorsTest.class);
56 | 
57 |     suite.addTestSuite(TweetToSparseFeatureVectorTest.class);
58 |     suite.addTestSuite(TweetToLexiconFeatureVectorTest.class);
59 |     suite.addTestSuite(TweetToInputLexiconFeatureVectorTest.class);
60 |     suite.addTestSuite(TweetToSentiStrengthFeatureVectorTest.class);
61 |     suite.addTestSuite(TweetToEmbeddingsFeatureVectorTest.class);
62 |     suite.addTestSuite(TweetNLPPOSTaggerTest.class);
63 |     suite.addTestSuite(TweetToWordListCountFeatureVectorTest.class);    
64 | 
65 | 
66 |     return suite;
67 |   }
68 | 
69 |   public static void main(String[] args) {
70 | 
71 |     junit.textui.TestRunner.run(suite());
72 |   }
73 | }
74 | 


--------------------------------------------------------------------------------
/doc/affective/core/package-frame.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (10.0.2) on Thu Feb 21 10:45:39 NZDT 2019 -->
 6 | <title>affective.core</title>
 7 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 8 | <meta name="date" content="2019-02-21">
 9 | <link rel="stylesheet" type="text/css" href="../../stylesheet.css" title="Style">
10 | <link rel="stylesheet" type="text/css" href="../../jquery/jquery-ui.css" title="Style">
11 | <script type="text/javascript" src="../../script.js"></script>
12 | <script type="text/javascript" src="../../jquery/jszip/dist/jszip.min.js"></script>
13 | <script type="text/javascript" src="../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
14 | <!--[if IE]>
15 | <script type="text/javascript" src="../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
16 | <![endif]-->
17 | <script type="text/javascript" src="../../jquery/jquery-1.10.2.js"></script>
18 | <script type="text/javascript" src="../../jquery/jquery-ui.js"></script>
19 | </head>
20 | <body>
21 | <h1 class="bar"><a href="../../affective/core/package-summary.html" target="classFrame">affective.core</a></h1>
22 | <div class="indexContainer">
23 | <h2 title="Classes">Classes</h2>
24 | <ul title="Classes">
25 | <li><a href="ArffLexiconEvaluator.html" title="class in affective.core" target="classFrame">ArffLexiconEvaluator</a></li>
26 | <li><a href="ArffLexiconWordLabeller.html" title="class in affective.core" target="classFrame">ArffLexiconWordLabeller</a></li>
27 | <li><a href="CSVEmbeddingHandler.html" title="class in affective.core" target="classFrame">CSVEmbeddingHandler</a></li>
28 | <li><a href="EmbeddingHandler.html" title="class in affective.core" target="classFrame">EmbeddingHandler</a></li>
29 | <li><a href="IntensityLexiconEvaluator.html" title="class in affective.core" target="classFrame">IntensityLexiconEvaluator</a></li>
30 | <li><a href="LexiconEvaluator.html" title="class in affective.core" target="classFrame">LexiconEvaluator</a></li>
31 | <li><a href="NegationEvaluator.html" title="class in affective.core" target="classFrame">NegationEvaluator</a></li>
32 | <li><a href="NRCEmotionLexiconEvaluator.html" title="class in affective.core" target="classFrame">NRCEmotionLexiconEvaluator</a></li>
33 | <li><a href="NRCExpandedEmotionLexiconEvaluator.html" title="class in affective.core" target="classFrame">NRCExpandedEmotionLexiconEvaluator</a></li>
34 | <li><a href="NRCHashtagEmotionLexiconEvaluator.html" title="class in affective.core" target="classFrame">NRCHashtagEmotionLexiconEvaluator</a></li>
35 | <li><a href="PolarityLexiconEvaluator.html" title="class in affective.core" target="classFrame">PolarityLexiconEvaluator</a></li>
36 | <li><a href="SentiStrengthEvaluator.html" title="class in affective.core" target="classFrame">SentiStrengthEvaluator</a></li>
37 | <li><a href="SWN3LexiconEvaluator.html" title="class in affective.core" target="classFrame">SWN3LexiconEvaluator</a></li>
38 | <li><a href="Utils.html" title="class in affective.core" target="classFrame">Utils</a></li>
39 | </ul>
40 | </div>
41 | </body>
42 | </html>
43 | 


--------------------------------------------------------------------------------
/doc/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (1.8.0_191) on Wed Feb 20 17:12:12 NZDT 2019 -->
 6 | <title>Generated Documentation (Untitled)</title>
 7 | <script type="text/javascript">
 8 |     tmpTargetPage = "" + window.location.search;
 9 |     if (tmpTargetPage != "" && tmpTargetPage != "undefined")
10 |         tmpTargetPage = tmpTargetPage.substring(1);
11 |     if (tmpTargetPage.indexOf(":") != -1 || (tmpTargetPage != "" && !validURL(tmpTargetPage)))
12 |         tmpTargetPage = "undefined";
13 |     targetPage = tmpTargetPage;
14 |     function validURL(url) {
15 |         try {
16 |             url = decodeURIComponent(url);
17 |         }
18 |         catch (error) {
19 |             return false;
20 |         }
21 |         var pos = url.indexOf(".html");
22 |         if (pos == -1 || pos != url.length - 5)
23 |             return false;
24 |         var allowNumber = false;
25 |         var allowSep = false;
26 |         var seenDot = false;
27 |         for (var i = 0; i < url.length - 5; i++) {
28 |             var ch = url.charAt(i);
29 |             if ('a' <= ch && ch <= 'z' ||
30 |                     'A' <= ch && ch <= 'Z' ||
31 |                     ch == '$' ||
32 |                     ch == '_' ||
33 |                     ch.charCodeAt(0) > 127) {
34 |                 allowNumber = true;
35 |                 allowSep = true;
36 |             } else if ('0' <= ch && ch <= '9'
37 |                     || ch == '-') {
38 |                 if (!allowNumber)
39 |                      return false;
40 |             } else if (ch == '/' || ch == '.') {
41 |                 if (!allowSep)
42 |                     return false;
43 |                 allowNumber = false;
44 |                 allowSep = false;
45 |                 if (ch == '.')
46 |                      seenDot = true;
47 |                 if (ch == '/' && seenDot)
48 |                      return false;
49 |             } else {
50 |                 return false;
51 |             }
52 |         }
53 |         return true;
54 |     }
55 |     function loadFrames() {
56 |         if (targetPage != "" && targetPage != "undefined")
57 |              top.classFrame.location = top.targetPage;
58 |     }
59 | </script>
60 | </head>
61 | <frameset cols="20%,80%" title="Documentation frame" onload="top.loadFrames()">
62 | <frameset rows="30%,70%" title="Left frames" onload="top.loadFrames()">
63 | <frame src="overview-frame.html" name="packageListFrame" title="All Packages">
64 | <frame src="allclasses-frame.html" name="packageFrame" title="All classes and interfaces (except non-static nested types)">
65 | </frameset>
66 | <frame src="overview-summary.html" name="classFrame" title="Package, class and interface descriptions" scrolling="yes">
67 | <noframes>
68 | <noscript>
69 | <div>JavaScript is disabled on your browser.</div>
70 | </noscript>
71 | <h2>Frame Alert</h2>
72 | <p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p>
73 | </noframes>
74 | </frameset>
75 | </html>
76 | 


--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Installing  Weka
 3 | Download the latest stable [version](https://waikato.github.io/weka-wiki/downloading_weka/) of Weka.
 4 | 
 5 | ## Installing AffectiveTweets
 6 | 
 7 | Install AffectiveTweets1.0.2 using the [WekaPackageManager](https://waikato.github.io/weka-wiki/packages/manager/): 
 8 | 
 9 | ```bash
10 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package AffectiveTweets
11 | ```
12 | 
13 | Make sure you have installed version 1.0.2 to run the [examples](examples).
14 | 
15 | If you have problems with the Weka package repository, install the package as follows:
16 | 
17 | ```bash
18 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.2/AffectiveTweets1.0.2.zip
19 | ```
20 | 
21 | ## Building AffectiveTweets
22 | You can also build the package from the GitHub version using the project’s [build_package.xml](https://github.com/felipebravom/AffectiveTweets/blob/master/build_package.xml) file with the [ant](http://ant.apache.org/) build tool. This is very useful if you want to modify the code or [contribute](../contribute) a new feature.
23 | 
24 | 
25 | 
26 | ```bash
27 | # clone the repository
28 | git clone https://github.com/felipebravom/AffectiveTweets.git
29 | cd AffectiveTweets
30 | 
31 | # Download additional files
32 | wget https://github.com/felipebravom/AffectiveTweets/releases/download/1.0.1/extra.zip
33 | unzip extra.zip
34 | 
35 | # Build the package using apache ant
36 | ant -f build_package.xml make_package
37 | 
38 | # Install the built package 
39 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package dist/AffectiveTweets.zip
40 | 
41 | 
42 | ```
43 | 
44 | ## Testing AffectiveTweets
45 | The software can be tested using JUnit test cases. The package must either be installed, or the JVM must be started in the AffectiveTweets directory.
46 | 
47 | ```bash
48 | # run all tests
49 | ant -f build_package.xml run_tests_all 
50 | ```
51 | 
52 | 
53 | 
54 | ## Other Useful Packages
55 | 
56 | We recommend installing other useful packages for classification, regression and evaluation:
57 | 
58 | * [LibLinear](https://www.csie.ntu.edu.tw/~cjlin/liblinear/): This package is required for running the [examples](examples).
59 | ```bash
60 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package LibLINEAR
61 | ```
62 | 
63 | * [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/)
64 | 
65 | ```bash
66 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package LibSVM
67 | ```
68 | 
69 | * [RankCorrelation](https://github.com/felipebravom/RankCorrelation)
70 | 
71 | ```bash
72 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package RankCorrelation
73 | ```
74 | 
75 | * [Snowball-stemmers](https://github.com/fracpete/snowball-stemmers-weka-package): This package allows using the Porter stemmer as well as other Snowball stemmers.
76 | ```bash
77 | java -cp $WEKA_PATH/weka.jar weka.core.WekaPackageManager -install-package snowball-stemmers
78 | ```
79 | 
80 | 
81 | 
82 | 
83 | * The [WekaDeepLearning4j](https://deeplearning.cms.waikato.ac.nz/) package can be installed for training deep neural networks and word embeddings. 
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
 6 | 
 7 | ## Our Standards
 8 | 
 9 | Examples of behavior that contributes to creating a positive environment include:
10 | 
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 | 
17 | Examples of unacceptable behavior by participants include:
18 | 
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 | 
25 | ## Our Responsibilities
26 | 
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 | 
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 | 
31 | ## Scope
32 | 
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 | 
35 | ## Enforcement
36 | 
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at fbravoma@waikato.ac.nz. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 | 
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 | 
41 | ## Attribution
42 | 
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 | 
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/SentiStrengthEvaluator.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    SentiStrengthEvaluator.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | 
 24 | package affective.core;
 25 | 
 26 | 
 27 | import java.io.IOException;
 28 | import java.util.ArrayList;
 29 | import java.util.HashMap;
 30 | import java.util.List;
 31 | import java.util.Map;
 32 | 
 33 | import uk.ac.wlv.sentistrength.SentiStrength;
 34 | 
 35 | 
 36 | /**
 37 |  *  <!-- globalinfo-start --> 
 38 |  *  This class is used for evaluating a tweet with SentiStrength.
 39 |  * <!-- globalinfo-end -->
 40 |  * 
 41 |  * 
 42 |  * @author Felipe Bravo-Marquez (fjb11@students.waikato.ac.nz)
 43 |  * @version $Revision: 1 $
 44 |  */
 45 | public class SentiStrengthEvaluator extends LexiconEvaluator  {
 46 | 
 47 | 
 48 | 	/** For serialization. */
 49 | 	private static final long serialVersionUID = -2094228012480778199L;
 50 | 	
 51 | 	/** The SentiStrengh object. */
 52 | 	protected transient SentiStrength sentiStrength;
 53 | 	
 54 | 
 55 | 	/**
 56 | 	 * initializes the Object
 57 | 	 * 
 58 | 	 * @param file the file with the lexicon
 59 | 	 * @param name the prefix for all the attributes calculated from this lexicon
 60 | 	 */	
 61 | 	public SentiStrengthEvaluator(String file,String name) {
 62 | 		super(file,name);
 63 | 		
 64 | 	
 65 | 				
 66 | 		this.featureNames=new ArrayList<String>();
 67 | 		this.featureNames.add(name+"-posScore");
 68 | 		this.featureNames.add(name+"-negScore");
 69 | 
 70 | 	}
 71 | 
 72 | 
 73 | 	/* (non-Javadoc)
 74 | 	 * @see affective.core.LexiconEvaluator#processDict()
 75 | 	 */
 76 | 	@Override
 77 | 	public void processDict() throws IOException  {
 78 | 		this.sentiStrength = new SentiStrength();
 79 | 		String sentiParams[] = {"sentidata", this.path, "trinary"};
 80 | 		this.sentiStrength.initialise(sentiParams);		
 81 | 	}
 82 | 
 83 | 
 84 | 
 85 | 	/* (non-Javadoc)
 86 | 	 * @see affective.core.LexiconEvaluator#evaluateTweet(java.util.List)
 87 | 	 */
 88 | 	@Override
 89 | 	public Map<String, Double> evaluateTweet(List<String> tokens) {
 90 | 	
 91 | 		Map<String, Double> strengthScores = new HashMap<String, Double>();
 92 | 		
 93 | 		String sentence = "";
 94 | 		for (int i = 0; i < tokens.size(); i++) {
 95 | 			sentence += tokens.get(i);
 96 | 			if (i < tokens.size() - 1) {
 97 | 				sentence += "+";
 98 | 			}
 99 | 		}
100 | 		
101 | 		String result = sentiStrength.computeSentimentScores(sentence);
102 | 		
103 | 		String[] values = result.split(" ");
104 | 		strengthScores.put(name+"-posScore", Double.parseDouble(values[0]));
105 | 		strengthScores.put(name+"-negScore", Double.parseDouble(values[1]));
106 | 
107 | 		return strengthScores;
108 | 	}
109 | 	
110 | }
111 | 


--------------------------------------------------------------------------------
/doc/weka/filters/unsupervised/attribute/package-frame.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (10.0.2) on Thu Feb 21 10:45:39 NZDT 2019 -->
 6 | <title>weka.filters.unsupervised.attribute</title>
 7 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 8 | <meta name="date" content="2019-02-21">
 9 | <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
10 | <link rel="stylesheet" type="text/css" href="../../../../jquery/jquery-ui.css" title="Style">
11 | <script type="text/javascript" src="../../../../script.js"></script>
12 | <script type="text/javascript" src="../../../../jquery/jszip/dist/jszip.min.js"></script>
13 | <script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
14 | <!--[if IE]>
15 | <script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
16 | <![endif]-->
17 | <script type="text/javascript" src="../../../../jquery/jquery-1.10.2.js"></script>
18 | <script type="text/javascript" src="../../../../jquery/jquery-ui.js"></script>
19 | </head>
20 | <body>
21 | <h1 class="bar"><a href="../../../../weka/filters/unsupervised/attribute/package-summary.html" target="classFrame">weka.filters.unsupervised.attribute</a></h1>
22 | <div class="indexContainer">
23 | <h2 title="Classes">Classes</h2>
24 | <ul title="Classes">
25 | <li><a href="ASA.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">ASA</a></li>
26 | <li><a href="DistantSupervisionSyntheticFilter.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">DistantSupervisionSyntheticFilter</a></li>
27 | <li><a href="LabelWordVectors.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">LabelWordVectors</a></li>
28 | <li><a href="LexiconDistantSupervision.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">LexiconDistantSupervision</a></li>
29 | <li><a href="PTCM.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">PTCM</a></li>
30 | <li><a href="TweetCentroid.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetCentroid</a></li>
31 | <li><a href="TweetNLPPOSTagger.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetNLPPOSTagger</a></li>
32 | <li><a href="TweetToEmbeddingsFeatureVector.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetToEmbeddingsFeatureVector</a></li>
33 | <li><a href="TweetToFeatureVector.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetToFeatureVector</a></li>
34 | <li><a href="TweetToInputLexiconFeatureVector.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetToInputLexiconFeatureVector</a></li>
35 | <li><a href="TweetToLexiconFeatureVector.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetToLexiconFeatureVector</a></li>
36 | <li><a href="TweetToSentiStrengthFeatureVector.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetToSentiStrengthFeatureVector</a></li>
37 | <li><a href="TweetToSparseFeatureVector.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetToSparseFeatureVector</a></li>
38 | <li><a href="TweetToWordListCountFeatureVector.html" title="class in weka.filters.unsupervised.attribute" target="classFrame">TweetToWordListCountFeatureVector</a></li>
39 | </ul>
40 | </div>
41 | </body>
42 | </html>
43 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/EmbeddingHandler.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    EmbeddingHandler.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | 
 24 | package affective.core;
 25 | 
 26 | import it.unimi.dsi.fastutil.doubles.AbstractDoubleList;
 27 | import it.unimi.dsi.fastutil.objects.Object2ObjectMap;
 28 | import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
 29 | 
 30 | import java.io.Serializable;
 31 | import java.util.Enumeration;
 32 | 
 33 | import weka.core.Option;
 34 | import weka.core.OptionHandler;
 35 | 
 36 | 
 37 | 
 38 | 
 39 | /**
 40 |  *  <!-- globalinfo-start --> 
 41 |  *  This abstract class is used for handling word vector or embeddings.
 42 |  * 
 43 |  * <!-- globalinfo-end -->
 44 |  * 
 45 |  * 
 46 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 47 |  * @version $Revision: 1 $
 48 |  */
 49 | public abstract class EmbeddingHandler implements Serializable, OptionHandler {
 50 | 
 51 | 
 52 | 	/** For serialization, **/ 
 53 | 	private static final long serialVersionUID = -2789278587499972963L;
 54 | 	
 55 | 
 56 | 	/** Mapping between words and their vectors. */
 57 | 	protected Object2ObjectMap<String, AbstractDoubleList> wordMap=new Object2ObjectOpenHashMap<String, AbstractDoubleList>();
 58 | 
 59 | 	/** Number of dimensions of the embeddings. */ 
 60 | 	protected int dimensions;
 61 | 
 62 | 
 63 | 
 64 | 	/* (non-Javadoc)
 65 | 	 * @see weka.filters.Filter#listOptions()
 66 | 	 */
 67 | 	public Enumeration<Option> listOptions() {
 68 | 		//this.getClass().getSuperclass()
 69 | 		return Option.listOptionsForClassHierarchy(this.getClass(), this.getClass().getSuperclass()).elements();
 70 | 	}
 71 | 
 72 | 
 73 | 	/* (non-Javadoc)
 74 | 	 * @see weka.filters.Filter#getOptions()
 75 | 	 */
 76 | 	public String[] getOptions() {	
 77 | 		return Option.getOptionsForHierarchy(this, this.getClass().getSuperclass());
 78 | 
 79 | 	}
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 	/**
 85 | 	 * Parses the options for this object.
 86 | 	 *  
 87 | 	 * @param options
 88 | 	 *            the options to use
 89 | 	 * @throws Exception
 90 | 	 *             if setting of options fails
 91 | 	 */
 92 | 	public void setOptions(String[] options) throws Exception {
 93 | 		Option.setOptionsForHierarchy(options, this, this.getClass().getSuperclass());
 94 | 	}
 95 | 
 96 | 
 97 | 
 98 | 	/**
 99 | 	 * initializes the dictionary
100 | 	 * 
101 | 	 * @throws Exception in case of wrong file
102 | 	 */
103 | 	abstract public void createDict() throws Exception;
104 | 
105 | 	
106 | 	/**
107 | 	 * Gets the dictionary mapping the words to their vectors
108 | 	 * 
109 | 	 * @return the dictionary.
110 | 	 */
111 | 	public Object2ObjectMap<String, AbstractDoubleList> getWordMap() {
112 | 		return wordMap;
113 | 	}
114 | 
115 | 
116 | 	/**
117 | 	 * Gets the dimensions variable
118 | 	 * 
119 | 	 * @return the value of the variable.
120 | 	 */	
121 | 	public int getDimensions() {
122 | 		return dimensions;
123 | 	}
124 | 
125 | 
126 | 
127 | 
128 | 
129 | }
130 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/NegationEvaluator.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    NegationEvaluator.java
 18 |  *    Copyright (C) 1999-2016 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | 
 24 | package affective.core;
 25 | 
 26 | import java.io.BufferedReader;
 27 | import java.io.FileInputStream;
 28 | import java.io.IOException;
 29 | import java.io.InputStreamReader;
 30 | import java.util.ArrayList;
 31 | import java.util.HashMap;
 32 | import java.util.HashSet;
 33 | import java.util.List;
 34 | import java.util.Map;
 35 | import java.util.Set;
 36 | import java.util.zip.GZIPInputStream;
 37 | 
 38 | /**
 39 |  *  <!-- globalinfo-start --> 
 40 |  *  This class is used for counting the number of negation words in a tweet.
 41 |  *  <!-- globalinfo-end -->
 42 |  * 
 43 |  * 
 44 |  * @author Felipe Bravo-Marquez (fjb11@students.waikato.ac.nz)
 45 |  * @version $Revision: 1 $
 46 |  */
 47 | public class NegationEvaluator extends LexiconEvaluator {
 48 | 
 49 | 	/** For serialization. */
 50 | 	private static final long serialVersionUID = 1331150082874408516L;
 51 | 
 52 | 	/** The negating words. */
 53 | 	protected Set<String> wordList;
 54 | 
 55 | 	/**
 56 | 	 * Initializes the Object
 57 | 	 * 
 58 | 	 * @param path the file with the lexicon
 59 | 	 * @param name the prefix for all the attributes calculated from this lexicon
 60 | 	 */
 61 | 	public NegationEvaluator(String path, String name) {
 62 | 		super(path, name);
 63 | 		this.wordList=new HashSet<String>();
 64 | 
 65 | 		this.featureNames=new ArrayList<String>();
 66 | 		this.featureNames.add(name+"-negationCount");
 67 | 
 68 | 	}
 69 | 
 70 | 
 71 | 	/* (non-Javadoc)
 72 | 	 * @see affective.core.LexiconEvaluator#processDict()
 73 | 	 */
 74 | 	@Override
 75 | 	public void processDict() throws IOException {
 76 | 		FileInputStream fin = new FileInputStream(this.path);
 77 | 		GZIPInputStream gzis = new GZIPInputStream(fin);
 78 | 		InputStreamReader xover = new InputStreamReader(gzis);
 79 | 		BufferedReader bf = new BufferedReader(xover);
 80 | 
 81 | 		String line;
 82 | 		while ((line = bf.readLine()) != null) {
 83 | 			this.wordList.add(line);
 84 | 		}
 85 | 		bf.close();
 86 | 		xover.close();
 87 | 		gzis.close();
 88 | 		fin.close();
 89 | 
 90 | 	}
 91 | 
 92 | 
 93 | 	/* (non-Javadoc)
 94 | 	 * @see affective.core.LexiconEvaluator#evaluateTweet(java.util.List)
 95 | 	 */
 96 | 	@Override
 97 | 	public Map<String, Double> evaluateTweet(List<String> tokens) {
 98 | 		Map<String, Double> negCountsFeat = new HashMap<String, Double>();
 99 | 
100 | 		double neationCount = 0.0;
101 | 
102 | 
103 | 		// counts the number of negation words
104 | 		for (String w : tokens) {
105 | 			if(this.wordList.contains(w))
106 | 				neationCount++;
107 | 		}
108 | 
109 | 
110 | 		negCountsFeat.put(name+"-negationCount", neationCount);
111 | 
112 | 		return negCountsFeat;
113 | 
114 | 	}
115 | 	
116 | 	/**
117 | 	 * Gets the negating words
118 | 	 * 
119 | 	 * @return the word list
120 | 	 */	
121 | 	public Set<String> getWordList() {
122 | 		return wordList;
123 | 	}
124 | 
125 | 
126 | 
127 | }
128 | 


--------------------------------------------------------------------------------
/src/main/java/weka/core/converters/HumanCodedToArff.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    HumanCodedToArff.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | package weka.core.converters;
 24 | 
 25 | import java.io.BufferedReader;
 26 | import java.io.File;
 27 | import java.io.FileReader;
 28 | import java.util.ArrayList;
 29 | 
 30 | import weka.core.Attribute;
 31 | import weka.core.DenseInstance;
 32 | import weka.core.Instance;
 33 | import weka.core.Instances;
 34 | import weka.core.converters.ArffSaver;
 35 | 
 36 | /**
 37 |  * Builds an arff dataset from the 6HumanCoded collection of tweets for sentiment
 38 |  * analysis. 
 39 |  * 
 40 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 41 |  * @version 1.0
 42 |  */
 43 | public class HumanCodedToArff extends TweetCollectionToArff {
 44 | 
 45 | 	/* (non-Javadoc)
 46 | 	 * @see weka.core.converters.TweetCollectionToArff#createDataset(java.lang.String)
 47 | 	 */
 48 | 	@Override
 49 | 	public Instances createDataset(String collectionPath) throws Exception {
 50 | 
 51 | 		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
 52 | 
 53 | 		// The content of the tweet
 54 | 		attributes.add(new Attribute("content", (ArrayList<String>) null));
 55 | 		
 56 | 		attributes.add(new Attribute("pos"));
 57 | 		attributes.add(new Attribute("neg"));
 58 | 		
 59 | 		
 60 | 
 61 | 		
 62 | 		
 63 | 		Instances dataset = new Instances(
 64 | 				"6HumanCoded Dataset", attributes, 0); // The
 65 | 																				// last
 66 | 																				// attribute
 67 | 
 68 | 		BufferedReader bf = new BufferedReader(new FileReader(collectionPath));
 69 | 		String line=bf.readLine();
 70 | 		while ((line = bf.readLine()) != null) {
 71 | 			String parts[] = line.split("\t");
 72 | 
 73 | 			if(parts.length==3){
 74 | 				String content=parts[2];
 75 | 				int pos=Integer.parseInt(parts[0].trim());
 76 | 				int neg=Integer.parseInt(parts[1].trim());
 77 | 				
 78 | 								
 79 | 				
 80 | 				double values[] = new double[3];
 81 | 
 82 | 				values[0] = dataset.attribute(0).addStringValue(content);
 83 | 				values[1] = pos;
 84 | 				values[2] = neg;
 85 | 
 86 | 				Instance inst = new DenseInstance(1, values);
 87 | 				dataset.add(inst);
 88 | 
 89 | 			}
 90 | 
 91 | 		}
 92 | 
 93 | 
 94 | 		bf.close();
 95 | 
 96 | 		return dataset;
 97 | 	}
 98 | 
 99 | 	/**
100 | 	 * Main method for testing this class.
101 | 	 * 
102 | 	 * should contain the path of input dataset and the name of
103 | 	 *            target file scheme (see Evaluation)
104 | 	 * @param args arguments           
105 | 	 */
106 | 	static public void main(String args[]) {
107 | 
108 | 		if (args.length == 2) {
109 | 
110 | 			TweetCollectionToArff ta = new HumanCodedToArff();
111 | 
112 | 			try {
113 | 				Instances dataset = ta.createDataset(args[0]);
114 | 				ArffSaver saver = new ArffSaver();
115 | 				saver.setInstances(dataset);
116 | 
117 | 				saver.setFile(new File(args[1]));
118 | 				saver.writeBatch();
119 | 
120 | 			} catch (Exception e) {
121 | 				// TODO Auto-generated catch block
122 | 				e.printStackTrace();
123 | 			}
124 | 
125 | 		}
126 | 
127 | 	}
128 | 
129 | }
130 | 


--------------------------------------------------------------------------------
/doc/deprecated-list.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2 | <!-- NewPage -->
  3 | <html lang="en">
  4 | <head>
  5 | <!-- Generated by javadoc (1.8.0_191) on Wed Feb 20 17:12:12 NZDT 2019 -->
  6 | <title>Deprecated List</title>
  7 | <meta name="date" content="2019-02-20">
  8 | <link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
  9 | <script type="text/javascript" src="script.js"></script>
 10 | </head>
 11 | <body>
 12 | <script type="text/javascript"><!--
 13 |     try {
 14 |         if (location.href.indexOf('is-external=true') == -1) {
 15 |             parent.document.title="Deprecated List";
 16 |         }
 17 |     }
 18 |     catch(err) {
 19 |     }
 20 | //-->
 21 | </script>
 22 | <noscript>
 23 | <div>JavaScript is disabled on your browser.</div>
 24 | </noscript>
 25 | <!-- ========= START OF TOP NAVBAR ======= -->
 26 | <div class="topNav"><a name="navbar.top">
 27 | <!--   -->
 28 | </a>
 29 | <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
 30 | <a name="navbar.top.firstrow">
 31 | <!--   -->
 32 | </a>
 33 | <ul class="navList" title="Navigation">
 34 | <li><a href="overview-summary.html">Overview</a></li>
 35 | <li>Package</li>
 36 | <li>Class</li>
 37 | <li><a href="overview-tree.html">Tree</a></li>
 38 | <li class="navBarCell1Rev">Deprecated</li>
 39 | <li><a href="index-all.html">Index</a></li>
 40 | <li><a href="help-doc.html">Help</a></li>
 41 | </ul>
 42 | </div>
 43 | <div class="subNav">
 44 | <ul class="navList">
 45 | <li>Prev</li>
 46 | <li>Next</li>
 47 | </ul>
 48 | <ul class="navList">
 49 | <li><a href="index.html?deprecated-list.html" target="_top">Frames</a></li>
 50 | <li><a href="deprecated-list.html" target="_top">No&nbsp;Frames</a></li>
 51 | </ul>
 52 | <ul class="navList" id="allclasses_navbar_top">
 53 | <li><a href="allclasses-noframe.html">All&nbsp;Classes</a></li>
 54 | </ul>
 55 | <div>
 56 | <script type="text/javascript"><!--
 57 |   allClassesLink = document.getElementById("allclasses_navbar_top");
 58 |   if(window==top) {
 59 |     allClassesLink.style.display = "block";
 60 |   }
 61 |   else {
 62 |     allClassesLink.style.display = "none";
 63 |   }
 64 |   //-->
 65 | </script>
 66 | </div>
 67 | <a name="skip.navbar.top">
 68 | <!--   -->
 69 | </a></div>
 70 | <!-- ========= END OF TOP NAVBAR ========= -->
 71 | <div class="header">
 72 | <h1 title="Deprecated API" class="title">Deprecated API</h1>
 73 | <h2 title="Contents">Contents</h2>
 74 | </div>
 75 | <!-- ======= START OF BOTTOM NAVBAR ====== -->
 76 | <div class="bottomNav"><a name="navbar.bottom">
 77 | <!--   -->
 78 | </a>
 79 | <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
 80 | <a name="navbar.bottom.firstrow">
 81 | <!--   -->
 82 | </a>
 83 | <ul class="navList" title="Navigation">
 84 | <li><a href="overview-summary.html">Overview</a></li>
 85 | <li>Package</li>
 86 | <li>Class</li>
 87 | <li><a href="overview-tree.html">Tree</a></li>
 88 | <li class="navBarCell1Rev">Deprecated</li>
 89 | <li><a href="index-all.html">Index</a></li>
 90 | <li><a href="help-doc.html">Help</a></li>
 91 | </ul>
 92 | </div>
 93 | <div class="subNav">
 94 | <ul class="navList">
 95 | <li>Prev</li>
 96 | <li>Next</li>
 97 | </ul>
 98 | <ul class="navList">
 99 | <li><a href="index.html?deprecated-list.html" target="_top">Frames</a></li>
100 | <li><a href="deprecated-list.html" target="_top">No&nbsp;Frames</a></li>
101 | </ul>
102 | <ul class="navList" id="allclasses_navbar_bottom">
103 | <li><a href="allclasses-noframe.html">All&nbsp;Classes</a></li>
104 | </ul>
105 | <div>
106 | <script type="text/javascript"><!--
107 |   allClassesLink = document.getElementById("allclasses_navbar_bottom");
108 |   if(window==top) {
109 |     allClassesLink.style.display = "block";
110 |   }
111 |   else {
112 |     allClassesLink.style.display = "none";
113 |   }
114 |   //-->
115 | </script>
116 | </div>
117 | <a name="skip.navbar.bottom">
118 | <!--   -->
119 | </a></div>
120 | <!-- ======== END OF BOTTOM NAVBAR ======= -->
121 | </body>
122 | </html>
123 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/PTCMTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.filters.AbstractFilterTest;
 24 | import weka.filters.Filter;
 25 | 
 26 | import junit.framework.Test;
 27 | import junit.framework.TestSuite;
 28 | 
 29 | import java.io.File;
 30 | 
 31 | /**
 32 |  * Tests PTCM. Run from the command line with: <p/>
 33 |  * java weka.filters.unsupervised.attribute.PTCMTest
 34 |  * <p> 
 35 |  * AffectiveTweets package must either be installed or
 36 |  * JVM must be started in AffectiveTweets directory.
 37 |  * <p>
 38 |  * @author FracPete and eibe
 39 |  * @version $Revision: 9568 $
 40 |  */
 41 | public class PTCMTest extends AbstractFilterTest {
 42 | 
 43 |     public PTCMTest(String name) {
 44 |         super(name);
 45 |     }
 46 | 
 47 |     /** Creates a default PTCM filter */
 48 |     public Filter getFilter() {
 49 | 	Filter f = null;
 50 | 
 51 | 	// Check to see if the test is run from directory containing build_package.xml
 52 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 53 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 54 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 55 | 	    f = new PTCM();
 56 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 57 | 	} else {
 58 | 	    f = new PTCM(); // Hope that the package is installed.
 59 | 	}
 60 | 	return f;
 61 |     }
 62 | 
 63 |     /**
 64 |      * PTCM is not suitable for use in a FilteredClassifier, so this just creates a dummy
 65 |      * FilteredClassifier so that the tests run through.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 |         result.setFilter(new weka.filters.AllFilter());
 75 |         result.setClassifier(new weka.classifiers.rules.ZeroR());
 76 | 
 77 |         return result;
 78 |     }
 79 | 
 80 |     /**
 81 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
 82 |      * one of the datasets that comes with the package.
 83 |      *
 84 |      * @throws Exception if an error occurs reading the example instances.
 85 |      */
 86 |     protected void setUp() throws Exception {
 87 |         super.setUp();
 88 | 
 89 | 	// Check to see if the test is run from directory containing build_package.xml
 90 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 91 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 92 | 	} else { // Hope that package is installed.
 93 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 94 | 	}
 95 | 
 96 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
 97 |     }
 98 | 
 99 |     public static Test suite() {
100 |         return new TestSuite(PTCMTest.class);
101 |     }
102 | 
103 |     public static void main(String[] args){
104 |         junit.textui.TestRunner.run(suite());
105 |     }
106 | }
107 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/TweetCentroidTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.filters.AbstractFilterTest;
 24 | import weka.filters.Filter;
 25 | 
 26 | import junit.framework.Test;
 27 | import junit.framework.TestSuite;
 28 | 
 29 | import java.io.File;
 30 | 
 31 | /**
 32 |  * Tests TweetCentroid. Run from the command line with: <p/>
 33 |  * java weka.filters.unsupervised.attribute.TweetCentroidTest
 34 |  * <p> 
 35 |  * AffectiveTweets package must either be installed or
 36 |  * JVM must be started in AffectiveTweets directory.
 37 |  * <p>
 38 |  * @author FracPete and eibe
 39 |  * @version $Revision: 9568 $
 40 |  */
 41 | public class TweetCentroidTest extends AbstractFilterTest {
 42 | 
 43 |     public TweetCentroidTest(String name) {
 44 |         super(name);
 45 |     }
 46 | 
 47 |     /** Creates a default TweetCentroid filter */
 48 |     public Filter getFilter() {
 49 | 	Filter f = null;
 50 | 
 51 | 	// Check to see if the test is run from directory containing build_package.xml
 52 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 53 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 54 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 55 | 	    f = new TweetCentroid();
 56 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 57 | 	} else {
 58 | 	    f = new TweetCentroid(); // Hope that the package is installed.
 59 | 	}
 60 | 	return f;
 61 |     }
 62 | 
 63 |     /**
 64 |      * TweetCentroid is not suitable for use in a FilteredClassifier, so this just creates a dummy
 65 |      * FilteredClassifier so that the tests run through.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 |         result.setFilter(new weka.filters.AllFilter());
 75 |         result.setClassifier(new weka.classifiers.rules.ZeroR());
 76 | 
 77 |         return result;
 78 |     }
 79 | 
 80 |     /**
 81 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
 82 |      * one of the datasets that comes with the package.
 83 |      *
 84 |      * @throws Exception if an error occurs reading the example instances.
 85 |      */
 86 |     protected void setUp() throws Exception {
 87 |         super.setUp();
 88 | 
 89 | 	// Check to see if the test is run from directory containing build_package.xml
 90 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 91 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 92 | 	} else { // Hope that package is installed.
 93 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 94 | 	}
 95 | 
 96 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
 97 |     }
 98 | 
 99 |     public static Test suite() {
100 |         return new TestSuite(TweetCentroidTest.class);
101 |     }
102 | 
103 |     public static void main(String[] args){
104 |         junit.textui.TestRunner.run(suite());
105 |     }
106 | }
107 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/ASATest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.filters.AbstractFilterTest;
 24 | import weka.filters.Filter;
 25 | 
 26 | import junit.framework.Test;
 27 | import junit.framework.TestSuite;
 28 | 
 29 | import java.io.File;
 30 | 
 31 | /**
 32 |  * Tests ASA. Run from the command line with: <p/>
 33 |  * java weka.filters.unsupervised.attribute.ASATest
 34 |  * <p> 
 35 |  * AffectiveTweets package must either be installed or
 36 |  * JVM must be started in AffectiveTweets directory.
 37 |  * <p>
 38 |  * @author FracPete and eibe
 39 |  * @version $Revision: 9568 $
 40 |  */
 41 | public class ASATest extends AbstractFilterTest {
 42 | 
 43 |     public ASATest(String name) {
 44 |         super(name);
 45 |     }
 46 | 
 47 |     /** Creates a default ASA filter */
 48 |     public Filter getFilter() {
 49 | 	Filter f = null;
 50 | 
 51 | 	// Check to see if the test is run from directory containing build_package.xml
 52 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 53 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 54 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 55 | 	    f = new ASA();
 56 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 57 | 	} else {
 58 | 	    f = new ASA(); // Hope that the package is installed.
 59 | 	}
 60 | 	return f;
 61 |     }
 62 | 
 63 |     /**
 64 |      * ASA is not suitable for use in a FilteredClassifier, so this just creates a dummy
 65 |      * FilteredClassifier so that the tests run through.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 |         result.setFilter(new weka.filters.AllFilter());
 75 |         result.setClassifier(new weka.classifiers.rules.ZeroR());
 76 | 
 77 |         return result;
 78 |     }
 79 | 
 80 |     /**
 81 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
 82 |      * one of the datasets that comes with the package.
 83 |      *
 84 |      * @throws Exception if an error occurs reading the example instances.
 85 |      */
 86 |     protected void setUp() throws Exception {
 87 |         super.setUp();
 88 | 
 89 | 	// Check to see if the test is run from directory containing build_package.xml
 90 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 91 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 92 | 	} else { // Hope that package is installed.
 93 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 94 | 	}
 95 | 
 96 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
 97 |     }
 98 |     
 99 |     
100 |     /* (non-Javadoc)
101 |      * @see weka.filters.AbstractFilterTest#testBuffered()
102 |      */
103 |     public void testBuffered(){}
104 | 
105 |     public static Test suite() {
106 |         return new TestSuite(ASATest.class);
107 |     }
108 | 
109 |     public static void main(String[] args){
110 |         junit.textui.TestRunner.run(suite());
111 |     }
112 | }
113 | 


--------------------------------------------------------------------------------
/src/main/java/weka/core/converters/SemEvalToArff.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    SemEvalToArff.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | 
 24 | package weka.core.converters;
 25 | 
 26 | import java.io.BufferedReader;
 27 | import java.io.File;
 28 | import java.io.FileReader;
 29 | import java.util.ArrayList;
 30 | 
 31 | import weka.core.Attribute;
 32 | import weka.core.DenseInstance;
 33 | import weka.core.Instance;
 34 | import weka.core.Instances;
 35 | import weka.core.converters.ArffSaver;
 36 | 
 37 | /**
 38 |  * Builds an arff dataset from the SemEval collection of tweets for sentiment
 39 |  * analysis. More info about the task:
 40 |  * https://www.cs.york.ac.uk/semeval-2013/task2/
 41 |  * 
 42 |  * @author Felipe Bravo-Marquez (fjb11 at students.waikato.ac.nz)
 43 |  * @version 1.0
 44 |  */
 45 | 
 46 | public class SemEvalToArff extends TweetCollectionToArff {
 47 | 
 48 | 	/* (non-Javadoc)
 49 | 	 * @see weka.core.converters.TweetCollectionToArff#createDataset(java.lang.String)
 50 | 	 */
 51 | 	@Override
 52 | 	public Instances createDataset(String collectionPath) throws Exception {
 53 | 
 54 | 		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
 55 | 
 56 | 		// The content of the tweet
 57 | 		attributes.add(new Attribute("content", (ArrayList<String>) null));
 58 | 
 59 | 		// The target label
 60 | 		ArrayList<String> label = new ArrayList<String>();
 61 | 		label.add("positive");
 62 | 		label.add("neutral");
 63 | 		label.add("negative");
 64 | 
 65 | 		attributes.add(new Attribute("Class", label));
 66 | 		Instances dataset = new Instances(
 67 | 				"Twitter Sentiment Analysis SemEval Dataset", attributes, 0); // The
 68 | 																				// last
 69 | 																				// attribute
 70 | 
 71 | 		BufferedReader bf = new BufferedReader(new FileReader(collectionPath));
 72 | 		String line;
 73 | 		while ((line = bf.readLine()) != null) {
 74 | 			String parts[] = line.split("\t");
 75 | 
 76 | 			String content = parts[3];
 77 | 			String target = parts[2];
 78 | 
 79 | 			double values[] = new double[2];
 80 | 
 81 | 			// add the content
 82 | 			values[0] = dataset.attribute(0).addStringValue(content);
 83 | 
 84 | 			// add the label
 85 | 			if (target.equals("positive")) {
 86 | 				values[1] = dataset.attribute(1).indexOfValue("positive");
 87 | 			} else if (target.equals("neutral") || target.equals("objective")
 88 | 					|| target.equals("objective-OR-neutral")) {
 89 | 				values[1] = dataset.attribute(1).indexOfValue("neutral");
 90 | 			} else {
 91 | 				values[1] = dataset.attribute(1).indexOfValue("negative");
 92 | 			}
 93 | 
 94 | 			Instance inst = new DenseInstance(1, values);
 95 | 			dataset.add(inst);
 96 | 
 97 | 		}
 98 | 
 99 | 		// set the class index
100 | 		dataset.setClassIndex(dataset.numAttributes() - 1);
101 | 
102 | 		bf.close();
103 | 
104 | 		return dataset;
105 | 	}
106 | 
107 | 	/**
108 | 	 * Main method for testing this class.
109 | 	 * 
110 | 	 * 
111 | 	 *            should contain the path of input dataset and the name of
112 | 	 *            target file scheme (see Evaluation)
113 | 	 *@param args arguments            
114 | 	 */
115 | 	static public void main(String args[]) {
116 | 
117 | 		if (args.length == 2) {
118 | 
119 | 			TweetCollectionToArff ta = new SemEvalToArff();
120 | 
121 | 			try {
122 | 				Instances dataset = ta.createDataset(args[0]);
123 | 				ArffSaver saver = new ArffSaver();
124 | 				saver.setInstances(dataset);
125 | 
126 | 				saver.setFile(new File(args[1]));
127 | 				saver.writeBatch();
128 | 
129 | 			} catch (Exception e) {
130 | 				// TODO Auto-generated catch block
131 | 				e.printStackTrace();
132 | 			}
133 | 
134 | 		}
135 | 
136 | 	}
137 | 
138 | }
139 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/PolarityLexiconEvaluator.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    PolarityLexiconEvaluator.java
 18 |  *    Copyright (C) 1999-2016 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | package affective.core;
 24 | 
 25 | import java.io.BufferedReader;
 26 | import java.io.FileInputStream;
 27 | import java.io.IOException;
 28 | import java.io.InputStreamReader;
 29 | import java.util.ArrayList;
 30 | import java.util.HashMap;
 31 | import java.util.List;
 32 | import java.util.Map;
 33 | import java.util.zip.GZIPInputStream;
 34 | 
 35 | 
 36 | /**
 37 |  *  <!-- globalinfo-start --> 
 38 |  *  This class is used for evaluating the polarity lexicons with positive and negative
 39 |  *  nominal entries.
 40 |  * <!-- globalinfo-end -->
 41 |  * 
 42 |  * 
 43 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 44 |  * @version $Revision: 1 $
 45 |  */
 46 | public class PolarityLexiconEvaluator extends LexiconEvaluator {
 47 | 
 48 | 	/** For serialization. */
 49 | 	private static final long serialVersionUID = 5921580335557644894L;
 50 | 
 51 | 	/** A mapping between words and the sentiment label. */	
 52 | 	protected Map<String, String> dict;
 53 | 
 54 | 
 55 | 	/**
 56 | 	 * initializes the Object
 57 | 	 * 
 58 | 	 * @param file the file with the lexicon
 59 | 	 * @param name the prefix for all the attributes calculated from this lexicon
 60 | 	 */	
 61 | 	public PolarityLexiconEvaluator(String file,String name) {
 62 | 		super(file,name);
 63 | 		this.dict = new HashMap<String, String>();	
 64 | 
 65 | 		this.featureNames=new ArrayList<String>();
 66 | 		this.featureNames.add(name+"-posCount");
 67 | 		this.featureNames.add(name+"-negCount");
 68 | 
 69 | 	}
 70 | 
 71 | 	
 72 | 	/* (non-Javadoc)
 73 | 	 * @see affective.core.LexiconEvaluator#processDict()
 74 | 	 */
 75 | 	public void processDict() throws IOException  {
 76 | 		// first, we open the file
 77 | 		FileInputStream fin = new FileInputStream(this.path);
 78 | 		GZIPInputStream gzis = new GZIPInputStream(fin);
 79 | 		InputStreamReader xover = new InputStreamReader(gzis);
 80 | 		BufferedReader bf = new BufferedReader(xover);
 81 | 
 82 | 		String line;
 83 | 		while ((line = bf.readLine()) != null) {
 84 | 			String pair[] = line.split("\t");
 85 | 			this.dict.put(pair[0], pair[1]);
 86 | 
 87 | 		}
 88 | 		bf.close();
 89 | 		xover.close();
 90 | 		gzis.close();
 91 | 		fin.close();
 92 | 
 93 | 	}
 94 | 
 95 | 	/**
 96 | 	 * returns the sentiment associated with a word
 97 | 	 * 
 98 | 	 * @param word the input word
 99 | 	 * @return the value for the word 
100 | 	 */
101 | 	public String retrieveValue(String word) {
102 | 		if (!this.dict.containsKey(word)) {
103 | 			return "not_found";
104 | 		} else {
105 | 			return this.dict.get(word);
106 | 		}
107 | 
108 | 	}
109 | 
110 | 
111 | 	/* (non-Javadoc)
112 | 	 * @see affective.core.LexiconEvaluator#evaluateTweet(java.util.List)
113 | 	 */
114 | 	@Override
115 | 	public Map<String, Double> evaluateTweet(List<String> tokens) {
116 | 		Map<String, Double> sentCount = new HashMap<String, Double>();
117 | 
118 | 		double negCount = 0.0;
119 | 		double posCount = 0.0;
120 | 
121 | 		for (String w : tokens) {
122 | 			String pol = this.retrieveValue(w);
123 | 			if (pol.equals("positive")) {
124 | 				posCount++;
125 | 			} else if (pol.equals("negative")) {
126 | 				negCount++;
127 | 			}
128 | 		}
129 | 
130 | 		sentCount.put(this.name+"-posCount", posCount);
131 | 		sentCount.put(this.name+"-negCount", negCount);
132 | 
133 | 		return sentCount;
134 | 	}
135 | 	
136 | 	
137 | 	/**
138 | 	 * Gets the dictionary mapping the words to their sentiment
139 | 	 * 
140 | 	 * @return the dictionary.
141 | 	 */	
142 | 	public Map<String, String> getDict() {
143 | 		return this.dict;
144 | 	}	
145 | 	
146 | }
147 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/IntensityLexiconEvaluator.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    IntensityLexiconEvaluator.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | 
 24 | package affective.core;
 25 | 
 26 | import java.io.BufferedReader;
 27 | import java.io.FileInputStream;
 28 | import java.io.IOException;
 29 | import java.io.InputStreamReader;
 30 | import java.util.ArrayList;
 31 | import java.util.HashMap;
 32 | import java.util.List;
 33 | import java.util.Map;
 34 | import java.util.zip.GZIPInputStream;
 35 | 
 36 | 
 37 | /**
 38 |  *  <!-- globalinfo-start --> 
 39 |  *  This class is used for evaluating lexicons with numerical sentiment scores.
 40 |  *  <!-- globalinfo-end -->
 41 |  * 
 42 |  * 
 43 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 44 |  * @version $Revision: 1 $
 45 |  */
 46 | public class IntensityLexiconEvaluator extends LexiconEvaluator  {
 47 | 
 48 | 
 49 | 	/** For serialization. */
 50 | 	private static final long serialVersionUID = -2094228012480778199L;
 51 | 
 52 | 	/** The dictionary. */
 53 | 	protected Map<String, String> dict;
 54 | 
 55 | 	/**
 56 | 	 * Initializes the Object
 57 | 	 * 
 58 | 	 * @param file the file with the lexicon
 59 | 	 * @param name the prefix for all the attributes calculated from this lexicon
 60 | 	 */
 61 | 	public IntensityLexiconEvaluator(String file,String name) {
 62 | 		super(file,name);
 63 | 		this.dict = new HashMap<String, String>();	
 64 | 
 65 | 		this.featureNames=new ArrayList<String>();
 66 | 		this.featureNames.add(name+"-posScore");
 67 | 		this.featureNames.add(name+"-negScore");
 68 | 
 69 | 	}
 70 | 
 71 | 
 72 | 	/* (non-Javadoc)
 73 | 	 * @see affective.core.LexiconEvaluator#processDict()
 74 | 	 */
 75 | 	@Override
 76 | 	public void processDict() throws IOException  {
 77 | 		// first, we open the file
 78 | 		FileInputStream fin = new FileInputStream(this.path);
 79 | 		GZIPInputStream gzis = new GZIPInputStream(fin);
 80 | 		InputStreamReader xover = new InputStreamReader(gzis);
 81 | 		BufferedReader bf = new BufferedReader(xover);
 82 | 
 83 | 		String line;
 84 | 		while ((line = bf.readLine()) != null) {
 85 | 			String pair[] = line.split("\t");
 86 | 			this.dict.put(pair[0], pair[1]);
 87 | 
 88 | 		}
 89 | 		bf.close();
 90 | 		xover.close();
 91 | 		gzis.close();
 92 | 		fin.close();
 93 | 
 94 | 	}
 95 | 
 96 | 	/**
 97 | 	 * returns the score associated with a word
 98 | 	 * 
 99 | 	 * @param word the input word
100 | 	 * @return the value for the word 
101 | 	 */
102 | 	public String retrieveValue(String word) {
103 | 		if (!this.dict.containsKey(word)) {
104 | 			return "not_found";
105 | 		} else {
106 | 			return this.dict.get(word);
107 | 		}
108 | 
109 | 	}
110 | 	
111 | 
112 | 	/* (non-Javadoc)
113 | 	 * @see affective.core.LexiconEvaluator#evaluateTweet(java.util.List)
114 | 	 */
115 | 	@Override
116 | 	public Map<String, Double> evaluateTweet(List<String> tokens) {
117 | 		Map<String, Double> strengthScores = new HashMap<String, Double>();
118 | 		double posScore = 0;
119 | 		double negScore = 0;
120 | 		for (String w : tokens) {
121 | 			String pol = this.retrieveValue(w);
122 | 			if (!pol.equals("not_found")) {
123 | 				double value = Double.parseDouble(pol);
124 | 				if (value > 0) {
125 | 					posScore += value;
126 | 				} else {
127 | 					negScore += value;
128 | 				}
129 | 			}
130 | 		}
131 | 		strengthScores.put(name+"-posScore", posScore);
132 | 		strengthScores.put(name+"-negScore", negScore);
133 | 
134 | 		return strengthScores;
135 | 	}
136 | 
137 | 	/**
138 | 	 * Gets the dictionary mapping the words to their vectors
139 | 	 * 
140 | 	 * @return the dictionary.
141 | 	 */
142 | 	public Map<String, String> getDict() {
143 | 		return this.dict;
144 | 	}
145 | 
146 | 
147 | }
148 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <img src="img/logo.png" alt="AffectiveTweets logo" width="30%" height="30%"> 
 2 | 
 3 | ## About
 4 | 
 5 | [AffectiveTweets](https://affectivetweets.cms.waikato.ac.nz/) is a [WEKA](http://www.cs.waikato.ac.nz/~ml/weka/) package for analyzing emotion and sentiment in tweets written in English. 
 6 | 
 7 | The package implements WEKA filters for calculating state-of-the-art affective analysis features from tweets that can be fed into machine learning algorithms. Many of these features were drawn from the [NRC-Canada System](http://saifmohammad.com/WebPages/NRC-Canada-Sentiment.htm). It also implements methods for building affective lexicons and distant supervision methods for training affective models from unlabelled tweets.
 8 | 
 9 | 
10 | The package was made available as the official baseline system for the [WASSA-2017](http://optima.jrc.it/wassa2017/) Shared Task on Emotion Intensity [(EmoInt)](http://saifmohammad.com/WebPages/EmotionIntensity-SharedTask.html) and for [SemEval-2018](http://alt.qcri.org/semeval2018/) Task 1: [Affect in Tweets](http://www.saifmohammad.com/WebPages/affectintweets.htm). 
11 | 
12 | Five participating teams used AffectiveTweets in WASSA-2017 to generate feature vectors, including the teams that eventually ranked first, second, and third. For SemEval-2018, the package was used by 15 teams.
13 | 
14 | [https://affectivetweets.cms.waikato.ac.nz/](https://affectivetweets.cms.waikato.ac.nz/)
15 | 
16 | ## Using AffectiveTweets
17 | 
18 | * [About](https://affectivetweets.cms.waikato.ac.nz/#about)
19 | * [Installation](https://affectivetweets.cms.waikato.ac.nz/install/)
20 | * [Examples](https://affectivetweets.cms.waikato.ac.nz/examples/)
21 | 
22 | 
23 | 
24 | ## Relevant Papers
25 | 
26 | The most relevant papers on which this package is based are:
27 | 
28 | 
29 |  * [Sentiment Analysis of Short Informal Texts](http://saifmohammad.com/WebDocs/NRC-Sentiment-JAIR-2014.pdf). Svetlana Kiritchenko, Xiaodan Zhu and Saif Mohammad. Journal of Artificial Intelligence Research, volume 50, pages 723-762, August 2014. [BibTeX](http://saifmohammad.com/WebDocs/JAIR14-bibtex.txt)
30 |  * [Meta-Level Sentiment Models for Big Social Data Analysis](http://www.sciencedirect.com/science/article/pii/S0950705114002068). F. Bravo-Marquez, M. Mendoza and B. Poblete. Knowledge-Based Systems Volume 69, October 2014, Pages 86–99. [BibTex](http://dblp.uni-trier.de/rec/bib2/journals/kbs/Bravo-MarquezMP14.bib)
31 |  * [Stance and sentiment in tweets](http://saifmohammad.com/WebDocs/1605.01655v1.pdf). Saif M. Mohammad, Parinaz Sobhani, and Svetlana Kiritchenko. 2017. Special Section of the ACM Transactions on Internet Technology on Argumentation in Social Media 17(3). [BibTeX](http://saifmohammad.com/WebPages/Abstracts/stance-toit.bib.txt)
32 |  * [Sentiment strength detection for the social Web](http://dl.acm.org/citation.cfm?id=2336261). Thelwall, M., Buckley, K., & Paltoglou, G. (2012). Journal of the American Society for Information Science and Technology, 63(1), 163-173. [BibTex](http://dblp.uni-trier.de/rec/bib2/journals/jasis/ThelwallBP12.bib)
33 | 
34 | 
35 | 
36 | 
37 | 
38 | ## Citation
39 | - Please cite the following paper if using this package in an academic publication:
40 | 
41 |   - F. Bravo-Marquez, E. Frank, B. Pfahringer, and S. M. Mohammad. [AffectiveTweets: a WEKA Package for Analyzing Affect in Tweets](http://jmlr.org/papers/v20/18-450.html). In *Journal of Machine Learning Research*, Volume 20(92), pages 1–6, 2019. ([pdf](https://felipebravom.com/publications/jmlr2019.pdf))
42 | 
43 |   You are also welcome to cite a previous publication describing the package:
44 | 
45 |   - S. M. Mohammad and F. Bravo-Marquez. [Emotion Intensities in Tweets](http://anthology.aclweb.org/S/S17/S17-1007.pdf). In *\*Sem '17: Proceedings of the sixth joint conference on lexical and computational semantics (\*Sem)*, August 2017, Vancouver, Canada. ([pdf](https://felipebravom.com/publications/starsem2017.pdf)) 
46 | 
47 | You should also cite the papers describing any of the lexicons or resources you are using with this package. 
48 | 
49 | * Here is the [BibTeX](https://affectivetweets.cms.waikato.ac.nz/fullBio.bib.txt) entry for the package along with the entries for the resources included in the package. 
50 | 
51 | * Here is the [BibTeX](https://affectivetweets.cms.waikato.ac.nz/shortBio.bib.txt) entry just for the package.
52 | 
53 | 
54 | 
55 | ## Contact
56 | * Email: fbravoma at waikato.ac.nz
57 | * If you have questions about Weka please refer to the Weka [mailing list](https://list.waikato.ac.nz/mailman/listinfo/wekalist). 
58 | 
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/SWN3LexiconEvaluator.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    SWN3LexiconEvaluator.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | package affective.core;
 24 | 
 25 | import java.io.BufferedReader;
 26 | import java.io.FileInputStream;
 27 | import java.io.IOException;
 28 | import java.io.InputStreamReader;
 29 | import java.util.ArrayList;
 30 | import java.util.HashMap;
 31 | import java.util.List;
 32 | import java.util.Map;
 33 | import java.util.zip.GZIPInputStream;
 34 | 
 35 | /**
 36 |  *  <!-- globalinfo-start --> 
 37 |  *  This class is used for evaluating SentiWordnet.
 38 |  * <!-- globalinfo-end -->
 39 |  * 
 40 |  * 
 41 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 42 |  * @version $Revision: 1 $
 43 |  */
 44 | public class SWN3LexiconEvaluator extends LexiconEvaluator{
 45 | 
 46 | 	/** For serialization. */
 47 | 	private static final long serialVersionUID = 1576067300486821206L;
 48 | 
 49 | 	/** The dictionary. */
 50 | 	protected Map<String, Double> dict;
 51 | 
 52 | 	/**
 53 | 	 * initializes the Object
 54 | 	 * 
 55 | 	 * @param path the file with the lexicon
 56 | 	 * @param name the prefix for all the attributes calculated from this lexicon
 57 | 	 */
 58 | 	public SWN3LexiconEvaluator(String path, String name) {
 59 | 		super(path,name);
 60 | 
 61 | 		this.dict = new HashMap<String, Double>();	
 62 | 
 63 | 		this.featureNames=new ArrayList<String>();
 64 | 		this.featureNames.add(name+"-posScore");
 65 | 		this.featureNames.add(name+"-negScore");
 66 | 	}
 67 | 
 68 | 
 69 | 	/* (non-Javadoc)
 70 | 	 * @see affective.core.LexiconEvaluator#processDict()
 71 | 	 */
 72 | 	@Override
 73 | 	public void processDict() throws IOException {
 74 | 
 75 | 
 76 | 		FileInputStream fin = new FileInputStream(this.path);
 77 | 		GZIPInputStream gzis = new GZIPInputStream(fin);
 78 | 		InputStreamReader xover = new InputStreamReader(gzis);
 79 | 		BufferedReader bf = new BufferedReader(xover);
 80 | 
 81 | 
 82 | 		String line = "";
 83 | 
 84 | 		// discard comments
 85 | 		while ((line = bf.readLine()) != null) {
 86 | 			if (line.startsWith("#") || line.startsWith("				#")) {
 87 | 				continue;
 88 | 			}
 89 | 
 90 | 			String[] data = line.split("\t");
 91 | 
 92 | 			// Difference between positive and negative score for one particular Synset
 93 | 			Double polScore = Double.parseDouble(data[2])
 94 | 					- Double.parseDouble(data[3]);
 95 | 
 96 | 			// extract all the synset terms
 97 | 			String[] sysSetTerms = data[4].split(" ");
 98 | 			for (String w : sysSetTerms) {
 99 | 				String[] w_n = w.split("#");
100 | 
101 | 				String word=w_n[0];
102 | 				// the word's rank, small values indicate a more popular meaning
103 | 				// More popular word receive a higher weight
104 | 				int rank = Integer.parseInt(w_n[1]);
105 | 
106 | 				if (this.dict.containsKey(word)) {
107 | 					Double prevScore=this.dict.get(word);
108 | 					this.dict.put(word, prevScore + polScore/(1+rank));
109 | 				} else {
110 | 					this.dict.put(word, polScore/(1+rank));
111 | 				}
112 | 			}
113 | 		}
114 | 
115 | 		bf.close();
116 | 		xover.close();
117 | 		gzis.close();
118 | 		fin.close();
119 | 	}
120 | 
121 | 
122 | 	/* (non-Javadoc)
123 | 	 * @see affective.core.LexiconEvaluator#evaluateTweet(java.util.List)
124 | 	 */
125 | 	@Override
126 | 	public Map<String, Double> evaluateTweet(List<String> tokens) {
127 | 		Map<String, Double> strengthScores = new HashMap<String, Double>();
128 | 		double posScore = 0;
129 | 		double negScore = 0;
130 | 		for (String w : tokens) {
131 | 
132 | 			if (this.dict.containsKey(w)) {
133 | 				double value = this.dict.get(w);
134 | 				if (value > 0) {
135 | 					posScore += value;
136 | 				} else {
137 | 					negScore += value;
138 | 				}
139 | 			}
140 | 
141 | 		}
142 | 		strengthScores.put(name+"-posScore", posScore);
143 | 		strengthScores.put(name+"-negScore", negScore);
144 | 
145 | 		return strengthScores;
146 | 	}
147 | 
148 | }


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/LabelWordVectorsTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.filters.AbstractFilterTest;
 24 | import weka.filters.Filter;
 25 | 
 26 | import junit.framework.Test;
 27 | import junit.framework.TestSuite;
 28 | 
 29 | import java.io.File;
 30 | 
 31 | /**
 32 |  * Tests LabelWordVectors. Run from the command line with: <p/>
 33 |  * java weka.filters.unsupervised.attribute.LabelWordVectorsTest
 34 |  * <p> 
 35 |  * AffectiveTweets package must either be installed or
 36 |  * JVM must be started in AffectiveTweets directory.
 37 |  * <p>
 38 |  * @author FracPete and eibe
 39 |  * @version $Revision: 9568 $
 40 |  */
 41 | /**
 42 |  * @author fbravoma
 43 |  *
 44 |  */
 45 | public class LabelWordVectorsTest extends AbstractFilterTest {
 46 | 
 47 |     public LabelWordVectorsTest(String name) {
 48 |         super(name);
 49 |     }
 50 | 
 51 |     /** Creates a default LabelWordVectors filter */
 52 |     public Filter getFilter() {
 53 | 	Filter f = null;
 54 | 
 55 | 	// Check to see if the test is run from directory containing build_package.xml
 56 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 57 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 58 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 59 | 	    f = new LabelWordVectors();
 60 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 61 | 	} else {
 62 | 	    f = new LabelWordVectors(); // Hope that the package is installed.
 63 | 	}
 64 | 	return f;
 65 |     }
 66 | 
 67 |     /**
 68 |      * LabelWordVectors is not suitable for use in a FilteredClassifier, so this just creates a dummy
 69 |      * FilteredClassifier so that the tests run through.
 70 |      *
 71 |      * @return the configured FilteredClassifier
 72 |      */
 73 |     protected FilteredClassifier getFilteredClassifier() {
 74 |         FilteredClassifier	result;
 75 | 
 76 |         result = new FilteredClassifier();
 77 | 
 78 |         result.setFilter(new weka.filters.AllFilter());
 79 |         result.setClassifier(new weka.classifiers.rules.ZeroR());
 80 | 
 81 |         return result;
 82 |     }
 83 | 
 84 |     /**
 85 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
 86 |      * one of the datasets that comes with the package.
 87 |      *
 88 |      * @throws Exception if an error occurs reading the example instances.
 89 |      */
 90 |     protected void setUp() throws Exception {
 91 |         super.setUp();
 92 | 
 93 | 	// Check to see if the test is run from directory containing build_package.xml
 94 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 95 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 96 | 	} else { // Hope that package is installed.
 97 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 98 | 	}
 99 | 
100 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
101 |     }
102 |     
103 |     
104 |     /* (non-Javadoc)
105 |      * @see weka.filters.AbstractFilterTest#testBuffered()
106 |      */
107 |     public void testBuffered() {}
108 |     
109 |     /* (non-Javadoc)
110 |      * @see weka.filters.AbstractFilterTest#testRegression()
111 |      */
112 |     public void testRegression(){}
113 |     
114 | 
115 |     public static Test suite() {
116 |         return new TestSuite(LabelWordVectorsTest.class);
117 |     }
118 | 
119 |     public static void main(String[] args){
120 |         junit.textui.TestRunner.run(suite());
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/LexiconDistantSupervisionTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.filters.AbstractFilterTest;
 24 | import weka.filters.Filter;
 25 | 
 26 | import junit.framework.Test;
 27 | import junit.framework.TestSuite;
 28 | 
 29 | import java.io.File;
 30 | 
 31 | /**
 32 |  * Tests LexiconDistantSupervision. Run from the command line with: <p/>
 33 |  * java weka.filters.unsupervised.attribute.LexiconDistantSupervisionTest
 34 |  * <p> 
 35 |  * AffectiveTweets package must either be installed or
 36 |  * JVM must be started in AffectiveTweets directory.
 37 |  * <p>
 38 |  * @author FracPete and eibe
 39 |  * @version $Revision: 9568 $
 40 |  */
 41 | public class LexiconDistantSupervisionTest extends AbstractFilterTest {
 42 | 
 43 |     public LexiconDistantSupervisionTest(String name) {
 44 |         super(name);
 45 |     }
 46 | 
 47 |     /** Creates a default LexiconDistantSupervision filter */
 48 |     public Filter getFilter() {
 49 | 	Filter f = null;
 50 | 
 51 | 	// Check to see if the test is run from directory containing build_package.xml
 52 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 53 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 54 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 55 | 	    f = new LexiconDistantSupervision();
 56 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 57 | 	} else {
 58 | 	    f = new LexiconDistantSupervision(); // Hope that the package is installed.
 59 | 	}
 60 | 	return f;
 61 |     }
 62 | 
 63 |     /**
 64 |      * LexiconDistantSupervision is not suitable for use in a FilteredClassifier, so this just creates a dummy
 65 |      * FilteredClassifier so that the tests run through.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 |         result.setFilter(new weka.filters.AllFilter());
 75 |         result.setClassifier(new weka.classifiers.rules.ZeroR());
 76 | 
 77 |         return result;
 78 |     }
 79 | 
 80 |     /**
 81 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
 82 |      * one of the datasets that comes with the package.
 83 |      *
 84 |      * @throws Exception if an error occurs reading the example instances.
 85 |      */
 86 |     protected void setUp() throws Exception {
 87 |         super.setUp();
 88 | 
 89 | 	// Check to see if the test is run from directory containing build_package.xml
 90 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 91 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 92 | 	} else { // Hope that package is installed.
 93 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 94 | 	}
 95 | 
 96 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
 97 |     }
 98 |     
 99 |     
100 |     /* (non-Javadoc)
101 |      * @see weka.filters.AbstractFilterTest#testBatchFiltering()
102 |      */
103 |     public void testBatchFiltering(){}
104 |     
105 |     /* (non-Javadoc)
106 |      * @see weka.filters.AbstractFilterTest#testBatchFilteringLarger()
107 |      */
108 |     public void testBatchFilteringLarger(){}
109 |     
110 |     
111 |     /* (non-Javadoc)
112 |      * @see weka.filters.AbstractFilterTest#testBatchFilteringSmaller()
113 |      */
114 |     public void testBatchFilteringSmaller(){}
115 |     
116 | 
117 |     public static Test suite() {
118 |         return new TestSuite(LexiconDistantSupervisionTest.class);
119 |     }
120 | 
121 |     public static void main(String[] args){ // runs suite() with JUnit's text UI runner; args is not read
122 |         junit.textui.TestRunner.run(suite());
123 |     }
124 | }
125 | 


--------------------------------------------------------------------------------
/src/main/java/weka/core/converters/NRCAffectToArff.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    NRCAffectToArff.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | package weka.core.converters;
 23 | 
 24 | import java.io.BufferedReader;
 25 | import java.io.File;
 26 | import java.io.FileReader;
 27 | import java.util.ArrayList;
 28 | import java.util.Arrays;
 29 | import java.util.HashMap;
 30 | import java.util.Map;
 31 | 
 32 | import weka.core.Attribute;
 33 | import weka.core.DenseInstance;
 34 | import weka.core.Instance;
 35 | import weka.core.Instances;
 36 | 
 37 | /**
 38 |  * Builds an arff dataset from the NRC Affective Lexicon.
 39 |  * analysis. 
 40 |  * 
 41 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 42 |  * @version 1.0
 43 |  */
 44 | public class NRCAffectToArff {
 45 | 	
 46 | 	/**
 47 | 	 * Creates a Weka Instances object from the lexicon.
 48 | 	 * @param collectionPath the file path of the lexicon.
 49 | 	 * @return an Instances object
 50 | 	 * @throws Exception if a wrong file is used.
 51 | 	 */
 52 | 	public Instances createDataset(String collectionPath) throws Exception {
 53 | 
 54 | 		ArrayList<Attribute> attributes = new ArrayList<Attribute>();
 55 | 
 56 | 		// The content of the tweet
 57 | 		attributes.add(new Attribute("term", (ArrayList<String>) null));
 58 | 		attributes.add(new Attribute("angerScore"));
 59 | 		attributes.add(new Attribute("fearScore"));
 60 | 		attributes.add(new Attribute("sadnessScore"));
 61 | 		attributes.add(new Attribute("joyScore"));
 62 | 		
 63 | 		
 64 | 		Instances dataset = new Instances(
 65 | 				"The NRC Affect Intensity Lexicon v0.5. More info at:www.saifmohammad.com/WebPages/AffectIntensity.htm", attributes, 0); 
 66 | 
 67 | 		Map<String,Map<String,Double>> mapper=new HashMap<String,Map<String,Double>>();
 68 | 		
 69 | 		BufferedReader bf = new BufferedReader(new FileReader(collectionPath));
 70 | 		String line;		
 71 | 		while ((line = bf.readLine()) != null) {
 72 | 			String parts[] = line.split("\t");
 73 | 			String term=parts[0];
 74 | 			Double score= Double.parseDouble(parts[1]);
 75 | 			String affectDim=parts[2];
 76 | 	//		System.out.println(term+" "+score+" "+affectDim);
 77 | 			
 78 | 			if(!mapper.containsKey(term)){
 79 | 				Map<String,Double> scoreVals=new HashMap<String,Double>();
 80 | 				scoreVals.put(affectDim, score);
 81 | 				mapper.put(term,scoreVals);
 82 | 			}
 83 | 			else{
 84 | 				Map<String,Double> scoreVals=mapper.get(term);
 85 | 				scoreVals.put(affectDim, score);
 86 | 			}
 87 | 
 88 | 		}
 89 | 	
 90 | 		
 91 | 		String[] sortedWords=mapper.keySet().toArray(new String[0]);
 92 | 		Arrays.sort(sortedWords);
 93 | 		
 94 | 		for(String word:sortedWords){
 95 | 			Map<String,Double> scoreVals=mapper.get(word);
 96 | 			double angerScore= scoreVals.containsKey("anger")?scoreVals.get("anger"):weka.core.Utils.missingValue();
 97 | 			double fearScore= scoreVals.containsKey("fear")?scoreVals.get("fear"):weka.core.Utils.missingValue();
 98 | 			double sadnessScore= scoreVals.containsKey("sadness")?scoreVals.get("sadness"):weka.core.Utils.missingValue();
 99 | 			double joyScore= scoreVals.containsKey("joy")?scoreVals.get("joy"):weka.core.Utils.missingValue();
100 | 		    
101 | 			double values[] = new double[5];
102 | 			values[0]=dataset.attribute(0).addStringValue(word);
103 | 			values[1]=angerScore;
104 | 			values[2]=fearScore;
105 | 			values[3]=sadnessScore;
106 | 			values[4]=joyScore;
107 | 		
108 | 			Instance inst = new DenseInstance(1, values);
109 | 			dataset.add(inst);
110 | 			
111 | 		}
112 | 		
113 | 		
114 | 
115 | 		bf.close();
116 | 
117 | 		return dataset;
118 | 	}
119 | 	
120 | 	
121 | 	/**
122 | 	 * Main method for testing this class.
123 | 	 * 
124 | 	 * should contain the path of input dataset and the name of
125 | 	 *            target file scheme (see Evaluation)
126 | 	 * @param args arguments           
127 | 	 */
128 | 	static public void main(String args[]) {
129 | 
130 | 		if (args.length == 2) {
131 | 
132 | 			NRCAffectToArff na = new NRCAffectToArff();
133 | 
134 | 			try {
135 | 				Instances dataset = na.createDataset(args[0]);
136 | 				ArffSaver saver = new ArffSaver();
137 | 				saver.setInstances(dataset);
138 | 
139 | 				saver.setFile(new File(args[1]));
140 | 				saver.writeBatch();
141 | 
142 | 			} catch (Exception e) {
143 | 				// TODO Auto-generated catch block
144 | 				e.printStackTrace();
145 | 			}
146 | 
147 | 		}
148 | 
149 | 	}
150 | 	
151 | 	
152 | 
153 | }
154 | 


--------------------------------------------------------------------------------
/doc/test/package-tree.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2 | <!-- NewPage -->
  3 | <html lang="en">
  4 | <head>
  5 | <!-- Generated by javadoc (1.8.0_191) on Mon Feb 11 14:22:47 NZDT 2019 -->
  6 | <title>test Class Hierarchy</title>
  7 | <meta name="date" content="2019-02-11">
  8 | <link rel="stylesheet" type="text/css" href="../stylesheet.css" title="Style">
  9 | <script type="text/javascript" src="../script.js"></script>
 10 | </head>
 11 | <body>
 12 | <script type="text/javascript"><!--
 13 |     try { // NOTE(review): presumably guards against parent.document being inaccessible - confirm
 14 |         if (location.href.indexOf('is-external=true') == -1) { // only when the URL does not contain is-external=true
 15 |             parent.document.title="test Class Hierarchy"; // copy this page's title onto the parent window's document
 16 |         }
 17 |     }
 18 |     catch(err) { // empty handler: any error raised above is silently ignored
 19 |     }
 20 | //-->
 21 | </script>
 22 | <noscript>
 23 | <div>JavaScript is disabled on your browser.</div>
 24 | </noscript>
 25 | <!-- ========= START OF TOP NAVBAR ======= -->
 26 | <div class="topNav"><a name="navbar.top">
 27 | <!--   -->
 28 | </a>
 29 | <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
 30 | <a name="navbar.top.firstrow">
 31 | <!--   -->
 32 | </a>
 33 | <ul class="navList" title="Navigation">
 34 | <li><a href="../overview-summary.html">Overview</a></li>
 35 | <li><a href="package-summary.html">Package</a></li>
 36 | <li>Class</li>
 37 | <li class="navBarCell1Rev">Tree</li>
 38 | <li><a href="../deprecated-list.html">Deprecated</a></li>
 39 | <li><a href="../index-all.html">Index</a></li>
 40 | <li><a href="../help-doc.html">Help</a></li>
 41 | </ul>
 42 | </div>
 43 | <div class="subNav">
 44 | <ul class="navList">
 45 | <li><a href="../affective/core/package-tree.html">Prev</a></li>
 46 | <li><a href="../weka/core/converters/package-tree.html">Next</a></li>
 47 | </ul>
 48 | <ul class="navList">
 49 | <li><a href="../index.html?test/package-tree.html" target="_top">Frames</a></li>
 50 | <li><a href="package-tree.html" target="_top">No&nbsp;Frames</a></li>
 51 | </ul>
 52 | <ul class="navList" id="allclasses_navbar_top">
 53 | <li><a href="../allclasses-noframe.html">All&nbsp;Classes</a></li>
 54 | </ul>
 55 | <div>
 56 | <script type="text/javascript"><!--
 57 |   allClassesLink = document.getElementById("allclasses_navbar_top"); // the "All Classes" list of the top navbar (assigned without var)
 58 |   if(window==top) { // this page is the top-level window, i.e. not shown inside a frame
 59 |     allClassesLink.style.display = "block"; // show the "All Classes" link
 60 |   }
 61 |   else { // this page is shown inside a frame
 62 |     allClassesLink.style.display = "none"; // hide the "All Classes" link
 63 |   }
 64 |   //-->
 65 | </script>
 66 | </div>
 67 | <a name="skip.navbar.top">
 68 | <!--   -->
 69 | </a></div>
 70 | <!-- ========= END OF TOP NAVBAR ========= -->
 71 | <div class="header">
 72 | <h1 class="title">Hierarchy For Package test</h1>
 73 | <span class="packageHierarchyLabel">Package Hierarchies:</span>
 74 | <ul class="horizontal">
 75 | <li><a href="../overview-tree.html">All Packages</a></li>
 76 | </ul>
 77 | </div>
 78 | <div class="contentContainer">
 79 | <h2 title="Class Hierarchy">Class Hierarchy</h2>
 80 | <ul>
 81 | <li type="circle">java.lang.Object
 82 | <ul>
 83 | <li type="circle">test.<a href="../test/AffectiveTestRunner.html" title="class in test"><span class="typeNameLink">AffectiveTestRunner</span></a></li>
 84 | <li type="circle">junit.framework.Assert
 85 | <ul>
 86 | <li type="circle">junit.framework.TestCase (implements junit.framework.Test)
 87 | <ul>
 88 | <li type="circle">test.<a href="../test/AffectiveTest.html" title="class in test"><span class="typeNameLink">AffectiveTest</span></a></li>
 89 | </ul>
 90 | </li>
 91 | </ul>
 92 | </li>
 93 | </ul>
 94 | </li>
 95 | </ul>
 96 | </div>
 97 | <!-- ======= START OF BOTTOM NAVBAR ====== -->
 98 | <div class="bottomNav"><a name="navbar.bottom">
 99 | <!--   -->
100 | </a>
101 | <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
102 | <a name="navbar.bottom.firstrow">
103 | <!--   -->
104 | </a>
105 | <ul class="navList" title="Navigation">
106 | <li><a href="../overview-summary.html">Overview</a></li>
107 | <li><a href="package-summary.html">Package</a></li>
108 | <li>Class</li>
109 | <li class="navBarCell1Rev">Tree</li>
110 | <li><a href="../deprecated-list.html">Deprecated</a></li>
111 | <li><a href="../index-all.html">Index</a></li>
112 | <li><a href="../help-doc.html">Help</a></li>
113 | </ul>
114 | </div>
115 | <div class="subNav">
116 | <ul class="navList">
117 | <li><a href="../affective/core/package-tree.html">Prev</a></li>
118 | <li><a href="../weka/core/converters/package-tree.html">Next</a></li>
119 | </ul>
120 | <ul class="navList">
121 | <li><a href="../index.html?test/package-tree.html" target="_top">Frames</a></li>
122 | <li><a href="package-tree.html" target="_top">No&nbsp;Frames</a></li>
123 | </ul>
124 | <ul class="navList" id="allclasses_navbar_bottom">
125 | <li><a href="../allclasses-noframe.html">All&nbsp;Classes</a></li>
126 | </ul>
127 | <div>
128 | <script type="text/javascript"><!--
129 |   allClassesLink = document.getElementById("allclasses_navbar_bottom"); // the "All Classes" list of the bottom navbar (assigned without var)
130 |   if(window==top) { // this page is the top-level window, i.e. not shown inside a frame
131 |     allClassesLink.style.display = "block"; // show the "All Classes" link
132 |   }
133 |   else { // this page is shown inside a frame
134 |     allClassesLink.style.display = "none"; // hide the "All Classes" link
135 |   }
136 |   //-->
137 | </script>
138 | </div>
139 | <a name="skip.navbar.bottom">
140 | <!--   -->
141 | </a></div>
142 | <!-- ======== END OF BOTTOM NAVBAR ======= -->
143 | </body>
144 | </html>
145 | 


--------------------------------------------------------------------------------
/doc/test/package-summary.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2 | <!-- NewPage -->
  3 | <html lang="en">
  4 | <head>
  5 | <!-- Generated by javadoc (1.8.0_191) on Mon Feb 11 14:22:47 NZDT 2019 -->
  6 | <title>test</title>
  7 | <meta name="date" content="2019-02-11">
  8 | <link rel="stylesheet" type="text/css" href="../stylesheet.css" title="Style">
  9 | <script type="text/javascript" src="../script.js"></script>
 10 | </head>
 11 | <body>
 12 | <script type="text/javascript"><!--
 13 |     try { // NOTE(review): presumably guards against parent.document being inaccessible - confirm
 14 |         if (location.href.indexOf('is-external=true') == -1) { // only when the URL does not contain is-external=true
 15 |             parent.document.title="test"; // copy this page's title onto the parent window's document
 16 |         }
 17 |     }
 18 |     catch(err) { // empty handler: any error raised above is silently ignored
 19 |     }
 20 | //-->
 21 | </script>
 22 | <noscript>
 23 | <div>JavaScript is disabled on your browser.</div>
 24 | </noscript>
 25 | <!-- ========= START OF TOP NAVBAR ======= -->
 26 | <div class="topNav"><a name="navbar.top">
 27 | <!--   -->
 28 | </a>
 29 | <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
 30 | <a name="navbar.top.firstrow">
 31 | <!--   -->
 32 | </a>
 33 | <ul class="navList" title="Navigation">
 34 | <li><a href="../overview-summary.html">Overview</a></li>
 35 | <li class="navBarCell1Rev">Package</li>
 36 | <li>Class</li>
 37 | <li><a href="package-tree.html">Tree</a></li>
 38 | <li><a href="../deprecated-list.html">Deprecated</a></li>
 39 | <li><a href="../index-all.html">Index</a></li>
 40 | <li><a href="../help-doc.html">Help</a></li>
 41 | </ul>
 42 | </div>
 43 | <div class="subNav">
 44 | <ul class="navList">
 45 | <li><a href="../affective/core/package-summary.html">Prev&nbsp;Package</a></li>
 46 | <li><a href="../weka/core/converters/package-summary.html">Next&nbsp;Package</a></li>
 47 | </ul>
 48 | <ul class="navList">
 49 | <li><a href="../index.html?test/package-summary.html" target="_top">Frames</a></li>
 50 | <li><a href="package-summary.html" target="_top">No&nbsp;Frames</a></li>
 51 | </ul>
 52 | <ul class="navList" id="allclasses_navbar_top">
 53 | <li><a href="../allclasses-noframe.html">All&nbsp;Classes</a></li>
 54 | </ul>
 55 | <div>
 56 | <script type="text/javascript"><!--
 57 |   allClassesLink = document.getElementById("allclasses_navbar_top"); // the "All Classes" list of the top navbar (assigned without var)
 58 |   if(window==top) { // this page is the top-level window, i.e. not shown inside a frame
 59 |     allClassesLink.style.display = "block"; // show the "All Classes" link
 60 |   }
 61 |   else { // this page is shown inside a frame
 62 |     allClassesLink.style.display = "none"; // hide the "All Classes" link
 63 |   }
 64 |   //-->
 65 | </script>
 66 | </div>
 67 | <a name="skip.navbar.top">
 68 | <!--   -->
 69 | </a></div>
 70 | <!-- ========= END OF TOP NAVBAR ========= -->
 71 | <div class="header">
 72 | <h1 title="Package" class="title">Package&nbsp;test</h1>
 73 | </div>
 74 | <div class="contentContainer">
 75 | <ul class="blockList">
 76 | <li class="blockList">
 77 | <table class="typeSummary" border="0" cellpadding="3" cellspacing="0" summary="Class Summary table, listing classes, and an explanation">
 78 | <caption><span>Class Summary</span><span class="tabEnd">&nbsp;</span></caption>
 79 | <tr>
 80 | <th class="colFirst" scope="col">Class</th>
 81 | <th class="colLast" scope="col">Description</th>
 82 | </tr>
 83 | <tbody>
 84 | <tr class="altColor">
 85 | <td class="colFirst"><a href="../test/AffectiveTest.html" title="class in test">AffectiveTest</a></td>
 86 | <td class="colLast">&nbsp;</td>
 87 | </tr>
 88 | <tr class="rowColor">
 89 | <td class="colFirst"><a href="../test/AffectiveTestRunner.html" title="class in test">AffectiveTestRunner</a></td>
 90 | <td class="colLast">&nbsp;</td>
 91 | </tr>
 92 | </tbody>
 93 | </table>
 94 | </li>
 95 | </ul>
 96 | </div>
 97 | <!-- ======= START OF BOTTOM NAVBAR ====== -->
 98 | <div class="bottomNav"><a name="navbar.bottom">
 99 | <!--   -->
100 | </a>
101 | <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
102 | <a name="navbar.bottom.firstrow">
103 | <!--   -->
104 | </a>
105 | <ul class="navList" title="Navigation">
106 | <li><a href="../overview-summary.html">Overview</a></li>
107 | <li class="navBarCell1Rev">Package</li>
108 | <li>Class</li>
109 | <li><a href="package-tree.html">Tree</a></li>
110 | <li><a href="../deprecated-list.html">Deprecated</a></li>
111 | <li><a href="../index-all.html">Index</a></li>
112 | <li><a href="../help-doc.html">Help</a></li>
113 | </ul>
114 | </div>
115 | <div class="subNav">
116 | <ul class="navList">
117 | <li><a href="../affective/core/package-summary.html">Prev&nbsp;Package</a></li>
118 | <li><a href="../weka/core/converters/package-summary.html">Next&nbsp;Package</a></li>
119 | </ul>
120 | <ul class="navList">
121 | <li><a href="../index.html?test/package-summary.html" target="_top">Frames</a></li>
122 | <li><a href="package-summary.html" target="_top">No&nbsp;Frames</a></li>
123 | </ul>
124 | <ul class="navList" id="allclasses_navbar_bottom">
125 | <li><a href="../allclasses-noframe.html">All&nbsp;Classes</a></li>
126 | </ul>
127 | <div>
128 | <script type="text/javascript"><!--
129 |   allClassesLink = document.getElementById("allclasses_navbar_bottom"); // the "All Classes" list of the bottom navbar (assigned without var)
130 |   if(window==top) { // this page is the top-level window, i.e. not shown inside a frame
131 |     allClassesLink.style.display = "block"; // show the "All Classes" link
132 |   }
133 |   else { // this page is shown inside a frame
134 |     allClassesLink.style.display = "none"; // hide the "All Classes" link
135 |   }
136 |   //-->
137 | </script>
138 | </div>
139 | <a name="skip.navbar.bottom">
140 | <!--   -->
141 | </a></div>
142 | <!-- ======== END OF BOTTOM NAVBAR ======= -->
143 | </body>
144 | </html>
145 | 


--------------------------------------------------------------------------------
/doc/overview-summary.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2 | <!-- NewPage -->
  3 | <html lang="en">
  4 | <head>
  5 | <!-- Generated by javadoc (1.8.0_191) on Wed Feb 20 17:12:12 NZDT 2019 -->
  6 | <title>Overview</title>
  7 | <meta name="date" content="2019-02-20">
  8 | <link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
  9 | <script type="text/javascript" src="script.js"></script>
 10 | </head>
 11 | <body>
 12 | <script type="text/javascript"><!--
 13 |     try { // NOTE(review): presumably guards against parent.document being inaccessible - confirm
 14 |         if (location.href.indexOf('is-external=true') == -1) { // only when the URL does not contain is-external=true
 15 |             parent.document.title="Overview"; // copy this page's title onto the parent window's document
 16 |         }
 17 |     }
 18 |     catch(err) { // empty handler: any error raised above is silently ignored
 19 |     }
 20 | //-->
 21 | </script>
 22 | <noscript>
 23 | <div>JavaScript is disabled on your browser.</div>
 24 | </noscript>
 25 | <!-- ========= START OF TOP NAVBAR ======= -->
 26 | <div class="topNav"><a name="navbar.top">
 27 | <!--   -->
 28 | </a>
 29 | <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
 30 | <a name="navbar.top.firstrow">
 31 | <!--   -->
 32 | </a>
 33 | <ul class="navList" title="Navigation">
 34 | <li class="navBarCell1Rev">Overview</li>
 35 | <li>Package</li>
 36 | <li>Class</li>
 37 | <li><a href="overview-tree.html">Tree</a></li>
 38 | <li><a href="deprecated-list.html">Deprecated</a></li>
 39 | <li><a href="index-all.html">Index</a></li>
 40 | <li><a href="help-doc.html">Help</a></li>
 41 | </ul>
 42 | </div>
 43 | <div class="subNav">
 44 | <ul class="navList">
 45 | <li>Prev</li>
 46 | <li>Next</li>
 47 | </ul>
 48 | <ul class="navList">
 49 | <li><a href="index.html?overview-summary.html" target="_top">Frames</a></li>
 50 | <li><a href="overview-summary.html" target="_top">No&nbsp;Frames</a></li>
 51 | </ul>
 52 | <ul class="navList" id="allclasses_navbar_top">
 53 | <li><a href="allclasses-noframe.html">All&nbsp;Classes</a></li>
 54 | </ul>
 55 | <div>
 56 | <script type="text/javascript"><!--
 57 |   allClassesLink = document.getElementById("allclasses_navbar_top"); // the "All Classes" list of the top navbar (assigned without var)
 58 |   if(window==top) { // this page is the top-level window, i.e. not shown inside a frame
 59 |     allClassesLink.style.display = "block"; // show the "All Classes" link
 60 |   }
 61 |   else { // this page is shown inside a frame
 62 |     allClassesLink.style.display = "none"; // hide the "All Classes" link
 63 |   }
 64 |   //-->
 65 | </script>
 66 | </div>
 67 | <a name="skip.navbar.top">
 68 | <!--   -->
 69 | </a></div>
 70 | <!-- ========= END OF TOP NAVBAR ========= -->
 71 | <div class="contentContainer">
 72 | <table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Packages table, listing packages, and an explanation">
 73 | <caption><span>Packages</span><span class="tabEnd">&nbsp;</span></caption>
 74 | <tr>
 75 | <th class="colFirst" scope="col">Package</th>
 76 | <th class="colLast" scope="col">Description</th>
 77 | </tr>
 78 | <tbody>
 79 | <tr class="altColor">
 80 | <td class="colFirst"><a href="affective/core/package-summary.html">affective.core</a></td>
 81 | <td class="colLast">&nbsp;</td>
 82 | </tr>
 83 | <tr class="rowColor">
 84 | <td class="colFirst"><a href="weka/core/converters/package-summary.html">weka.core.converters</a></td>
 85 | <td class="colLast">&nbsp;</td>
 86 | </tr>
 87 | <tr class="altColor">
 88 | <td class="colFirst"><a href="weka/core/tokenizers/package-summary.html">weka.core.tokenizers</a></td>
 89 | <td class="colLast">&nbsp;</td>
 90 | </tr>
 91 | <tr class="rowColor">
 92 | <td class="colFirst"><a href="weka/filters/supervised/attribute/package-summary.html">weka.filters.supervised.attribute</a></td>
 93 | <td class="colLast">&nbsp;</td>
 94 | </tr>
 95 | <tr class="altColor">
 96 | <td class="colFirst"><a href="weka/filters/unsupervised/attribute/package-summary.html">weka.filters.unsupervised.attribute</a></td>
 97 | <td class="colLast">&nbsp;</td>
 98 | </tr>
 99 | </tbody>
100 | </table>
101 | </div>
102 | <!-- ======= START OF BOTTOM NAVBAR ====== -->
103 | <div class="bottomNav"><a name="navbar.bottom">
104 | <!--   -->
105 | </a>
106 | <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
107 | <a name="navbar.bottom.firstrow">
108 | <!--   -->
109 | </a>
110 | <ul class="navList" title="Navigation">
111 | <li class="navBarCell1Rev">Overview</li>
112 | <li>Package</li>
113 | <li>Class</li>
114 | <li><a href="overview-tree.html">Tree</a></li>
115 | <li><a href="deprecated-list.html">Deprecated</a></li>
116 | <li><a href="index-all.html">Index</a></li>
117 | <li><a href="help-doc.html">Help</a></li>
118 | </ul>
119 | </div>
120 | <div class="subNav">
121 | <ul class="navList">
122 | <li>Prev</li>
123 | <li>Next</li>
124 | </ul>
125 | <ul class="navList">
126 | <li><a href="index.html?overview-summary.html" target="_top">Frames</a></li>
127 | <li><a href="overview-summary.html" target="_top">No&nbsp;Frames</a></li>
128 | </ul>
129 | <ul class="navList" id="allclasses_navbar_bottom">
130 | <li><a href="allclasses-noframe.html">All&nbsp;Classes</a></li>
131 | </ul>
132 | <div>
133 | <script type="text/javascript"><!--
134 |   allClassesLink = document.getElementById("allclasses_navbar_bottom"); // the "All Classes" list of the bottom navbar (assigned without var)
135 |   if(window==top) { // this page is the top-level window, i.e. not shown inside a frame
136 |     allClassesLink.style.display = "block"; // show the "All Classes" link
137 |   }
138 |   else { // this page is shown inside a frame
139 |     allClassesLink.style.display = "none"; // hide the "All Classes" link
140 |   }
141 |   //-->
142 | </script>
143 | </div>
144 | <a name="skip.navbar.bottom">
145 | <!--   -->
146 | </a></div>
147 | <!-- ======== END OF BOTTOM NAVBAR ======= -->
148 | </body>
149 | </html>
150 | 


--------------------------------------------------------------------------------
/doc/allclasses-noframe.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 2 | <!-- NewPage -->
 3 | <html lang="en">
 4 | <head>
 5 | <!-- Generated by javadoc (1.8.0_191) on Wed Feb 20 17:12:12 NZDT 2019 -->
 6 | <title>All Classes</title>
 7 | <meta name="date" content="2019-02-20">
 8 | <link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
 9 | <script type="text/javascript" src="script.js"></script>
10 | </head>
11 | <body>
12 | <h1 class="bar">All&nbsp;Classes</h1>
13 | <div class="indexContainer">
14 | <ul>
15 | <li><a href="affective/core/ArffLexiconEvaluator.html" title="class in affective.core">ArffLexiconEvaluator</a></li>
16 | <li><a href="affective/core/ArffLexiconWordLabeller.html" title="class in affective.core">ArffLexiconWordLabeller</a></li>
17 | <li><a href="weka/filters/unsupervised/attribute/ASA.html" title="class in weka.filters.unsupervised.attribute">ASA</a></li>
18 | <li><a href="affective/core/CSVEmbeddingHandler.html" title="class in affective.core">CSVEmbeddingHandler</a></li>
19 | <li><a href="weka/filters/unsupervised/attribute/DistantSupervisionSyntheticFilter.html" title="class in weka.filters.unsupervised.attribute">DistantSupervisionSyntheticFilter</a></li>
20 | <li><a href="affective/core/EmbeddingHandler.html" title="class in affective.core">EmbeddingHandler</a></li>
21 | <li><a href="weka/core/converters/HumanCodedToArff.html" title="class in weka.core.converters">HumanCodedToArff</a></li>
22 | <li><a href="affective/core/IntensityLexiconEvaluator.html" title="class in affective.core">IntensityLexiconEvaluator</a></li>
23 | <li><a href="weka/filters/unsupervised/attribute/LabelWordVectors.html" title="class in weka.filters.unsupervised.attribute">LabelWordVectors</a></li>
24 | <li><a href="weka/filters/unsupervised/attribute/LexiconDistantSupervision.html" title="class in weka.filters.unsupervised.attribute">LexiconDistantSupervision</a></li>
25 | <li><a href="affective/core/LexiconEvaluator.html" title="class in affective.core">LexiconEvaluator</a></li>
26 | <li><a href="affective/core/NegationEvaluator.html" title="class in affective.core">NegationEvaluator</a></li>
27 | <li><a href="weka/core/converters/NRCAffectToArff.html" title="class in weka.core.converters">NRCAffectToArff</a></li>
28 | <li><a href="affective/core/NRCEmotionLexiconEvaluator.html" title="class in affective.core">NRCEmotionLexiconEvaluator</a></li>
29 | <li><a href="affective/core/NRCExpandedEmotionLexiconEvaluator.html" title="class in affective.core">NRCExpandedEmotionLexiconEvaluator</a></li>
30 | <li><a href="affective/core/NRCHashtagEmotionLexiconEvaluator.html" title="class in affective.core">NRCHashtagEmotionLexiconEvaluator</a></li>
31 | <li><a href="weka/filters/supervised/attribute/PMILexiconExpander.html" title="class in weka.filters.supervised.attribute">PMILexiconExpander</a></li>
32 | <li><a href="affective/core/PolarityLexiconEvaluator.html" title="class in affective.core">PolarityLexiconEvaluator</a></li>
33 | <li><a href="weka/filters/unsupervised/attribute/PTCM.html" title="class in weka.filters.unsupervised.attribute">PTCM</a></li>
34 | <li><a href="weka/core/converters/SemEvalToArff.html" title="class in weka.core.converters">SemEvalToArff</a></li>
35 | <li><a href="affective/core/SentiStrengthEvaluator.html" title="class in affective.core">SentiStrengthEvaluator</a></li>
36 | <li><a href="affective/core/SWN3LexiconEvaluator.html" title="class in affective.core">SWN3LexiconEvaluator</a></li>
37 | <li><a href="weka/filters/unsupervised/attribute/TweetCentroid.html" title="class in weka.filters.unsupervised.attribute">TweetCentroid</a></li>
38 | <li><a href="weka/core/converters/TweetCollectionToArff.html" title="class in weka.core.converters">TweetCollectionToArff</a></li>
39 | <li><a href="weka/filters/unsupervised/attribute/TweetNLPPOSTagger.html" title="class in weka.filters.unsupervised.attribute">TweetNLPPOSTagger</a></li>
40 | <li><a href="weka/core/tokenizers/TweetNLPTokenizer.html" title="class in weka.core.tokenizers">TweetNLPTokenizer</a></li>
41 | <li><a href="weka/filters/unsupervised/attribute/TweetToEmbeddingsFeatureVector.html" title="class in weka.filters.unsupervised.attribute">TweetToEmbeddingsFeatureVector</a></li>
42 | <li><a href="weka/filters/unsupervised/attribute/TweetToFeatureVector.html" title="class in weka.filters.unsupervised.attribute">TweetToFeatureVector</a></li>
43 | <li><a href="weka/filters/unsupervised/attribute/TweetToInputLexiconFeatureVector.html" title="class in weka.filters.unsupervised.attribute">TweetToInputLexiconFeatureVector</a></li>
44 | <li><a href="weka/filters/unsupervised/attribute/TweetToLexiconFeatureVector.html" title="class in weka.filters.unsupervised.attribute">TweetToLexiconFeatureVector</a></li>
45 | <li><a href="weka/filters/unsupervised/attribute/TweetToSentiStrengthFeatureVector.html" title="class in weka.filters.unsupervised.attribute">TweetToSentiStrengthFeatureVector</a></li>
46 | <li><a href="weka/filters/unsupervised/attribute/TweetToSparseFeatureVector.html" title="class in weka.filters.unsupervised.attribute">TweetToSparseFeatureVector</a></li>
47 | <li><a href="weka/filters/unsupervised/attribute/TweetToWordListCountFeatureVector.html" title="class in weka.filters.unsupervised.attribute">TweetToWordListCountFeatureVector</a></li>
48 | <li><a href="affective/core/Utils.html" title="class in affective.core">Utils</a></li>
49 | </ul>
50 | </div>
51 | </body>
52 | </html>
53 | 


--------------------------------------------------------------------------------
/doc/constant-values.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2 | <!-- NewPage -->
  3 | <html lang="en">
  4 | <head>
  5 | <!-- Generated by javadoc (10.0.2) on Thu Feb 21 10:45:39 NZDT 2019 -->
  6 | <title>Constant Field Values</title>
  7 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  8 | <meta name="date" content="2019-02-21">
  9 | <link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
 10 | <link rel="stylesheet" type="text/css" href="jquery/jquery-ui.css" title="Style">
 11 | <script type="text/javascript" src="script.js"></script>
 12 | <script type="text/javascript" src="jquery/jszip/dist/jszip.min.js"></script>
 13 | <script type="text/javascript" src="jquery/jszip-utils/dist/jszip-utils.min.js"></script>
 14 | <!--[if IE]>
 15 | <script type="text/javascript" src="jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
 16 | <![endif]-->
 17 | <script type="text/javascript" src="jquery/jquery-1.10.2.js"></script>
 18 | <script type="text/javascript" src="jquery/jquery-ui.js"></script>
 19 | </head>
 20 | <body>
 21 | <script type="text/javascript"><!--
 22 |     try { // NOTE(review): presumably guards against parent.document being inaccessible - confirm
 23 |         if (location.href.indexOf('is-external=true') == -1) { // only when the URL does not contain is-external=true
 24 |             parent.document.title="Constant Field Values"; // copy this page's title onto the parent window's document
 25 |         }
 26 |     }
 27 |     catch(err) { // empty handler: any error raised above is silently ignored
 28 |     } // NOTE(review): loadScripts, called after this block, is not defined in this page - presumably comes from script.js; confirm
 29 | //-->
 30 | var pathtoroot = "./";loadScripts(document, 'script');</script>
 31 | <noscript>
 32 | <div>JavaScript is disabled on your browser.</div>
 33 | </noscript>
 34 | <div class="fixedNav">
 35 | <!-- ========= START OF TOP NAVBAR ======= -->
 36 | <div class="topNav"><a name="navbar.top">
 37 | <!--   -->
 38 | </a>
 39 | <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
 40 | <a name="navbar.top.firstrow">
 41 | <!--   -->
 42 | </a>
 43 | <ul class="navList" title="Navigation">
 44 | <li><a href="overview-summary.html">Overview</a></li>
 45 | <li>Package</li>
 46 | <li>Class</li>
 47 | <li><a href="overview-tree.html">Tree</a></li>
 48 | <li><a href="deprecated-list.html">Deprecated</a></li>
 49 | <li><a href="index-all.html">Index</a></li>
 50 | <li><a href="help-doc.html">Help</a></li>
 51 | </ul>
 52 | </div>
 53 | <div class="subNav">
 54 | <ul class="navList">
 55 | <li>Prev</li>
 56 | <li>Next</li>
 57 | </ul>
 58 | <ul class="navList">
 59 | <li><a href="index.html?constant-values.html" target="_top">Frames</a></li>
 60 | <li><a href="constant-values.html" target="_top">No&nbsp;Frames</a></li>
 61 | </ul>
 62 | <ul class="navList" id="allclasses_navbar_top">
 63 | <li><a href="allclasses-noframe.html">All&nbsp;Classes</a></li>
 64 | </ul>
 65 | <ul class="navListSearch">
 66 | <li><label for="search">SEARCH:</label>
 67 | <input type="text" id="search" value="search" disabled="disabled">
 68 | <input type="reset" id="reset" value="reset" disabled="disabled">
 69 | </li>
 70 | </ul>
 71 | <div>
 72 | <script type="text/javascript"><!--
 73 |   allClassesLink = document.getElementById("allclasses_navbar_top"); // the "All Classes" list of the top navbar (assigned without var)
 74 |   if(window==top) { // this page is the top-level window, i.e. not shown inside a frame
 75 |     allClassesLink.style.display = "block"; // show the "All Classes" link
 76 |   }
 77 |   else { // this page is shown inside a frame
 78 |     allClassesLink.style.display = "none"; // hide the "All Classes" link
 79 |   }
 80 |   //-->
 81 | </script>
 82 | <noscript>
 83 | <div>JavaScript is disabled on your browser.</div>
 84 | </noscript>
 85 | </div>
 86 | <a name="skip.navbar.top">
 87 | <!--   -->
 88 | </a></div>
 89 | <!-- ========= END OF TOP NAVBAR ========= -->
 90 | </div>
 91 | <div class="navPadding">&nbsp;</div>
 92 | <script type="text/javascript"><!--
 93 | $('.navPadding').css('padding-top', $('.fixedNav').css("height")); // set the top padding of .navPadding to the height of .fixedNav ($ is presumably the jQuery loaded in the page head)
 94 | //-->
 95 | </script>
 96 | <div class="header">
 97 | <h1 title="Constant Field Values" class="title">Constant Field Values</h1>
 98 | <h2 title="Contents">Contents</h2>
 99 | </div>
100 | <!-- ======= START OF BOTTOM NAVBAR ====== -->
101 | <div class="bottomNav"><a name="navbar.bottom">
102 | <!--   -->
103 | </a>
104 | <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
105 | <a name="navbar.bottom.firstrow">
106 | <!--   -->
107 | </a>
108 | <ul class="navList" title="Navigation">
109 | <li><a href="overview-summary.html">Overview</a></li>
110 | <li>Package</li>
111 | <li>Class</li>
112 | <li><a href="overview-tree.html">Tree</a></li>
113 | <li><a href="deprecated-list.html">Deprecated</a></li>
114 | <li><a href="index-all.html">Index</a></li>
115 | <li><a href="help-doc.html">Help</a></li>
116 | </ul>
117 | </div>
118 | <div class="subNav">
119 | <ul class="navList">
120 | <li>Prev</li>
121 | <li>Next</li>
122 | </ul>
123 | <ul class="navList">
124 | <li><a href="index.html?constant-values.html" target="_top">Frames</a></li>
125 | <li><a href="constant-values.html" target="_top">No&nbsp;Frames</a></li>
126 | </ul>
127 | <ul class="navList" id="allclasses_navbar_bottom">
128 | <li><a href="allclasses-noframe.html">All&nbsp;Classes</a></li>
129 | </ul>
130 | <div>
131 | <script type="text/javascript"><!--
132 |   allClassesLink = document.getElementById("allclasses_navbar_bottom");
133 |   if(window==top) {
134 |     allClassesLink.style.display = "block";
135 |   }
136 |   else {
137 |     allClassesLink.style.display = "none";
138 |   }
139 |   //-->
140 | </script>
141 | <noscript>
142 | <div>JavaScript is disabled on your browser.</div>
143 | </noscript>
144 | </div>
145 | <a name="skip.navbar.bottom">
146 | <!--   -->
147 | </a></div>
148 | <!-- ======== END OF BOTTOM NAVBAR ======= -->
149 | </body>
150 | </html>
151 | 


--------------------------------------------------------------------------------
/benchmark/nltk_scikit_ngram_liu.py:
--------------------------------------------------------------------------------
  1 | # This program is free software: you can redistribute it and/or modify
  2 | # it under the terms of the GNU General Public License as published by
  3 | # the Free Software Foundation, either version 3 of the License, or
  4 | # (at your option) any later version.
  5 | #
  6 | # This program is distributed in the hope that it will be useful,
  7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  9 | # GNU General Public License for more details.
 10 | #
 11 | # You should have received a copy of the GNU General Public License
 12 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 13 | 
 14 | # Authors: Felipe Bravo-Marquez
 15 | 
 16 | 	
 17 | import pandas as pd       
 18 | from nltk.tokenize import TweetTokenizer
 19 | from nltk.sentiment.util import  mark_negation
 20 | from nltk.corpus import opinion_lexicon
 21 | 
 22 | from sklearn.feature_extraction.text import CountVectorizer  
 23 | from sklearn.linear_model import LogisticRegression
 24 | from sklearn.pipeline import Pipeline, FeatureUnion
 25 | from sklearn.base import BaseEstimator, TransformerMixin
 26 | from sklearn.metrics import confusion_matrix, cohen_kappa_score, classification_report
 27 | import numpy as np
 28 | 
 29 | 
 30 | # load training and testing datasets as a pandas dataframe
 31 | train_data = pd.read_csv("dataset/twitter-train-B.txt", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
 32 | test_data = pd.read_csv("dataset/twitter-test-gold-B.tsv", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
 33 | 
 34 | # replace objective-OR-neutral and objective to neutral
 35 | train_data.sent = train_data.sent.replace(['objective-OR-neutral','objective'],['neutral','neutral'])
 36 | 
 37 | # use a Twitter-specific tokenizer
 38 | tokenizer = TweetTokenizer(preserve_case=False, reduce_len=True)
 39 | 
 40 | 
 41 | 
 42 | 
 43 | #####################################################################################
 44 | #
 45 | #  Train a linear model using  n-grams features + features derived from Bing Liu's lexicon 
 46 | #
 47 | ######################################################################################
 48 | #import nltk
 49 | #nltk.download('opinion_lexicon')
 50 | 
 51 | 
 52 | # load training and testing datasets as a pandas dataframe
 53 | train_data = pd.read_csv("dataset/twitter-train-B.txt", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
 54 | test_data = pd.read_csv("dataset/twitter-test-gold-B.tsv", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
 55 | 
 56 | # replace objective-OR-neutral and objective to neutral
 57 | train_data.sent = train_data.sent.replace(['objective-OR-neutral','objective'],['neutral','neutral'])
 58 | 
 59 | # use a Twitter-specific tokenizer
 60 | tokenizer = TweetTokenizer(preserve_case=False, reduce_len=True)
 61 | 
 62 | 
 63 | 
 64 | 
 65 | class LiuFeatureExtractor(BaseEstimator, TransformerMixin):
 66 |     """Takes in a corpus of tweets and calculates features using Bing Liu's lexicon"""
 67 | 
 68 |     def __init__(self, tokenizer):
 69 |         self.tokenizer = tokenizer
 70 |         self.pos_set = set(opinion_lexicon.positive())
 71 |         self.neg_set = set(opinion_lexicon.negative())
 72 | 
 73 |     def liu_score(self,sentence):
 74 |         """Calculates the number of positive and negative words in the sentence using Bing Liu's Lexicon""" 
 75 |         tokenized_sent = self.tokenizer.tokenize(sentence)
 76 |         pos_words = 0
 77 |         neg_words = 0
 78 |         for word in tokenized_sent:
 79 |             if word in self.pos_set:
 80 |                 pos_words += 1
 81 |             elif word in self.neg_set:
 82 |                 neg_words += 1
 83 |         return [pos_words,neg_words]
 84 |     
 85 |     def transform(self, X, y=None):
 86 |         """Applies liu_score and vader_score on a data.frame containing tweets """
 87 |         values = []
 88 |         for tweet in X:
 89 |             values.append(self.liu_score(tweet))
 90 |         
 91 |         return(np.array(values))
 92 | 
 93 |     def fit(self, X, y=None):
 94 |         """This function must return `self` unless we expect the transform function to perform a 
 95 |         different action on training and testing partitions (e.g., when we calculate unigram features, 
 96 |         the dictionary is only extracted from the first batch)"""
 97 |         return self
 98 | 
 99 | 
100 | 
101 | 
102 | 
def tokenize_and_mark_negation(text):
    """Tokenizes a tweet and lets NLTK's mark_negation tag the resulting tokens.

    mark_negation works on a list of tokens. Passing it to CountVectorizer as
    ``preprocessor`` hands it the raw string instead, in which case it iterates
    over single characters, never finds a negation cue and returns the text
    unchanged. Running it after tokenization makes the negation marking
    effective.
    """
    return mark_negation(tokenizer.tokenize(text))


liu_feat = LiuFeatureExtractor(tokenizer)
# lowercase=False: case folding is left to the tokenizer (preserve_case=False),
# exactly as before, when the custom preprocessor replaced the default one.
vectorizer = CountVectorizer(tokenizer = tokenize_and_mark_negation, lowercase = False, ngram_range=(1,4))
log_mod = LogisticRegression(solver='liblinear',multi_class='ovr')
# The n-gram counts and the two lexicon counts are concatenated column-wise and
# fed to the logistic regression model.
liu_ngram_clf = Pipeline([ ('feats',
                            FeatureUnion([ ('ngram', vectorizer), ('liu',liu_feat) ])),
    ('clf', log_mod)])
109 | 
110 | 
# Train on the training tweets, then label the test tweets.
liu_ngram_clf.fit(train_data.tweet, train_data.sent)
pred_liu_ngram = liu_ngram_clf.predict(test_data.tweet)

# Compare the predictions against the gold labels of the test set.
gold_labels = test_data.sent
conf_liu_ngram = confusion_matrix(gold_labels, pred_liu_ngram)
kappa_liu_ngram = cohen_kappa_score(gold_labels, pred_liu_ngram)
class_rep_liu_ngram = classification_report(gold_labels, pred_liu_ngram)

# Report: confusion matrix, per-class report and Cohen's kappa.
print('Confusion Matrix for Logistic Regression + ngrams + features from Bing Liu\'s Lexicon')
print(conf_liu_ngram)
print('Classification Report')
print(class_rep_liu_ngram)
print('kappa:'+str(kappa_liu_ngram))
124 | 
125 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/TweetNLPPOSTaggerTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.core.Instances;
 24 | import weka.filters.AbstractFilterTest;
 25 | import weka.filters.Filter;
 26 | 
 27 | import junit.framework.Test;
 28 | import junit.framework.TestSuite;
 29 | 
 30 | import java.io.File;
 31 | 
 32 | /**
 33 |  * Tests TweetNLPPOSTagger. Run from the command line with: <p/>
 34 |  * java weka.filters.unsupervised.attribute.TweetNLPPOSTaggerTest
 35 |  * <p> 
 36 |  * AffectiveTweets package must either be installed or
 37 |  * JVM must be started in AffectiveTweets directory.
 38 |  * <p>
 39 |  * @author FracPete and eibe
 40 |  * @version $Revision: 9568 $
 41 |  */
 42 | public class TweetNLPPOSTaggerTest extends AbstractFilterTest {
 43 | 
 44 |     public TweetNLPPOSTaggerTest(String name) {
 45 |         super(name);
 46 |     }
 47 | 
 48 |     /** Creates a default TweetNLPPOSTagger filter */
 49 |     public Filter getFilter() {
 50 | 	Filter f = null;
 51 | 
 52 | 	// Check to see if the test is run from directory containing build_package.xml
 53 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 54 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 55 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 56 | 	    f = new TweetToSparseFeatureVector();
 57 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 58 | 	} else {
 59 | 	    f = new TweetNLPPOSTagger(); // Hope that the package is installed.
 60 | 	}
 61 | 	return f;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Test for the FilteredClassifier used with this filter.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 | 	weka.filters.MultiFilter mf = new weka.filters.MultiFilter();
 75 | 	Filter[] filters = new Filter[2];
 76 | 	filters[0] = getFilter();
 77 | 	weka.filters.unsupervised.attribute.RemoveType rt = new weka.filters.unsupervised.attribute.RemoveType(); // Need to remove string attributes because they are kept by this filter.
 78 | 	filters[1] = rt;
 79 | 	mf.setFilters(filters);
 80 | 	result.setFilter(mf);
 81 |         result.setClassifier(new weka.classifiers.functions.SMO());
 82 | 
 83 |         return result;
 84 |     }
 85 | 
 86 |     /**
 87 |      * Data to be used for FilteredClassifier test.
 88 |      *
 89 |      * @return the configured FilteredClassifier
 90 |      */
 91 |     protected Instances getFilteredClassifierData() throws Exception {
 92 |         Instances result;
 93 | 
 94 | 	// Check to see if the test is run from directory containing build_package.xml
 95 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 96 | 	    result = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 97 | 	} else { // Hope that package is installed.
 98 | 	    result = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 99 | 	}
100 | 
101 | 	result.setClassIndex(result.numAttributes() - 1);
102 | 
103 |         return result;
104 |     }
105 | 
106 |     /**
107 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
108 |      * one of the datasets that comes with the package.
109 |      *
110 |      * @throws Exception if an error occurs reading the example instances.
111 |      */
112 |     protected void setUp() throws Exception {
113 |         super.setUp();
114 | 
115 | 	// Check to see if the test is run from directory containing build_package.xml
116 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
117 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
118 | 	} else { // Hope that package is installed.
119 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
120 | 	}
121 | 
122 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
123 |     }
124 | 
125 |     public static Test suite() {
126 |         return new TestSuite(TweetNLPPOSTaggerTest.class);
127 |     }
128 | 
129 |     public static void main(String[] args){
130 |         junit.textui.TestRunner.run(suite());
131 |     }
132 | }
133 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/TweetToSparseFeatureVectorTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.core.Instances;
 24 | import weka.filters.AbstractFilterTest;
 25 | import weka.filters.Filter;
 26 | 
 27 | import junit.framework.Test;
 28 | import junit.framework.TestSuite;
 29 | 
 30 | import java.io.File;
 31 | 
 32 | /**
 33 |  * Tests TweetToSparseFeatureVector. Run from the command line with: <p/>
 34 |  * java weka.filters.unsupervised.attribute.TweetToSparseFeatureVectorTest
 35 |  * <p> 
 36 |  * AffectiveTweets package must either be installed or
 37 |  * JVM must be started in AffectiveTweets directory.
 38 |  * <p>
 39 |  * @author FracPete and eibe
 40 |  * @version $Revision: 9568 $
 41 |  */
 42 | public class TweetToSparseFeatureVectorTest extends AbstractFilterTest {
 43 | 
 44 |     public TweetToSparseFeatureVectorTest(String name) {
 45 |         super(name);
 46 |     }
 47 | 
 48 |     /** Creates a default TweetToSparseFeatureVector filter */
 49 |     public Filter getFilter() {
 50 | 	Filter f = null;
 51 | 
 52 | 	// Check to see if the test is run from directory containing build_package.xml
 53 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 54 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 55 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 56 | 	    f = new TweetToSparseFeatureVector();
 57 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 58 | 	} else {
 59 | 	    f = new TweetToSparseFeatureVector(); // Hope that the package is installed.
 60 | 	}
 61 | 	return f;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Test for the FilteredClassifier used with this filter.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 | 	weka.filters.MultiFilter mf = new weka.filters.MultiFilter();
 75 | 	Filter[] filters = new Filter[2];
 76 | 	filters[0] = getFilter();
 77 | 	weka.filters.unsupervised.attribute.RemoveType rt = new weka.filters.unsupervised.attribute.RemoveType(); // Need to remove string attributes because they are kept by this filter.
 78 | 	filters[1] = rt;
 79 | 	mf.setFilters(filters);
 80 | 	result.setFilter(mf);
 81 |         result.setClassifier(new weka.classifiers.functions.SMO());
 82 | 
 83 |         return result;
 84 |     }
 85 | 
 86 |     /**
 87 |      * Data to be used for FilteredClassifier test.
 88 |      *
 89 |      * @return the configured FilteredClassifier
 90 |      */
 91 |     protected Instances getFilteredClassifierData() throws Exception {
 92 |         Instances result;
 93 | 
 94 | 	// Check to see if the test is run from directory containing build_package.xml
 95 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 96 | 	    result = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 97 | 	} else { // Hope that package is installed.
 98 | 	    result = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 99 | 	}
100 | 
101 | 	result.setClassIndex(result.numAttributes() - 1);
102 | 
103 |         return result;
104 |     }
105 | 
106 |     /**
107 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
108 |      * one of the datasets that comes with the package.
109 |      *
110 |      * @throws Exception if an error occurs reading the example instances.
111 |      */
112 |     protected void setUp() throws Exception {
113 |         super.setUp();
114 | 
115 | 	// Check to see if the test is run from directory containing build_package.xml
116 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
117 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
118 | 	} else { // Hope that package is installed.
119 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
120 | 	}
121 | 
122 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
123 |     }
124 | 
125 |     public static Test suite() {
126 |         return new TestSuite(TweetToSparseFeatureVectorTest.class);
127 |     }
128 | 
129 |     public static void main(String[] args){
130 |         junit.textui.TestRunner.run(suite());
131 |     }
132 | }
133 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/supervised/attribute/PMILexiconExpanderTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.supervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.core.Instances;
 24 | import weka.filters.AbstractFilterTest;
 25 | import weka.filters.Filter;
 26 | 
 27 | import junit.framework.Test;
 28 | import junit.framework.TestSuite;
 29 | 
 30 | import java.io.File;
 31 | 
 32 | /**
 33 |  * Tests PMILexiconExpander. Run from the command line with: <p/>
 34 |  * java weka.filters.unsupervised.attribute.PMILexiconExpanderTest
 35 |  * <p> 
 36 |  * AffectiveTweets package must either be installed or
 37 |  * JVM must be started in AffectiveTweets directory.
 38 |  * <p>
 39 |  * @author FracPete and eibe
 40 |  * @version $Revision: 9568 $
 41 |  */
 42 | public class PMILexiconExpanderTest extends AbstractFilterTest {
 43 | 
 44 |     public PMILexiconExpanderTest(String name) {
 45 |         super(name);
 46 |     }
 47 | 
 48 |     /** Creates a default PMILexiconExpander filter */
 49 |     public Filter getFilter() {
 50 | 	Filter f = null;
 51 | 
 52 | 	// Check to see if the test is run from directory containing build_package.xml
 53 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 54 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 55 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 56 | 	    f = new PMILexiconExpander();
 57 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 58 | 	} else {
 59 | 	    f = new PMILexiconExpander(); // Hope that the package is installed.
 60 | 	}
 61 | 	return f;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Test for the FilteredClassifier used with this filter.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 | 	weka.filters.MultiFilter mf = new weka.filters.MultiFilter();
 75 | 	Filter[] filters = new Filter[2];
 76 | 	filters[0] = getFilter();
 77 | 	weka.filters.unsupervised.attribute.RemoveType rt = new weka.filters.unsupervised.attribute.RemoveType(); // Need to remove string attributes because they are kept by this filter.
 78 | 	filters[1] = rt;
 79 | 	mf.setFilters(filters);
 80 | 	result.setFilter(mf);
 81 |         result.setClassifier(new weka.classifiers.functions.SMO());
 82 | 
 83 |         return result;
 84 |     }
 85 | 
 86 |     /**
 87 |      * Data to be used for FilteredClassifier test.
 88 |      *
 89 |      * @return the configured FilteredClassifier
 90 |      */
 91 |     protected Instances getFilteredClassifierData() throws Exception {
 92 |         Instances result;
 93 | 
 94 | 	// Check to see if the test is run from directory containing build_package.xml
 95 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 96 | 	    result = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 97 | 	} else { // Hope that package is installed.
 98 | 	    result = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 99 | 	}
100 | 
101 | 	result.setClassIndex(result.numAttributes() - 1);
102 | 
103 |         return result;
104 |     }
105 | 
106 |     /**
107 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
108 |      * one of the datasets that comes with the package.
109 |      *
110 |      * @throws Exception if an error occurs reading the example instances.
111 |      */
112 |     protected void setUp() throws Exception {
113 |         super.setUp();
114 | 
115 | 	// Check to see if the test is run from directory containing build_package.xml
116 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
117 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
118 | 	} else { // Hope that package is installed.
119 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
120 | 	}
121 | 
122 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
123 |     }
124 |     
125 |     public void testFilteredClassifier() {
126 |     	
127 |     }
128 | 
129 |     public static Test suite() {
130 |         return new TestSuite(PMILexiconExpanderTest.class);
131 |     }
132 | 
133 |     public static void main(String[] args){
134 |         junit.textui.TestRunner.run(suite());
135 |     }
136 | }
137 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/TweetToEmbeddingsFeatureVectorTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.core.Instances;
 24 | import weka.filters.AbstractFilterTest;
 25 | import weka.filters.Filter;
 26 | 
 27 | import junit.framework.Test;
 28 | import junit.framework.TestSuite;
 29 | 
 30 | import java.io.File;
 31 | 
 32 | /**
 33 |  * Tests TweetToEmbeddingsFeatureVector. Run from the command line with: <p/>
 34 |  * java weka.filters.unsupervised.attribute.TweetToEmbeddingsFeatureVectorTest
 35 |  * <p> 
 36 |  * AffectiveTweets package must either be installed or
 37 |  * JVM must be started in AffectiveTweets directory.
 38 |  * <p>
 39 |  * @author FracPete and eibe
 40 |  * @version $Revision: 9568 $
 41 |  */
 42 | public class TweetToEmbeddingsFeatureVectorTest extends AbstractFilterTest {
 43 | 
 44 |     public TweetToEmbeddingsFeatureVectorTest(String name) {
 45 |         super(name);
 46 |     }
 47 | 
 48 |     /** Creates a default TweetToEmbeddingsFeatureVector filter */
 49 |     public Filter getFilter() {
 50 | 	Filter f = null;
 51 | 
 52 | 	// Check to see if the test is run from directory containing build_package.xml
 53 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 54 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 55 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 56 | 	    f = new TweetToEmbeddingsFeatureVector();
 57 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 58 | 	} else {
 59 | 	    f = new TweetToEmbeddingsFeatureVector(); // Hope that the package is installed.
 60 | 	}
 61 | 	return f;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Test for the FilteredClassifier used with this filter.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 | 	weka.filters.MultiFilter mf = new weka.filters.MultiFilter();
 75 | 	Filter[] filters = new Filter[2];
 76 | 	filters[0] = getFilter();
 77 | 	weka.filters.unsupervised.attribute.RemoveType rt = new weka.filters.unsupervised.attribute.RemoveType(); // Need to remove string attributes because they are kept by this filter.
 78 | 	filters[1] = rt;
 79 | 	mf.setFilters(filters);
 80 | 	result.setFilter(mf);
 81 |         result.setClassifier(new weka.classifiers.functions.SMO());
 82 | 
 83 |         return result;
 84 |     }
 85 | 
 86 |     /**
 87 |      * Data to be used for FilteredClassifier test.
 88 |      *
 89 |      * @return the configured FilteredClassifier
 90 |      */
 91 |     protected Instances getFilteredClassifierData() throws Exception {
 92 |         Instances result;
 93 | 
 94 | 	// Check to see if the test is run from directory containing build_package.xml
 95 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 96 | 	    result = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 97 | 	} else { // Hope that package is installed.
 98 | 	    result = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 99 | 	}
100 | 
101 | 	result.setClassIndex(result.numAttributes() - 1);
102 | 
103 |         return result;
104 |     }
105 | 
106 |     /**
107 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
108 |      * one of the datasets that comes with the package.
109 |      *
110 |      * @throws Exception if an error occurs reading the example instances.
111 |      */
112 |     protected void setUp() throws Exception {
113 |         super.setUp();
114 | 
115 | 	// Check to see if the test is run from directory containing build_package.xml
116 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
117 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
118 | 	} else { // Hope that package is installed.
119 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
120 | 	}
121 | 
122 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
123 |     }
124 | 
125 |     public static Test suite() {
126 |         return new TestSuite(TweetToEmbeddingsFeatureVectorTest.class);
127 |     }
128 | 
129 |     public static void main(String[] args){
130 |         junit.textui.TestRunner.run(suite());
131 |     }
132 | }
133 | 


--------------------------------------------------------------------------------
/benchmark/nltk_scikit_liu_vader.py:
--------------------------------------------------------------------------------
  1 | # This program is free software: you can redistribute it and/or modify
  2 | # it under the terms of the GNU General Public License as published by
  3 | # the Free Software Foundation, either version 3 of the License, or
  4 | # (at your option) any later version.
  5 | #
  6 | # This program is distributed in the hope that it will be useful,
  7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  9 | # GNU General Public License for more details.
 10 | #
 11 | # You should have received a copy of the GNU General Public License
 12 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 13 | 
 14 | # Authors: Felipe Bravo-Marquez
 15 | 
 16 | 	
 17 | import pandas as pd       
 18 | from nltk.tokenize import TweetTokenizer
 19 | from nltk.sentiment import SentimentIntensityAnalyzer
 20 | from nltk.corpus import opinion_lexicon
 21 | 
 22 | from sklearn.linear_model import LogisticRegression
 23 | from sklearn.pipeline import Pipeline, FeatureUnion
 24 | from sklearn.base import BaseEstimator, TransformerMixin
 25 | from sklearn.metrics import confusion_matrix, cohen_kappa_score, classification_report
 26 | import numpy as np
 27 | 
 28 | 
 29 | 
 30 | 
 31 | # load training and testing datasets as a pandas dataframe
 32 | train_data = pd.read_csv("dataset/twitter-train-B.txt", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
 33 | test_data = pd.read_csv("dataset/twitter-test-gold-B.tsv", header=None, delimiter="\t",usecols=(2,3), names=("sent","tweet"))
 34 | 
 35 | # replace objective-OR-neutral and objective to neutral
 36 | train_data.sent = train_data.sent.replace(['objective-OR-neutral','objective'],['neutral','neutral'])
 37 | 
 38 | # use a Twitter-specific tokenizer
 39 | tokenizer = TweetTokenizer(preserve_case=False, reduce_len=True)
 40 | 
 41 | 
 42 | 
 43 | #####################################################################################
 44 | #
 45 | #  Train a linear model using features from Bing Liu's lexicon + the Vader method
 46 | #
 47 | ######################################################################################
 48 | #nltk.download('vader_lexicon')
 49 | 
 50 | 
 51 | 
 52 | 
 53 | class LiuFeatureExtractor(BaseEstimator, TransformerMixin):
 54 |     """Takes in a corpus of tweets and calculates features using Bing Liu's lexicon"""
 55 | 
 56 |     def __init__(self, tokenizer):
 57 |         self.tokenizer = tokenizer
 58 |         self.pos_set = set(opinion_lexicon.positive())
 59 |         self.neg_set = set(opinion_lexicon.negative())
 60 | 
 61 |     def liu_score(self,sentence):
 62 |         """Calculates the number of positive and negative words in the sentence using Bing Liu's Lexicon""" 
 63 |         tokenized_sent = self.tokenizer.tokenize(sentence)
 64 |         pos_words = 0
 65 |         neg_words = 0
 66 |         for word in tokenized_sent:
 67 |             if word in self.pos_set:
 68 |                 pos_words += 1
 69 |             elif word in self.neg_set:
 70 |                 neg_words += 1
 71 |         return [pos_words,neg_words]
 72 |     
 73 |     def transform(self, X, y=None):
 74 |         """Applies liu_score and vader_score on a data.frame containing tweets """
 75 |         values = []
 76 |         for tweet in X:
 77 |             values.append(self.liu_score(tweet))
 78 |         
 79 |         return(np.array(values))
 80 | 
 81 |     def fit(self, X, y=None):
 82 |         """This function must return `self` unless we expect the transform function to perform a 
 83 |         different action on training and testing partitions (e.g., when we calculate unigram features, 
 84 |         the dictionary is only extracted from the first batch)"""
 85 |         return self
 86 | 
 87 | 
 88 | 
 89 | 
 90 | class VaderFeatureExtractor(BaseEstimator, TransformerMixin):
 91 |     """Takes in a corpus of tweets and calculates features using the Vader method"""
 92 | 
 93 |     def __init__(self, tokenizer):
 94 |         self.tokenizer = tokenizer
 95 |         self.sid = SentimentIntensityAnalyzer()
 96 | 
 97 |   
 98 |     def vader_score(self,sentence):
 99 |         """ Calculates sentiment scores for a sentence using the Vader method """
100 |         pol_scores = self.sid.polarity_scores(sentence)
101 |         return(list(pol_scores.values()))
102 | 
103 |     def transform(self, X, y=None):
104 |         """Applies vader_score on a data.frame containing tweets """
105 |         values = []
106 |         for tweet in X:
107 |             values.append(self.vader_score(tweet))
108 |         
109 |         return(np.array(values))
110 | 
111 |     def fit(self, X, y=None):
112 |         """Returns `self` unless something different happens in train and test"""
113 |         return self
114 | 
115 | 
116 | 
117 | 
118 | 
# Both feature extractors receive the shared tweet tokenizer.
vader_feat = VaderFeatureExtractor(tokenizer)
liu_feat = LiuFeatureExtractor(tokenizer)

# Pipeline: concatenate the Vader and lexicon features, then classify with
# logistic regression.
log_mod = LogisticRegression(solver='liblinear',multi_class='ovr')
feature_union = FeatureUnion([('vader', vader_feat), ('liu', liu_feat)])
vader_liu_clf = Pipeline([('feats', feature_union), ('clf', log_mod)])

# Train on the training tweets and predict labels for the test tweets.
vader_liu_clf.fit(train_data.tweet, train_data.sent)
pred_vader_liu = vader_liu_clf.predict(test_data.tweet)

# Evaluate the predictions against the gold labels.
gold_labels = test_data.sent
conf_vader_liu = confusion_matrix(gold_labels, pred_vader_liu)
kappa_vader_liu = cohen_kappa_score(gold_labels, pred_vader_liu)
class_rep_vader_liu = classification_report(gold_labels, pred_vader_liu)

print('Confusion Matrix for Logistic Regression + Vader + features from Bing Liu\'s Lexicon')
print(conf_vader_liu)
print('Classification Report')
print(class_rep_vader_liu)
print('kappa:', kappa_vader_liu, sep='')
141 | 
142 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/TweetToSentiStrengthFeatureVectorTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.core.Instances;
 24 | import weka.filters.AbstractFilterTest;
 25 | import weka.filters.Filter;
 26 | 
 27 | import junit.framework.Test;
 28 | import junit.framework.TestSuite;
 29 | 
 30 | import java.io.File;
 31 | 
 32 | /**
 33 |  * Tests TweetToSentiStrengthFeatureVector. Run from the command line with: <p/>
  34 |  * java weka.filters.unsupervised.attribute.TweetToSentiStrengthFeatureVectorTest
 35 |  * <p> 
 36 |  * AffectiveTweets package must either be installed or
 37 |  * JVM must be started in AffectiveTweets directory.
 38 |  * <p>
 39 |  * @author FracPete and eibe
 40 |  * @version $Revision: 9568 $
 41 |  */
 42 | public class TweetToSentiStrengthFeatureVectorTest extends AbstractFilterTest {
 43 | 
 44 |     public TweetToSentiStrengthFeatureVectorTest(String name) {
 45 |         super(name);
 46 |     }
 47 | 
 48 |     /** Creates a default TweetToSentiStrengthFeatureVector filter */
 49 |     public Filter getFilter() {
 50 | 	Filter f = null;
 51 | 
 52 | 	// Check to see if the test is run from directory containing build_package.xml
 53 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 54 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 55 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 56 | 	    f = new TweetToSentiStrengthFeatureVector();
 57 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 58 | 	} else {
 59 | 	    f = new TweetToSentiStrengthFeatureVector(); // Hope that the package is installed.
 60 | 	}
 61 | 	return f;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Test for the FilteredClassifier used with this filter.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 | 	weka.filters.MultiFilter mf = new weka.filters.MultiFilter();
 75 | 	Filter[] filters = new Filter[2];
 76 | 	filters[0] = getFilter();
 77 | 	weka.filters.unsupervised.attribute.RemoveType rt = new weka.filters.unsupervised.attribute.RemoveType(); // Need to remove string attributes because they are kept by this filter.
 78 | 	filters[1] = rt;
 79 | 	mf.setFilters(filters);
 80 | 	result.setFilter(mf);
 81 |         result.setClassifier(new weka.classifiers.functions.SMO());
 82 | 
 83 |         return result;
 84 |     }
 85 | 
 86 |     /**
 87 |      * Data to be used for FilteredClassifier test.
 88 |      *
 89 |      * @return the configured FilteredClassifier
 90 |      */
 91 |     protected Instances getFilteredClassifierData() throws Exception {
 92 |         Instances result;
 93 | 
 94 | 	// Check to see if the test is run from directory containing build_package.xml
 95 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 96 | 	    result = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 97 | 	} else { // Hope that package is installed.
 98 | 	    result = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 99 | 	}
100 | 
101 | 	result.setClassIndex(result.numAttributes() - 1);
102 | 
103 |         return result;
104 |     }
105 | 
106 |     /**
107 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
108 |      * one of the datasets that comes with the package.
109 |      *
110 |      * @throws Exception if an error occurs reading the example instances.
111 |      */
112 |     protected void setUp() throws Exception {
113 |         super.setUp();
114 | 
115 | 	// Check to see if the test is run from directory containing build_package.xml
116 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
117 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
118 | 	} else { // Hope that package is installed.
119 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
120 | 	}
121 | 
122 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
123 |     }
124 | 
125 |     public static Test suite() {
126 |         return new TestSuite(TweetToSentiStrengthFeatureVectorTest.class);
127 |     }
128 | 
129 |     public static void main(String[] args){
130 |         junit.textui.TestRunner.run(suite());
131 |     }
132 | }
133 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/TweetToWordListCountFeatureVectorTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.core.Instances;
 24 | import weka.filters.AbstractFilterTest;
 25 | import weka.filters.Filter;
 26 | 
 27 | import junit.framework.Test;
 28 | import junit.framework.TestSuite;
 29 | 
 30 | import java.io.File;
 31 | 
 32 | /**
  33 |  * Tests TweetToWordListCountFeatureVector. Run from the command line with: <p/>
 34 |  * java weka.filters.unsupervised.attribute.TweetToWordListCountFeatureVectorTest
 35 |  * <p> 
 36 |  * AffectiveTweets package must either be installed or
 37 |  * JVM must be started in AffectiveTweets directory.
 38 |  * <p>
 39 |  * @author FracPete and eibe
 40 |  * @version $Revision: 9568 $
 41 |  */
 42 | public class TweetToWordListCountFeatureVectorTest extends AbstractFilterTest {
 43 | 
 44 |     public TweetToWordListCountFeatureVectorTest(String name) {
 45 |         super(name);
 46 |     }
 47 | 
 48 |     /** Creates a default TweetToSentiStrengthFeatureVector filter */
 49 |     public Filter getFilter() {
 50 | 	Filter f = null;
 51 | 
 52 | 	// Check to see if the test is run from directory containing build_package.xml
 53 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 54 | 	    File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 55 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = new java.io.File(".."); // So that default lexicon, etc., is found.
 56 | 	    f = new TweetToWordListCountFeatureVector();
 57 | 	    weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 58 | 	} else {
 59 | 	    f = new TweetToWordListCountFeatureVector(); // Hope that the package is installed.
 60 | 	}
 61 | 	return f;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Test for the FilteredClassifier used with this filter.
 66 |      *
 67 |      * @return the configured FilteredClassifier
 68 |      */
 69 |     protected FilteredClassifier getFilteredClassifier() {
 70 |         FilteredClassifier	result;
 71 | 
 72 |         result = new FilteredClassifier();
 73 | 
 74 | 	weka.filters.MultiFilter mf = new weka.filters.MultiFilter();
 75 | 	Filter[] filters = new Filter[2];
 76 | 	filters[0] = getFilter();
 77 | 	weka.filters.unsupervised.attribute.RemoveType rt = new weka.filters.unsupervised.attribute.RemoveType(); // Need to remove string attributes because they are kept by this filter.
 78 | 	filters[1] = rt;
 79 | 	mf.setFilters(filters);
 80 | 	result.setFilter(mf);
 81 |         result.setClassifier(new weka.classifiers.functions.SMO());
 82 | 
 83 |         return result;
 84 |     }
 85 | 
 86 |     /**
 87 |      * Data to be used for FilteredClassifier test.
 88 |      *
 89 |      * @return the configured FilteredClassifier
 90 |      */
 91 |     protected Instances getFilteredClassifierData() throws Exception {
 92 |         Instances result;
 93 | 
 94 | 	// Check to see if the test is run from directory containing build_package.xml
 95 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
 96 | 	    result = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
 97 | 	} else { // Hope that package is installed.
 98 | 	    result = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
 99 | 	}
100 | 
101 | 	result.setClassIndex(result.numAttributes() - 1);
102 | 
103 |         return result;
104 |     }
105 | 
106 |     /**
107 |      * Called by JUnit before each test method. Sets up the Instances object to use based on 
108 |      * one of the datasets that comes with the package.
109 |      *
110 |      * @throws Exception if an error occurs reading the example instances.
111 |      */
112 |     protected void setUp() throws Exception {
113 |         super.setUp();
114 | 
115 | 	// Check to see if the test is run from directory containing build_package.xml
116 | 	if ((new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml")).exists()) {
117 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource("data" + File.separator + "sent140test.arff.gz")).getDataSet();
118 | 	} else { // Hope that package is installed.
119 | 	    m_Instances = (new weka.core.converters.ConverterUtils.DataSource(weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "data" + File.separator + "sent140test.arff.gz")).getDataSet();
120 | 	}
121 | 
122 | 	m_Instances.setClassIndex(m_Instances.numAttributes() - 1);
123 |     }
124 | 
125 |     public static Test suite() {
126 |         return new TestSuite(TweetToWordListCountFeatureVectorTest.class);
127 |     }
128 | 
129 |     public static void main(String[] args){
130 |         junit.textui.TestRunner.run(suite());
131 |     }
132 | }
133 | 


--------------------------------------------------------------------------------
/src/main/java/affective/core/CSVEmbeddingHandler.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
  17 |  *    CSVEmbeddingHandler.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | 
 23 | 
 24 | package affective.core;
 25 | 
 26 | import it.unimi.dsi.fastutil.doubles.AbstractDoubleList;
 27 | import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
 28 | 
 29 | import java.io.BufferedReader;
 30 | import java.io.File;
 31 | import java.io.FileInputStream;
 32 | import java.io.InputStreamReader;
 33 | import java.util.zip.GZIPInputStream;
 34 | 
 35 | import weka.core.OptionMetadata;
 36 | import weka.core.SingleIndex;
 37 | import weka.core.WekaPackageManager;
 38 | 
 39 | 
 40 | /**
 41 |  *  <!-- globalinfo-start --> 
 42 |  *  This class is used for handling word vector or embeddings stored in gzipped files.
 43 |  * 
 44 |  * <!-- globalinfo-end -->
 45 |  * 
 46 |  * 
 47 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 48 |  * @version $Revision: 1 $
 49 |  */
 50 | public class CSVEmbeddingHandler extends EmbeddingHandler {
 51 | 
 52 | 	/** For serialization **/ 
 53 | 	private static final long serialVersionUID = -2458037798910799631L;
 54 | 
 55 | 	/** Default path to where resources are stored. */
 56 | 	public static String RESOURCES_FOLDER_NAME = WekaPackageManager.PACKAGES_DIR.toString() + File.separator + "AffectiveTweets" + File.separator + "resources";
 57 | 
 58 | 
 59 | 	/** Embedding File Name.    **/
 60 | 	protected File embeddingsFile=new File(RESOURCES_FOLDER_NAME + File.separator + "w2v.twitter.edinburgh.100d.csv.gz");
 61 | 
 62 | 
 63 | 	/** The separator String    **/
 64 | 	protected String separator="TAB";
 65 | 
 66 | 
 67 | 	/** the index of the string attribute to be processed */
 68 | 	protected SingleIndex wordNameIndex = new SingleIndex("last");
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 	/**
 74 | 	 * Returns a string describing this filter.
 75 | 	 * 
 76 | 	 * @return a description of the filter suitable for displaying in the
 77 | 	 *         explorer/experimenter gui
 78 | 	 */	
 79 | 	public String globalInfo() {
 80 | 		return "This object handles word embeddings in csv.gz format. \n";
 81 | 	}
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 	/* (non-Javadoc)
 88 | 	 * @see affective.core.EmbeddingHandler#createDict()
 89 | 	 */
 90 | 	public void createDict() throws Exception {
 91 | 
 92 | 		FileInputStream fin = new FileInputStream(this.embeddingsFile);
 93 | 		GZIPInputStream gzis = new GZIPInputStream(fin);
 94 | 		InputStreamReader xover = new InputStreamReader(gzis);
 95 | 		BufferedReader bf = new BufferedReader(xover);
 96 | 
 97 | 		this.separator = this.separator.equals("TAB")?"\t":this.separator;
 98 | 		
 99 | 		
100 | 		String line;
101 | 		boolean firstLine=true;
102 | 		while ((line = bf.readLine()) != null) {
103 | 			String parts[]=line.split(this.separator);
104 | 
105 | 			AbstractDoubleList wordVector=new DoubleArrayList();
106 | 			if(firstLine){
107 | 				this.dimensions=parts.length-1;
108 | 
109 | 				this.wordNameIndex.setUpper(this.dimensions);
110 | 				firstLine=false;				
111 | 			}
112 | 			
113 | 			// only consider lines with right number of dimensions
114 | 			if(parts.length-1==this.dimensions){
115 | 				for(int i=0;i<parts.length-1;i++){
116 | 					if(i!=this.wordNameIndex.getIndex())				
117 | 						wordVector.add(Double.parseDouble(parts[i]));
118 | 				}
119 | 
120 | 				
121 | 				this.wordMap.put(parts[this.wordNameIndex.getIndex()], wordVector);
122 | 
123 | 			}
124 | 
125 | 
126 | 
127 | 		}
128 | 		bf.close();
129 | 		xover.close();
130 | 		gzis.close();
131 | 		fin.close();
132 | 	
133 | 
134 | 	}
135 | 
136 | 
137 | 
138 | 	@OptionMetadata(displayName = "embeddingsFile",
139 | 			description = "The file name containing the word vectors. It has to be a gzip compressed csv file",
140 | 			commandLineParamName = "K", commandLineParamSynopsis = "-K <string>",
141 | 			displayOrder = 1)
142 | 	public File getEmbeddingsFile() {
143 | 		return embeddingsFile;
144 | 	}
145 | 	public void setEmbeddingsFile(File embeddingsFile) {
146 | 		this.embeddingsFile = embeddingsFile;
147 | 	}
148 | 
149 | 
150 | 	@OptionMetadata(displayName = "separator",
151 | 			description = "The string to use as separator for the columns (you can use 'TAB' or '\\t' for the TAB symbol). \t Default:TAB",
152 | 			commandLineParamName = "sep", commandLineParamSynopsis = "-sep <string>",
153 | 			displayOrder = 2)	
154 | 	public String getSeparator() {
155 | 		return separator;
156 | 	}
157 | 	public void setSeparator(String separator) {
158 | 		this.separator = separator;
159 | 	}
160 | 
161 | 
162 | 
163 | 
164 | 	@OptionMetadata(displayName = "wordNameIndex",
165 | 			description = "The index (starting from 1) of the word string in the file. First and last are valid values. ",
166 | 			commandLineParamName = "I", commandLineParamSynopsis = "-I <col>",
167 | 			displayOrder = 0)	
168 | 	public String getWordNameIndex() {
169 | 		return wordNameIndex.getSingleIndex();
170 | 	}
171 | 	public void setWordNameIndex(String wordNameIndex) {
172 | 		this.wordNameIndex.setSingleIndex(wordNameIndex);
173 | 	}
174 | 
175 | 
176 | 
177 | 
178 | }
179 | 


--------------------------------------------------------------------------------
/src/main/java/weka/filters/unsupervised/attribute/TweetToWordListCountFeatureVector.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    TweetToWordListCountFeatureVector.java
 18 |  *    Copyright (C) 1999-2019 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | package weka.filters.unsupervised.attribute;
 23 | 
 24 | 
 25 | 
 26 | import java.util.ArrayList;
 27 | import java.util.HashSet;
 28 | import java.util.List;
 29 | import java.util.Set;
 30 | 
 31 | import java.util.Arrays;
 32 | 
 33 | import weka.core.Attribute;
 34 | import weka.core.Instance;
 35 | import weka.core.Instances;
 36 | import weka.core.OptionMetadata;
 37 | import weka.core.SparseInstance;
 38 | 
 39 | 
 40 | 
 41 | /**
 42 |  * 
 43 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 44 |  */
 45 | 
 46 | 
 47 | public class TweetToWordListCountFeatureVector extends TweetToFeatureVector {
 48 | 
 49 | 	/** For serialization.  */
 50 | 	private static final long serialVersionUID = -573366510055859430L;
 51 | 
 52 | 	/** The given word list as a comma separated string. */ 
 53 | 	public  String wordList = "love,happy,great";
 54 | 
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 	/**
 60 | 	 * Returns a string describing this filter.
 61 | 	 * 
 62 | 	 * @return a description of the filter suitable for displaying in the
 63 | 	 *         explorer/experimenter gui
 64 | 	 */	
 65 | 	@Override
 66 | 	public String globalInfo() {
 67 | 		return "A simple filter that counts occurrences of words from a given list.";
 68 | 	}
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 	/* (non-Javadoc)
 74 | 	 * @see weka.filters.SimpleFilter#determineOutputFormat(weka.core.Instances)
 75 | 	 */
 76 | 	@Override
 77 | 	protected Instances determineOutputFormat(Instances inputFormat)
 78 | 			throws Exception {
 79 | 
 80 | 		ArrayList<Attribute> att = new ArrayList<Attribute>();
 81 | 
 82 | 		// Adds all attributes of the inputformat
 83 | 		for (int i = 0; i < inputFormat.numAttributes(); i++) {
 84 | 			att.add(inputFormat.attribute(i));
 85 | 		}
 86 | 
 87 | 		// adds the new attribute
 88 | 		att.add(new Attribute("wordListCount"));
 89 | 		
 90 | 		Instances result = new Instances(inputFormat.relationName(), att, 0);
 91 | 
 92 | 		// set the class index
 93 | 		result.setClassIndex(inputFormat.classIndex());
 94 | 
 95 | 		return result;
 96 | 	}
 97 | 
 98 | 
 99 | 
100 | 	/* (non-Javadoc)
101 | 	 * @see weka.filters.SimpleFilter#process(weka.core.Instances)
102 | 	 */
103 | 	@Override
104 | 	protected Instances process(Instances instances) throws Exception {
105 | 
106 | 
107 | 		// set upper value for text index
108 | 		m_textIndex.setUpper(instances.numAttributes() - 1);
109 | 
110 | 		Instances result = getOutputFormat();
111 | 
112 | 
113 | 		// reference to the content of the message, users index start from zero
114 | 		Attribute attrCont = instances.attribute(this.m_textIndex.getIndex());
115 | 
116 | 
117 | 
118 | 		for (int i = 0; i < instances.numInstances(); i++) {	
119 | 
120 | 			// copy all attribute values from the original dataset
121 | 			double[] values = new double[result.numAttributes()];
122 | 			for (int n = 0; n < instances.numAttributes(); n++)
123 | 				values[n] = instances.instance(i).value(n);
124 | 
125 | 			
126 | 			String content = instances.instance(i).stringValue(attrCont);
127 | 			// tokenize the content
128 | 			List<String> words = affective.core.Utils.tokenize(content, this.toLowerCase, this.standarizeUrlsUsers, this.reduceRepeatedLetters, this.m_tokenizer,this.m_stemmer,this.m_stopwordsHandler);
129 | 
130 | 			// convert the list of words into a HashSet
131 | 			Set<String> wordSet = new HashSet<String>(Arrays.asList(wordList.split(",")));
132 | 			
133 | 			// count all the occurrences of words from the list
134 | 			int wordCounter = 0;			
135 | 			for(String word:words){
136 | 				if(wordSet.contains(word))
137 | 					wordCounter++;
138 | 			}
139 | 			
140 | 			
141 | 			// add the value to the last attribute
142 | 			values[values.length - 1] = wordCounter;
143 | 			
144 | 
145 | 			Instance inst = new SparseInstance(1, values);
146 | 
147 | 			inst.setDataset(result);
148 | 
149 | 			// copy possible strings, relational values...
150 | 			copyValues(inst, false, instances, result);
151 | 
152 | 			result.add(inst);
153 | 
154 | 		}
155 | 
156 | 		return result;
157 | 	}
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
165 | 	/**
166 | 	 * Main method for testing this class.
167 | 	 *
168 | 	 * @param args should contain arguments to the filter: use -h for help
169 | 	 */		
170 | 	public static void main(String[] args) {
171 | 		runFilter(new TweetToWordListCountFeatureVector(), args);
172 | 	}
173 | 
174 | 	
175 | 	// OptionMetada allows setting parameters from within the command-line interface
176 | 	@OptionMetadata(displayName = "wordlist",
177 | 			description = "The list with the words to count separated by a comma symbol.",
178 | 			commandLineParamName = "wordlist", commandLineParamSynopsis = "-wordlist <string>",
179 | 			displayOrder = 6)
180 | 	public String getWordList() {
181 | 		return wordList;
182 | 	}
183 | 	public void setWordList(String wordList) {
184 | 		this.wordList = wordList;
185 | 	}
186 | 
187 | 
188 | 
189 | }
190 | 


--------------------------------------------------------------------------------
/src/test/java/weka/filters/unsupervised/attribute/TweetToInputLexiconFeatureVectorTest.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  * Copyright (C) 2019 University of Waikato, Hamilton, New Zealand
 18 |  */
 19 | 
 20 | package weka.filters.unsupervised.attribute;
 21 | 
 22 | import weka.classifiers.meta.FilteredClassifier;
 23 | import weka.core.Instances;
 24 | import weka.filters.AbstractFilterTest;
 25 | import weka.filters.Filter;
 26 | 
 27 | import junit.framework.Test;
 28 | import junit.framework.TestSuite;
 29 | 
 30 | import java.io.File;
 31 | 
 32 | 
 33 | /**
 34 |  * Tests TweetToInputLexiconFeatureVector. Run from the command line with: <p>
 35 |  * java weka.filters.unsupervised.attribute.TweetToInputLexiconFeatureVectorTest
 36 |  * <p> 
 37 |  * AffectiveTweets package must either be installed or
 38 |  * JVM must be started in AffectiveTweets directory.
 39 |  * <p>
 40 |  * @author FracPete and eibe
 41 |  * @version $Revision: 9568 $
 42 |  */
 43 | public class TweetToInputLexiconFeatureVectorTest extends AbstractFilterTest {
 44 | 
 45 | 	/** Path of the test dataset, relative to the package directory. */
 46 | 	private static final String TEST_DATA = "data" + File.separator + "sent140test.arff.gz";
 47 | 
 48 | 	public TweetToInputLexiconFeatureVectorTest(String name) {
 49 | 		super(name);
 50 | 	}
 51 | 
 52 | 	/** Checks whether the test is run from the directory containing build_package.xml. */
 53 | 	private static boolean inSourceTree() {
 54 | 		return new File(".." + File.separator + "AffectiveTweets" + File.separator + "build_package.xml").exists();
 55 | 	}
 56 | 
 57 | 	/**
 58 | 	 * Loads the test dataset and uses its last attribute as the class.
 59 | 	 *
 60 | 	 * @return the loaded dataset
 61 | 	 * @throws Exception if an error occurs reading the dataset
 62 | 	 */
 63 | 	private static Instances loadTestData() throws Exception {
 64 | 		// Outside the source tree, hope that the package is installed.
 65 | 		String path = inSourceTree()
 66 | 				? TEST_DATA
 67 | 				: weka.core.WekaPackageManager.PACKAGES_DIR.toString() + File.separator + TEST_DATA;
 68 | 		Instances data = new weka.core.converters.ConverterUtils.DataSource(path).getDataSet();
 69 | 		data.setClassIndex(data.numAttributes() - 1);
 70 | 		return data;
 71 | 	}
 72 | 
 73 | 	/**
 74 | 	 * Creates a default TweetToInputLexiconFeatureVector filter.
 75 | 	 *
 76 | 	 * @return the filter under test
 77 | 	 */
 78 | 	public Filter getFilter() {
 79 | 		if (!inSourceTree()) {
 80 | 			return new TweetToInputLexiconFeatureVector(); // Hope that the package is installed.
 81 | 		}
 82 | 
 83 | 		// Point PACKAGES_DIR at the parent directory so that default lexicon, etc., is found.
 84 | 		File backup = weka.core.WekaPackageManager.PACKAGES_DIR;
 85 | 		weka.core.WekaPackageManager.PACKAGES_DIR = new File("..");
 86 | 		try {
 87 | 			return new TweetToInputLexiconFeatureVector();
 88 | 		} finally {
 89 | 			// Restore the global even if construction throws, so later tests are unaffected.
 90 | 			weka.core.WekaPackageManager.PACKAGES_DIR = backup;
 91 | 		}
 92 | 	}
 93 | 
 94 | 	/**
 95 | 	 * Builds the FilteredClassifier used to test this filter.
 96 | 	 *
 97 | 	 * @return the configured FilteredClassifier
 98 | 	 */
 99 | 	protected FilteredClassifier getFilteredClassifier() {
100 | 		FilteredClassifier result = new FilteredClassifier();
101 | 
102 | 		// Need to remove string attributes because they are kept by this filter.
103 | 		Filter[] filters = { getFilter(), new weka.filters.unsupervised.attribute.RemoveType() };
104 | 		weka.filters.MultiFilter mf = new weka.filters.MultiFilter();
105 | 		mf.setFilters(filters);
106 | 		result.setFilter(mf);
107 | 		result.setClassifier(new weka.classifiers.functions.SMO());
108 | 
109 | 		return result;
110 | 	}
111 | 
112 | 	/**
113 | 	 * Data to be used for FilteredClassifier test.
114 | 	 *
115 | 	 * @return the test dataset with its class index set
116 | 	 * @throws Exception if an error occurs reading the dataset
117 | 	 */
118 | 	protected Instances getFilteredClassifierData() throws Exception {
119 | 		return loadTestData();
120 | 	}
121 | 
122 | 	/**
123 | 	 * Called by JUnit before each test method. Sets up the Instances object to use based on
124 | 	 * one of the datasets that comes with the package.
125 | 	 *
126 | 	 * @throws Exception if an error occurs reading the example instances.
127 | 	 */
128 | 	protected void setUp() throws Exception {
129 | 		super.setUp();
130 | 		m_Instances = loadTestData();
131 | 	}
132 | 
133 | 	/** Overrides AbstractFilterTest#testBuffered() with an empty body, so nothing is checked. */
134 | 	public void testBuffered(){}
135 | 
136 | 	/** Overrides AbstractFilterTest#testRegression() with an empty body, so nothing is checked. */
137 | 	public void testRegression(){}
138 | 
139 | 	public static Test suite() {
140 | 		return new TestSuite(TweetToInputLexiconFeatureVectorTest.class);
141 | 	}
142 | 
143 | 	public static void main(String[] args){
144 | 		junit.textui.TestRunner.run(suite());
145 | 	}
146 | }
147 | 


--------------------------------------------------------------------------------
/wekarefs/weka/filters/unsupervised/attribute/LexiconDistantSupervisionTest.ref:
--------------------------------------------------------------------------------
 1 | @relation 'Twitter Sentiment Analysis Sentiment140 Test: Dataset. More info at: http://help.sentiment140.com/-weka.filters.unsupervised.attribute.LexiconDistantSupervision-lex../AffectiveTweets/lexicons/arff_lexicons/emoticons.arff-polattpolarity-negvalnegative-posvalpositive-removeMatchingWord-I1-tokenizerweka.core.tokenizers.TweetNLPTokenizer'
 2 | 
 3 | @attribute content string
 4 | @attribute topic string
 5 | @attribute class {negative,neutral,positive}
 6 | @attribute polarity {negative,positive}
 7 | 
 8 | @data
 9 | {0 '@kenburbary You\'ll love your Kindle2. I\'ve had mine for a few months and never looked back. The new big one is huge! No need for remorse! ',1 kindle2,2 positive,3 positive}
10 | {0 '@mikefish  Fair enough. But i have the Kindle2 and I think it\'s perfect  ',1 kindle2,2 positive,3 positive}
11 | {0 'downloading apps for my iphone! So much fun  There literally is an app for just about anything.',1 'iphone app',2 positive,3 positive}
12 | {0 'I\'m listening to \"P.Y.T\" by Danny Gokey &lt;3 &lt;3 &lt;3 Aww, he\'s so amazing. I &lt;3 him so much ',1 'Danny Gokey',2 positive,3 positive}
13 | {0 'is going to sleep then on a bike ride',1 sleep,2 positive,3 positive}
14 | {0 'Hello Twitter API ',1 '\"twitter api\"',2 positive,3 positive}
15 | {0 'RT @SmartChickPDX: Was just told that Nike layoffs started today ',1 nike}
16 | {0 'Back when I worked for Nike we had one fav word : JUST DO IT! ',1 nike,2 positive,3 positive}
17 | {0 'Class... The 50d is supposed to come today ',1 50d,2 positive,3 positive}
18 | {0 'needs someone to explain lambda calculus to him! ',1 'lambda calculus'}
19 | {0 'insects have infected my spinach plant ',1 insects}
20 | {0 'I hate revision, it\'s so boring! I am totally unprepared for my exam tomorrow  Things are not looking good...',1 exam}
21 | {0 'Higher physics exam tommorow, not lookin forward to it much ',1 exam}
22 | {0 'It\'s a bank holiday, yet I\'m only out of work now. Exam season sucks',1 exam}
23 | {0 'i srsly hate the stupid twitter API timeout thing, soooo annoying!!!!! ',1 'twitter api'}
24 | {0 'Night at the Museum tonite instead of UP.  oh well. that 4 yr old better enjoy it. LOL',1 '\"night at the museum\"'}
25 | {0 'Ahh...got rid of stupid time warner today &amp; now taking a nap while the roomies cook for me. Pretty good end for a monday ',1 'time warner',3 positive}
26 | {0 'Recovering from surgery..wishing @julesrenner was here ',1 surgery}
27 | {0 'My wrist still hurts. I have to get it looked at. I HATE the dr/dentist/scary places.  Time to watch Eagle eye. If you want to join, txt!',1 dentist,2 positive}
28 | {0 '@kirstiealley my dentist is great but she\'s expensive...',1 dentist}
29 | {0 'is studing math  tomorrow exam and dentist ',1 dentist,2 positive,3 positive}
30 | {0 'Going to the dentist later.',1 dentist}
31 | {0 'Found NOTHING at Nike Factory  Off to Banana Republic Outlet! http/myloc.me/2zic',1 nike}
32 | {0 'According to the create a school, Notre Dame will have 7 receivers in NCAA 10 at 84 or higher rating  *sweet*',1 'notre dame school',2 positive,3 positive}
33 | {0 '@siratomofbones we tried but Time Warner wasn\'t being nice so we recorded today. ',1 'time warner',3 positive}
34 | {0 'Safari 4 is fast  Even on my shitty AT&amp;T tethering.',1 at&t,3 positive}
35 | {0 '@ArunBasilLal I love Google Translator too !  Good day mate !',1 google,2 positive,3 positive}
36 | {0 'My Kindle2 came and I LOVE it! ',1 kindle2,2 positive,3 positive}
37 | {0 'Obama is quite a good comedian! check out his dinner speech on CNN  very funny jokes.',1 obama,2 positive,3 positive}
38 | {0 'Obama\'s got JOKES!! haha just got to watch a bit of his after dinner speech from last night... i\'m in love with mr. president ',1 obama,2 positive,3 positive}
39 | {0 '@ambcharlesfield lol. Ah my skin is itchy  damn lawnmowing.',1 itchy}
40 | {0 '@dannygokey I love you DANNY GOKEY!! ',1 'Danny Gokey',2 positive,3 positive}
41 | {0 '@Fraggle312 oh those are awesome! i so wish they weren\'t owned by nike ',1 nike}
42 | {0 '@mitzs hey bud  np I do so love my 50D, although I\'d love a 5D mkII more',1 50d,2 positive,3 positive}
43 | {0 '@jonduenas @robynlyn just got us a 50D for the office. ',1 50d,2 positive,3 positive}
44 | {0 'Learning about lambda calculus ',1 'lambda calculus',2 positive,3 positive}
45 | {0 'Just had McDonalds for dinner.  It was goooood. Big Mac Meal. ',1 mcdonalds,2 positive,3 positive}
46 | {0 'Stopped to have lunch at McDonalds. Chicken Nuggetssss!  yummmmmy.',1 mcdonalds,2 positive,3 positive}
47 | {0 'my exam went good. @HelloLeonie: your prayers worked ',1 exam,2 positive,3 positive}
48 | {0 'Only one exam left, and i am so happy for it ',1 exam,2 positive,3 positive}
49 | {0 '@mashable I never did thank you for including me in your Top 100 Twitter Authors! You Rock! (&amp; I New Wave ) http://bit.ly/EOrFV',1 mashable,2 positive,3 positive}
50 | {0 'HTML 5 Demos! Lots of great stuff to come! Yes, I\'m excited.  http://htmlfive.appspot.com #io2009 #googleio',1 googleio,2 positive,3 positive}
51 | {0 '#RantsAndRaves The worst thing about GM (concord / pleasant hill / martinez is the fucking UAW. ..   http://buzzup.com/4ueb',1 gm}
52 | {0 'Just got home from chick-fil-a with the boys. Damn my internets down  stupid time warner',1 'time warner'}
53 | {0 'confirmed: it\'s Time Warner\'s fault, not Facebook\'s, that fb is taking about 3 minutes to load. so tempted to switch to verizon ',1 'time warner'}
54 | {0 'this dentist\'s office is cold ',1 dentist}
55 | {0 'dropped her broccoli walking home from safeway!  so depressed',1 safeway,2 neutral}
56 | {0 'Nike rocks. I\'m super grateful for what I\'ve done with them  &amp; the European Division of NIKE is BEYOND! @whitSTYLES @muchasmuertes',1 nike,2 positive,3 positive}
57 | {0 '@sheridanmarfil - its not so much my obsession with cell phones, but the iphone!  i\'m a slave to at&amp;t forever because of it. ',1 at&t,3 positive}
58 | {0 'Ahhh... back in a *real* text editing environment. I &lt;3 LaTeX.',1 latex,2 positive,3 positive}
59 | 


--------------------------------------------------------------------------------
/src/main/java/weka/core/tokenizers/TweetNLPTokenizer.java:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *   This program is free software: you can redistribute it and/or modify
  3 |  *   it under the terms of the GNU General Public License as published by
  4 |  *   the Free Software Foundation, either version 3 of the License, or
  5 |  *   (at your option) any later version.
  6 |  *
  7 |  *   This program is distributed in the hope that it will be useful,
  8 |  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |  *   GNU General Public License for more details.
 11 |  *
 12 |  *   You should have received a copy of the GNU General Public License
 13 |  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |  */
 15 | 
 16 | /*
 17 |  *    TweetNLPTokenizer.java
 18 |  *    Copyright (C) 1999-2018 University of Waikato, Hamilton, New Zealand
 19 |  *
 20 |  */
 21 | 
 22 | package weka.core.tokenizers;
 23 | 
 24 | 
 25 | import java.util.Iterator;
 26 | import java.util.List;
 27 | 
 28 | import cmu.arktweetnlp.Twokenize;
 29 | 
 30 | import weka.core.RevisionUtils;
 31 | import weka.core.TechnicalInformation;
 32 | import weka.core.TechnicalInformation.Type;
 33 | 
 34 | 
 35 | 
 36 | 
 37 | /**
 38 |  *  <!-- globalinfo-start --> A Twitter-specific tokenizer based on the CMU TwitterNLP library: http://www.cs.cmu.edu/~ark/TweetNLP/
 39 |  * <!-- globalinfo-end -->
 40 |  * 
 41 |  * <!-- technical-bibtex-start -->
 42 |  * BibTeX:
 43 |  * <pre>
 44 |  * &#64;InProceedings{twitterNLP,
 45 |  * 	Title                    = {Part-of-speech tagging for twitter: Annotation, features, and experiments},
 46 |  * 	Author                   = {Gimpel, Kevin and Schneider, Nathan and O'Connor, Brendan and Das, Dipanjan and Mills, Daniel and Eisenstein, Jacob and Heilman, Michael and Yogatama, Dani and Flanigan, Jeffrey and Smith, Noah A},
 47 |  * 	Booktitle                = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies: short papers-Volume 2},
 48 |  * 	Year                     = {2011},
 49 |  * 	Organization             = {Association for Computational Linguistics},
 50 |  * 	Pages                    = {42--47}
 51 |  *	}
 52 |  * </pre>
 53 |  <!-- technical-bibtex-end -->
 54 |  * 
 55 |  * 
 56 |  * @author Felipe Bravo-Marquez (fbravoma@waikato.ac.nz)
 57 |  * @version $Revision: 1 $
 58 |  */
 59 | public class TweetNLPTokenizer extends Tokenizer {
 60 | 
 61 | 	/** For serialization. */
 62 | 	private static final long serialVersionUID = 4352757127093531518L;
 63 | 
 64 | 	/**
 65 | 	 * Iterator over the tokens produced by the last call to tokenize(). The field is
 66 | 	 * transient and has no initializer, so it is null until tokenize() has been called
 67 | 	 * (including right after deserialization).
 68 | 	 */
 69 | 	protected transient Iterator<String> m_tokenIterator;
 70 | 
 71 | 	/**
 72 | 	 * Returns a string describing this tokenizer.
 73 | 	 * 
 74 | 	 * @return a description of the filter suitable for displaying in the
 75 | 	 *         explorer/experimenter gui
 76 | 	 */
 77 | 	@Override
 78 | 	public String globalInfo() {
 79 | 		return "A Twitter-specific tokenizer based on the CMU TweetNLP library.\n" + getTechnicalInformation().toString();
 80 | 	}
 81 | 
 82 | 
 83 | 	/**
 84 | 	 * Returns an instance of a TechnicalInformation object, containing
 85 | 	 * detailed information about the technical background of this class,
 86 | 	 * e.g., paper reference or book this class is based on.
 87 | 	 *
 88 | 	 * @return the technical information about this class
 89 | 	 */
 90 | 	public TechnicalInformation getTechnicalInformation() {
 91 | 		TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);
 92 | 
 93 | 		result.setValue(TechnicalInformation.Field.AUTHOR, "Gimpel, Kevin and Schneider, Nathan and O'Connor, Brendan and Das, Dipanjan and Mills, Daniel and Eisenstein, Jacob and Heilman, Michael and Yogatama, Dani and Flanigan, Jeffrey and Smith, Noah A");
 94 | 		result.setValue(TechnicalInformation.Field.TITLE, "Part-of-speech tagging for twitter: Annotation, features, and experiments");
 95 | 		result.setValue(TechnicalInformation.Field.YEAR, "2011");
 96 | 		result.setValue(TechnicalInformation.Field.URL, "http://www.cs.cmu.edu/~ark/TweetNLP/");
 97 | 		result.setValue(TechnicalInformation.Field.NOTE, "The Weka tokenizer works with version 0.32 of TweetNLP.");
 98 | 
 99 | 		return result;
100 | 	}
101 | 
102 | 
103 | 	/**
104 | 	 * Tests if this enumeration contains more elements.
105 | 	 * 
106 | 	 * @return true if and only if this enumeration object contains at least one
107 | 	 *         more element to provide; false otherwise, including when nothing
108 | 	 *         has been tokenized yet.
109 | 	 */
110 | 	public boolean hasMoreElements() {
111 | 		// Guard against the iterator being null before tokenize() has run.
112 | 		return m_tokenIterator != null && m_tokenIterator.hasNext();
113 | 	}
114 | 
115 | 	/**
116 | 	 * Returns the next element of this enumeration if this enumeration object has
117 | 	 * at least one more element to provide.
118 | 	 * 
119 | 	 * @return the next element of this enumeration.
120 | 	 * @throws java.util.NoSuchElementException if there are no more tokens, including
121 | 	 *         when nothing has been tokenized yet
122 | 	 */
123 | 	@Override
124 | 	public String nextElement() {
125 | 		if (m_tokenIterator == null) {
126 | 			throw new java.util.NoSuchElementException("tokenize() has not been called");
127 | 		}
128 | 		return m_tokenIterator.next();
129 | 	}
130 | 
131 | 	/**
132 | 	 * Sets the string to tokenize. Tokenization happens immediately.
133 | 	 * 
134 | 	 * @param s the string to tokenize
135 | 	 */
136 | 	@Override
137 | 	public void tokenize(String s) {
138 | 		List<String> words = Twokenize.tokenizeRawTweetText(s);
139 | 		m_tokenIterator = words.iterator();
140 | 	}
141 | 
142 | 
143 | 	/**
144 | 	 * Returns the revision string.
145 | 	 * 
146 | 	 * @return the revision
147 | 	 */
148 | 	public String getRevision() {
149 | 		return RevisionUtils.extract("$Revision: 1 $");
150 | 	}
151 | 
152 | 
153 | 	/**
154 | 	 * Runs the tokenizer with the given options and strings to tokenize. The
155 | 	 * tokens are printed to stdout.
156 | 	 * 
157 | 	 * @param args the commandline options and strings to tokenize
158 | 	 */
159 | 	public static void main(String[] args) {
160 | 		runTokenizer(new TweetNLPTokenizer(), args);
161 | 	}
162 | 
163 | }
164 | 


--------------------------------------------------------------------------------
/src/test/resources/wekarefs/weka/filters/unsupervised/attribute/LexiconDistantSupervisionTest.ref:
--------------------------------------------------------------------------------
 1 | @relation 'Twitter Sentiment Analysis Sentiment140 Test: Dataset. More info at: http://help.sentiment140.com/-weka.filters.unsupervised.attribute.LexiconDistantSupervision-lex../AffectiveTweets/lexicons/arff_lexicons/emoticons.arff-polattpolarity-negvalnegative-posvalpositive-removeMatchingWord-I1-tokenizerweka.core.tokenizers.TweetNLPTokenizer'
 2 | 
 3 | @attribute content string
 4 | @attribute topic string
 5 | @attribute class {negative,neutral,positive}
 6 | @attribute polarity {negative,positive}
 7 | 
 8 | @data
 9 | {0 '@kenburbary You\'ll love your Kindle2. I\'ve had mine for a few months and never looked back. The new big one is huge! No need for remorse! ',1 kindle2,2 positive,3 positive}
10 | {0 '@mikefish  Fair enough. But i have the Kindle2 and I think it\'s perfect  ',1 kindle2,2 positive,3 positive}
11 | {0 'downloading apps for my iphone! So much fun  There literally is an app for just about anything.',1 'iphone app',2 positive,3 positive}
12 | {0 'I\'m listening to \"P.Y.T\" by Danny Gokey &lt;3 &lt;3 &lt;3 Aww, he\'s so amazing. I &lt;3 him so much ',1 'Danny Gokey',2 positive,3 positive}
13 | {0 'is going to sleep then on a bike ride',1 sleep,2 positive,3 positive}
14 | {0 'Hello Twitter API ',1 '\"twitter api\"',2 positive,3 positive}
15 | {0 'RT @SmartChickPDX: Was just told that Nike layoffs started today ',1 nike}
16 | {0 'Back when I worked for Nike we had one fav word : JUST DO IT! ',1 nike,2 positive,3 positive}
17 | {0 'Class... The 50d is supposed to come today ',1 50d,2 positive,3 positive}
18 | {0 'needs someone to explain lambda calculus to him! ',1 'lambda calculus'}
19 | {0 'insects have infected my spinach plant ',1 insects}
20 | {0 'I hate revision, it\'s so boring! I am totally unprepared for my exam tomorrow  Things are not looking good...',1 exam}
21 | {0 'Higher physics exam tommorow, not lookin forward to it much ',1 exam}
22 | {0 'It\'s a bank holiday, yet I\'m only out of work now. Exam season sucks',1 exam}
23 | {0 'i srsly hate the stupid twitter API timeout thing, soooo annoying!!!!! ',1 'twitter api'}
24 | {0 'Night at the Museum tonite instead of UP.  oh well. that 4 yr old better enjoy it. LOL',1 '\"night at the museum\"'}
25 | {0 'Ahh...got rid of stupid time warner today &amp; now taking a nap while the roomies cook for me. Pretty good end for a monday ',1 'time warner',3 positive}
26 | {0 'Recovering from surgery..wishing @julesrenner was here ',1 surgery}
27 | {0 'My wrist still hurts. I have to get it looked at. I HATE the dr/dentist/scary places.  Time to watch Eagle eye. If you want to join, txt!',1 dentist,2 positive}
28 | {0 '@kirstiealley my dentist is great but she\'s expensive...',1 dentist}
29 | {0 'is studing math  tomorrow exam and dentist ',1 dentist,2 positive,3 positive}
30 | {0 'Going to the dentist later.',1 dentist}
31 | {0 'Found NOTHING at Nike Factory  Off to Banana Republic Outlet! http/myloc.me/2zic',1 nike}
32 | {0 'According to the create a school, Notre Dame will have 7 receivers in NCAA 10 at 84 or higher rating  *sweet*',1 'notre dame school',2 positive,3 positive}
33 | {0 '@siratomofbones we tried but Time Warner wasn\'t being nice so we recorded today. ',1 'time warner',3 positive}
34 | {0 'Safari 4 is fast  Even on my shitty AT&amp;T tethering.',1 at&t,3 positive}
35 | {0 '@ArunBasilLal I love Google Translator too !  Good day mate !',1 google,2 positive,3 positive}
36 | {0 'My Kindle2 came and I LOVE it! ',1 kindle2,2 positive,3 positive}
37 | {0 'Obama is quite a good comedian! check out his dinner speech on CNN  very funny jokes.',1 obama,2 positive,3 positive}
38 | {0 'Obama\'s got JOKES!! haha just got to watch a bit of his after dinner speech from last night... i\'m in love with mr. president ',1 obama,2 positive,3 positive}
39 | {0 '@ambcharlesfield lol. Ah my skin is itchy  damn lawnmowing.',1 itchy}
40 | {0 '@dannygokey I love you DANNY GOKEY!! ',1 'Danny Gokey',2 positive,3 positive}
41 | {0 '@Fraggle312 oh those are awesome! i so wish they weren\'t owned by nike ',1 nike}
42 | {0 '@mitzs hey bud  np I do so love my 50D, although I\'d love a 5D mkII more',1 50d,2 positive,3 positive}
43 | {0 '@jonduenas @robynlyn just got us a 50D for the office. ',1 50d,2 positive,3 positive}
44 | {0 'Learning about lambda calculus ',1 'lambda calculus',2 positive,3 positive}
45 | {0 'Just had McDonalds for dinner.  It was goooood. Big Mac Meal. ',1 mcdonalds,2 positive,3 positive}
46 | {0 'Stopped to have lunch at McDonalds. Chicken Nuggetssss!  yummmmmy.',1 mcdonalds,2 positive,3 positive}
47 | {0 'my exam went good. @HelloLeonie: your prayers worked ',1 exam,2 positive,3 positive}
48 | {0 'Only one exam left, and i am so happy for it ',1 exam,2 positive,3 positive}
49 | {0 '@mashable I never did thank you for including me in your Top 100 Twitter Authors! You Rock! (&amp; I New Wave ) http://bit.ly/EOrFV',1 mashable,2 positive,3 positive}
50 | {0 'HTML 5 Demos! Lots of great stuff to come! Yes, I\'m excited.  http://htmlfive.appspot.com #io2009 #googleio',1 googleio,2 positive,3 positive}
51 | {0 '#RantsAndRaves The worst thing about GM (concord / pleasant hill / martinez is the fucking UAW. ..   http://buzzup.com/4ueb',1 gm}
52 | {0 'Just got home from chick-fil-a with the boys. Damn my internets down  stupid time warner',1 'time warner'}
53 | {0 'confirmed: it\'s Time Warner\'s fault, not Facebook\'s, that fb is taking about 3 minutes to load. so tempted to switch to verizon ',1 'time warner'}
54 | {0 'this dentist\'s office is cold ',1 dentist}
55 | {0 'dropped her broccoli walking home from safeway!  so depressed',1 safeway,2 neutral}
56 | {0 'Nike rocks. I\'m super grateful for what I\'ve done with them  &amp; the European Division of NIKE is BEYOND! @whitSTYLES @muchasmuertes',1 nike,2 positive,3 positive}
57 | {0 '@sheridanmarfil - its not so much my obsession with cell phones, but the iphone!  i\'m a slave to at&amp;t forever because of it. ',1 at&t,3 positive}
58 | {0 'Ahhh... back in a *real* text editing environment. I &lt;3 LaTeX.',1 latex,2 positive,3 positive}
59 | 


--------------------------------------------------------------------------------