├── COPYRIGHT ├── README.md ├── TED_comment_annotations ├── 1 │ ├── .svn │ │ ├── entries │ │ └── text-base │ │ │ ├── comment_test.txt.svn-base │ │ │ ├── sentence_test.txt.svn-base │ │ │ └── thomas.svn-base │ ├── comment_test.txt │ ├── complete │ │ ├── .svn │ │ │ ├── entries │ │ │ └── text-base │ │ │ │ ├── comment_test.txt.svn-base │ │ │ │ └── sentence_test.txt.svn-base │ │ ├── comment_test.txt │ │ └── sentence_test.txt │ └── sentence_test.txt ├── 2 │ ├── .svn │ │ ├── entries │ │ └── text-base │ │ │ ├── comment_test.txt.svn-base │ │ │ ├── ilja.svn-base │ │ │ └── sentence_test.txt.svn-base │ ├── comment_test.txt │ ├── complete │ │ ├── .svn │ │ │ ├── entries │ │ │ └── text-base │ │ │ │ ├── comment_test.txt.svn-base │ │ │ │ └── sentence_test.txt.svn-base │ │ ├── comment_test.txt │ │ └── sentence_test.txt │ └── sentence_test.txt ├── 3 │ ├── .svn │ │ ├── entries │ │ └── text-base │ │ │ ├── comment_test.txt.svn-base │ │ │ ├── dimitri.svn-base │ │ │ └── sentence_test.txt.svn-base │ ├── comment_test.txt │ ├── complete │ │ ├── .svn │ │ │ ├── entries │ │ │ └── text-base │ │ │ │ ├── comment_test.txt.svn-base │ │ │ │ └── sentence_test.txt.svn-base │ │ ├── comment_test.txt │ │ └── sentence_test.txt │ └── sentence_test.txt ├── 4 │ ├── .svn │ │ ├── entries │ │ └── text-base │ │ │ ├── comment_test.txt.svn-base │ │ │ ├── marc.svn-base │ │ │ └── sentence_test.txt.svn-base │ ├── comment_test.txt │ ├── complete │ │ ├── .svn │ │ │ ├── entries │ │ │ └── text-base │ │ │ │ ├── comment_test.txt.svn-base │ │ │ │ └── sentence_test.txt.svn-base │ │ ├── comment_test.txt │ │ └── sentence_test.txt │ └── sentence_test.txt ├── 5 │ ├── .svn │ │ ├── entries │ │ └── text-base │ │ │ ├── comment_test.txt.svn-base │ │ │ ├── nesli.svn-base │ │ │ └── sentence_test.txt.svn-base │ ├── comment_test.txt │ ├── complete │ │ ├── .svn │ │ │ ├── entries │ │ │ └── text-base │ │ │ │ ├── comment_test.txt.svn-base │ │ │ │ └── sentence_test.txt.svn-base │ │ ├── comment_test.txt │ │ └── sentence_test.txt │ └── sentence_test.txt └── 6 │ ├── .svn │ ├── entries │ └── text-base │ │ ├── comment_test.txt.svn-base │ │ ├── sentence_test.txt.svn-base │ │ └── subheim.svn-base │ ├── comment_test.txt │ ├── complete │ ├── .svn │ │ ├── entries │ │ └── text-base │ │ │ ├── comment_test.txt.svn-base │ │ │ └── sentence_test.txt.svn-base │ ├── comment_test.txt │ └── sentence_test.txt │ └── sentence_test.txt ├── bootstrapping.py ├── datasets ├── .svn │ ├── entries │ └── text-base │ │ ├── emoticons.data.svn-base │ │ ├── emoticons_patch.py.svn-base │ │ ├── segmentation_corpus.data.svn-base │ │ └── subjclueslen1-HLTEMNLP05.tff.svn-base ├── __init__.py ├── __init__.pyc ├── emoticons.data ├── emoticons_patch.py ├── emoticons_patch.pyc ├── segmentation_corpus.data └── subjclueslen1-HLTEMNLP05.tff ├── hp_classifiers.py ├── images └── bootstrap.png ├── lexicon.py ├── lexicon ├── negative-words.txt └── positive-words.txt ├── pb_classifiers.py ├── polarity.py ├── pos.py ├── replacer.py ├── sentiment.py ├── stored ├── learned_patterns ├── lexicon └── ubt_tagger.classifier ├── svm.py └── terminal_colors.py /README.md: -------------------------------------------------------------------------------- 1 | usent — The attached code is a Python implementation of a dictionary-based sentiment classification procedure which combines two different bootstrapping procedures, namely for subjectivity and polarity detection (as in [3, 4] respectively). The rule-based polarity classifier is an extension of the one that was presented in [5]. Moreover, /TED_comment_annotations folder contains the files of the human study we conducted on TED comment sentiment classification (with 6 human annotators). 2 | 3 | - E. Riloff and J. Wiebe. Learning extraction patterns for subjective expressions. 4 | In Proceedings of the 2003 conference on Empirical methods in natural language 5 | processing, 2003.
6 | - D. K. M Wiegand. Bootstrapping supervised machine-learning polarity classifiers with rule-based classification. 7 | In Proceedings of the ECAI-Workshop on Computational Approaches to Subjectivity and Sentiment Analysis, 2009.
8 | - T. Wilson, J. Wiebe, and P. Hoffmann. Recognizing contextual polarity in phrase-level sentiment 9 | analysis. In Proceedings of the conference on Human Language Technology and Empirical Methods in 10 | Natural Language Processing, 2005.
11 | 12 | The code was used for an opinion mining and retrieval system presented at CICLing 2013 [1], and for improving one-class collaborative filtering [2]. 13 | 14 | ``` 15 | @incollection{pappas13c, 16 | location = {Samos, Greece}, 17 | year = {2013}, 18 | booktitle = {Computational Linguistics and Intelligent Text Processing}, 19 | volume = {7817}, 20 | doi = {10.1007/978-3-642-37256-8_17}, 21 | title = {Distinguishing the Popularity between Topics: A System for Up-to-Date Opinion Retrieval and Mining in the Web}, 22 | author = {Pappas, Nikolaos and Katsimpras, Georgios and Stamatatos, Efstathios}, 23 | pages = {197-209} 24 | } 25 | 26 | @inproceedings{pappas13a, 27 | author = {Pappas, Nikolaos and Popescu-Belis, Andrei}, 28 | title = {Sentiment Analysis of User Comments for One-Class Collaborative Filtering Over {TED} Talks}, 29 | booktitle = {Proceedings of the 36th international ACM SIGIR conference on Research and development in information retrieval}, 30 | series = {SIGIR '13}, 31 | year = {2013}, 32 | isbn = {978-1-4503-2034-4}, 33 | location = {Dublin, Ireland}, 34 | pages = {773--776}, 35 | numpages = {4}, 36 | } 37 | ``` 38 | 39 | 40 | 41 | Installing dependencies 42 | ------------ 43 | The available code for unsupervised sentiment classification requires Python programming 44 | language and pip package manager to run. For detailed installing instructions please refer to 45 | the following links:
46 | http://www.python.org/getit/
47 | http://www.pip-installer.org/en/latest/ 48 | 49 | After installing them, you should be able to install the following packages:
50 | ```bash 51 | $ pip install nltk 52 | $ pip install stemmer 53 | $ pip install numpy 54 | $ pip install pickle 55 | ``` 56 | 57 | After you install nltk you will need some corpora to train the sequential POS tagger (pos.py) and the nltk tokenizer. 58 | ```bash 59 | $ python 60 | ``` 61 | ```python 62 | import nltk 63 | nltk.download() 64 | ``` 65 | The issue of the above command will load a graphical interface that lets you manage several corpora 66 | related to nltk library. From the list select and download the following corpora: 67 | *tokenizers/punkt/english*, *wordnet*, *brown*, *conll2000* and *treebank*. 68 | 69 | Lastly, pyml library is needed for the SVM classifier that is used currently in our code.
70 | Download http://pyml.sourceforge.net/ and then issue:
71 | ```bash 72 | $ tar zxvf PyML-0.7.11.tar.gz 73 | $ cd PyML-0.7.11 74 | $ python setup.py build 75 | $ python setup.py install 76 | ``` 77 | 78 | 79 | Processing pipeline 80 | ------------------- 81 | The current pipline that is implemented in sentiment.py is depicted in the following diagram. Initially, 82 | the input text is split into sentences and each sentence is fed to a high precision subjectivity classifier. 83 | If the sentence is classified as subjective then syntactic patterns are learned from this instance. In case 84 | that the sentence is not detected as such then it is fed to the pattern-based classifier. The pattern-based 85 | classifier outputs the class of the sentence based on the learned patterns so far. If the instance is subjective 86 | then again more patterns are learned from it, otherwise it is fed to a high precision objectivity classifier. 87 | If the sentence is classified as objective, then it is ignored, otherwise it is fed to the polarity classifier. 88 | Finally, the polarity classifier estimates the numerical sentiment and normalized sentiment values and outputs 89 | the result. The instances with high confidence from the polarity classifier can be further used to train an SVM 90 | classifier to improve further the classification performance (see paper for further details). At the current version 91 | this option is disabled, but you can easily enable it. Similarly, you can remove some of the components from the 92 | pipeline according to your needs (e.g. skip subjectivity classification). 93 | 94 |

95 | 96 |

97 | 98 | Examples 99 | -------- 100 | To estimate the total sentiment and total normalized sentiment (as described in the papers), 101 | you can simply execute the sentiment.py file and give the desired block of text as an argument. 102 | Make sure that you escape symbols such as '"' and '!'. Apart from the command line execution you 103 | can integrate the library to your code and use directly the returned results. Below you can 104 | find two simple examples for demonstrating purposes: 105 | 106 | ```bash 107 | $ python sentiment.py "I have to give much love and respect to Rony. Your work is Amazing\\!" 108 | ``` 109 | ``` 110 | [+] Loaded existing UBT tagger! 111 | [+] Loaded existing pattern knowledge! 112 | 113 | [*] Checking block of text: 114 | [1] I have to give much love and respect to Rony. 115 | [2] Your work is Amazing! 116 | 117 | [*] Overall sentiment analysis: 118 | 119 | Parts: 2 120 | Sentiments: ['positive', 'positive'] 121 | Scores: [4, 6.0] 122 | Results: {'positive': {'count': 2, 'score': 10.0, 'nscore': 1.9}, 123 | 'neutral': {'count': 0, 'score': 0, 'nscore': 0}, 124 | 'negative': {'count': 0, 'score': 0, 'nscore': 0}} 125 | 126 | 127 | subjective-----> 100.00% 128 | objective------> 0.00% 129 | 130 | 131 | positive-------> 100.00% 132 | neutral--------> 0.00% 133 | negative-------> 0.00% 134 | 135 | [x] positive (10.00, 1.90) 136 | ``` 137 | 138 | 139 | ```bash 140 | $ python sentiment.py "I was blown away by some of the comments here posted by people who is either 141 | uneducated, ignorant, self-righteous or al-of-the-above. I'm irritated and saddened as I read these 142 | finger-pointing \"i'm right and you're wrong\" type of posts\!" 143 | ``` 144 | ``` 145 | [+] Loaded existing UBT tagger! 146 | [+] Loaded existing pattern knowledge! 147 | 148 | [*] Checking block of text: 149 | [1] I was blown away by some of the comments here posted by people who is either uneducated, ignorant, self-righteous or al-of-the-above. 150 | [2] I'm irritated and saddened as I read these finger-pointing "i'm right and you're wrong" type of posts! 151 | 152 | [*] Overall sentiment analysis: 153 | 154 | Parts: 2 155 | Sentiments: ['negative', 'negative'] 156 | Scores: [-4, -4.0] 157 | Results: {'positive': {'count': 0, 'score': 0, 'nscore': 0}, 158 | 'neutral': {'count': 0, 'score': 0, 'nscore': 0}, 159 | 'negative': {'count': 2, 'score': -8.0, 'nscore': -0.3722943722943723}} 160 | 161 | 162 | subjective-----> 100.00% 163 | objective------> 0.00% 164 | 165 | 166 | positive-------> 0.00% 167 | neutral--------> 0.00% 168 | negative-------> 100.00% 169 | 170 | [x] negative (-8.00, -0.37) 171 | ``` 172 | -------------------------------------------------------------------------------- /TED_comment_annotations/1/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/1 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | 866d3f79f17831ae93d69b7a3e60fe63 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 34783 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | 1f8e13fcdf3ae509424e4d068f5530cd 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 14723 96 | 97 | complete 98 | dir 99 | 100 | thomas 101 | file 102 | 103 | 104 | 105 | 106 | 2013-01-18T19:54:08.000000Z 107 | d41d8cd98f00b204e9800998ecf8427e 108 | 2013-01-18T18:08:40.681354Z 109 | 1662 110 | snupdc 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 0 133 | 134 | -------------------------------------------------------------------------------- /TED_comment_annotations/1/.svn/text-base/thomas.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/TED_comment_annotations/1/.svn/text-base/thomas.svn-base -------------------------------------------------------------------------------- /TED_comment_annotations/1/complete/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/1/complete 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | a427c44f4483e55f129daef2756b86fc 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 34826 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | 3af629e72cc2189c0abe62e8e60e50dc 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 14863 96 | 97 | -------------------------------------------------------------------------------- /TED_comment_annotations/1/sentence_test.txt: -------------------------------------------------------------------------------- 1 | 1. No magic, alien assistance or lost mystical powers - just plain patience and determination ignoringg the attendant slavery for the moment). 2 | ---> 3 | 4 | 2. Not only does he put focus on the dumbing down, dilution and perversion of scientific findings one sees presented in the media, but he explains the mechanisms by which the same findings become inaccessible or incomprehensible to the less probing masses. 5 | ---> 6 | 7 | 3. This is science at it's best and the future is bright. 8 | ---> 9 | 10 | 4. Is this really a good idea in the long term? 11 | ---> 12 | 13 | 5. The inventor did not mean to produce such kind of glue. 14 | ---> 15 | 16 | 6. People do not want to change now as long as oil is still affordable. 17 | ---> 18 | 19 | 7. You fail to realize what povertyy" really means in regards to education, connection, status, class, family and social clasism. 20 | ---> 21 | 22 | 8. clumsy to say the less) but I feel the full conclusion was not reached. 23 | ---> 24 | 25 | 9. Agricultural techniques may increase that amount but doing so often comes with environmental damage and a loss of biodiversity. 26 | ---> 27 | 28 | 10. Inspiring talk! 29 | ---> 30 | 31 | 11. Think what you want, but evolution is true and there's no conspiracy. 32 | ---> 33 | 34 | 12. The question of great power bringing great responsibility isn't new (spiderman, anyone? 35 | ---> 36 | 37 | 13. With out ideologues, there is no hope whatsoever. 38 | ---> 39 | 40 | 14. Beautiful, but for someone like me who's biggest problem is control"l over my experiences, it sounds like a sort of hell to not be able to see my boundaries! 41 | ---> 42 | 43 | 15. Direction of Magnetic force, and 4. 44 | ---> 45 | 46 | 16. i got a little bored listening. 47 | ---> 48 | 49 | 17. Helping Handicapped and/or elderly people. 50 | ---> 51 | 52 | 18. I do not mean to demean anyone. 53 | ---> 54 | 55 | 19. I do love this concept of design for outcome, nice talk! 56 | ---> 57 | 58 | 20. The most basic concept here is people thrive on autonomy. 59 | ---> 60 | 61 | 21. The negative attitude is ingrained in us by evolution, no less. 62 | ---> 63 | 64 | 22. Check out this newly created blog about bigg data", which in this wek's post discusses the ethical implications of this topic. 65 | ---> 66 | 67 | 23. I love this! 68 | ---> 69 | 70 | 24. I'm living in Japan, which consistently appears at the better end of the scale. 71 | ---> 72 | 73 | 25. In his eloquent reminiscence of his visit to the Kaba- Islam's religious shrine, Mr. Mustafe Akyol uses the word, islamicc Law" several times. 74 | ---> 75 | 76 | 26. Fantastic talk. 77 | ---> 78 | 79 | 27. This sounds amazing. 80 | ---> 81 | 82 | 28. More importantly, there is no government that would legitimize such businesses by issuing business permits or licenses, whose sole purpose is to police their role. 83 | ---> 84 | 85 | 29. Aime is an extraordinary speaker. 86 | ---> 87 | 88 | 30. I think it's pretty good to see someone stand out and endorse this kind of viewpoint providing a new way to see things. 89 | ---> 90 | 91 | 31. I love her voice and the cello. 92 | ---> 93 | 94 | 32. He doesn't have a clue about Functional Medicine and its uses in modern Europe. 95 | ---> 96 | 97 | 33. Where's the innovation? 98 | ---> 99 | 100 | 34. Looking at the data, I was struck not only by Thailand's rapid decrease in fertility rate but also by a similar decrease in fertility rate of its neighbors. 101 | ---> 102 | 103 | 35. Many of my friends died in car accidents which could have been all avoided by this technology! 104 | ---> 105 | 106 | 36. very nice work mr. klein. 107 | ---> 108 | 109 | 37. Muchas veces al ver un problema pensamos que las soluciones siempre son complejas, lo cual implica de cierta forma m´s tiempo y m´s dinero; sin embargo existen soluciones simples y de bajo costo que permiten resultados similares, y eso es lo que vemos en este video que est´ orientado a las clases de escazos recursos. 110 | ---> 111 | 112 | 38. the particles he mentions can be descriptions of different possible routs for vibrations of a very complex vortex of interacting stringss". 113 | ---> 114 | 115 | 39. Not worthy of Ted. 116 | ---> 117 | 118 | 40. not all of us may have the capital and resources that she had but i am pretty sure that most of us have the basic education needed to teach. 119 | ---> 120 | 121 | 41. An amazing speaker with knowledge is enough to get me excited about any topic. 122 | ---> 123 | 124 | 42. Most of all, it can inspire us to do things we never thought we could. 125 | ---> 126 | 127 | 43. I believe that this debate has shown that wind for one can be a big part in solving our energy problems. 128 | ---> 129 | 130 | 44. Bjorn reminds me of what I don't really like about organizations and events like Ted, Davos, the Cfr, etc. 131 | ---> 132 | 133 | 45. I made the conscience decision to stop listening to music for a while which was devastating because music is my passion by far. 134 | ---> 135 | 136 | 46. Many have denied in follow-up that they reject common descent (your 'macroevolution') and claim to have been misled or misrepresented. 137 | ---> 138 | 139 | 47. The project requirements were to construct a minimal structure from unusual materials. 140 | ---> 141 | 142 | 48. fascinating body of work and process of thinking 143 | ---> 144 | 145 | 49. It is a kind of revolution in the area of technology. 146 | ---> 147 | 148 | 50. @chris: Have you considered confirmation bias (chery-picking)? 149 | ---> 150 | 151 | 51. Are you saying it's impossible for these apes to have a similar consciousness? 152 | ---> 153 | 154 | 52. lastt phrase shamelessly copied from who knows who). 155 | ---> 156 | 157 | 53. If anything, this lecture should have seen the different type. 158 | ---> 159 | 160 | 54. But, its root cause, The Covert Enslavement of Humanity, is history's greatest injustice. 161 | ---> 162 | 163 | 55. Dear T Walker sadlyy, you will not understand what that sentence really means". 164 | ---> 165 | 166 | 56. Nick Bostrom; you can not prove a negative, self away beings will always search & need meaning, and from what we know from neuroscience, the brain seems to have evolved the ability to experience transcendence and belief. 167 | ---> 168 | 169 | 57. This is a bad idea that leads to a generation or more that doesn't understand the fundamentals of calculation. 170 | ---> 171 | 172 | 58. A brilliant professor ! 173 | ---> 174 | 175 | 59. Yet no one has been able to compare the effectiveness of solar orbital microwave power stations? 176 | ---> 177 | 178 | 60. As far as his introduction, it was a hook and it was interesting and thought provoking, just like what followed. 179 | ---> 180 | 181 | 61. Rahul Dewan: ii feel, the way to prevent terrorism is to 'raise the level of discourse' on God and Spirituality, and make it spread across all religious faiths". 182 | ---> 183 | 184 | 62. And equally like the 19th century, our governing powers and ruling elites don't quite have any answer of how to undo what can't really be undone. 185 | ---> 186 | 187 | 63. Commenting on Geof Mosher alongg with Tj Green) Iwould start b ystating the fact that evolutionn is war" is quite an understatment. 188 | ---> 189 | 190 | 64. I will address them personally if you wish me to do so. 191 | ---> 192 | 193 | 65. That's the wonderful right of freedom. 194 | ---> 195 | 196 | 66. Very nice. 197 | ---> 198 | 199 | 67. But could the opposite be true: that the gregariousness that is the driving force of human beings, is being virtualized by make-believe substitutes to actual social interaction? 200 | ---> 201 | 202 | 68. Guys use Vlc player, the mp4 480p version works perfectly on it, awesome quality too. 203 | ---> 204 | 205 | 69. Would it be impolite to ask where you stand in this argument? 206 | ---> 207 | 208 | 70. Just wow. 209 | ---> 210 | 211 | 71. However, had large numbers of Dutch with their hunting and military style rifles been able to start a sniping campaign against the Germans, Wi may have been shortened. 212 | ---> 213 | 214 | 72. Amazing - Good Work - He is Worth to not only to India but also to the Whole world His promise for Open Source is also unbelivable a lots of Thanks to him and sponsers 215 | ---> 216 | 217 | 73. It isn't good or bad; it's inevitable. 218 | ---> 219 | 220 | 74. A couple of years ago the idea was about privacy that if you are connected to the Internet then your privacy is eliminated and you don't have any right to claim it back because you're a part of the network and you're obviously exchanging info that crosses a lot of paths to reach you, but recently, telecom providers are the ones dominating and watching, closely, what everyone is doing. 221 | ---> 222 | 223 | 75. Wait for gasoline to hit $5 a gallon and it will. 224 | ---> 225 | 226 | 76. Ib is a rigorous international education program for high school students. 227 | ---> 228 | 229 | 77. As someone who has spent several years teaching, I can say from experience that children need discipline, a rigid structure and for the most part, they need to be told what to do. 230 | ---> 231 | 232 | 78. Horrible architect, decent marketer perhaps? 233 | ---> 234 | 235 | 79. They aren't reproducible on command, and they do not openly discussed the incidences because the subject is taboo. 236 | ---> 237 | 238 | 80. Examples: To say that conservatives are much more into ideological purity is absurd. 239 | ---> 240 | 241 | 81. David is talking about a job interview on steroids, and like all job interviews there is an understanding that things will be shown in the best light possible (to say the least). 242 | ---> 243 | 244 | 82. In order to ensure we aren't fluctuations, we need to make an observation and analyze it. 245 | ---> 246 | 247 | 83. They don’t have room or numbers and we are losing them. 248 | ---> 249 | 250 | 84. The point I'm trying to make is this: a great craftsman is Not by default a great artist. 251 | ---> 252 | 253 | 85. They are invited to raise their right hand and in unison say globall Truce Starts Now! 254 | ---> 255 | 256 | 86. I would say all the fundamentalisms floating around in peoples minds making war on each other are one of the great problems of our time. 257 | ---> 258 | 259 | 87. I will probably be one of the first to be swept away! 260 | ---> 261 | 262 | 88. Please don't even Think of taking it down! 263 | ---> 264 | 265 | 89. You see a lot of people, youngsters especially, who complain that they are depressed and alone. 266 | ---> 267 | 268 | 90. I'm dubious about the payback periods you state. 269 | ---> 270 | 271 | 91. Did his disgraceful comment on Israel fall on deaf ears? 272 | ---> 273 | 274 | 92. To that end, how would empathy fix any of these problems? 275 | ---> 276 | 277 | 93. follow his lead by taking a hard look at your own life and either get help or work on your own to become more 'gay'. 278 | ---> 279 | 280 | 94. Sorry, he's a pet peeve. 281 | ---> 282 | 283 | 95. human have no right and ability to make it! 284 | ---> 285 | 286 | 96. Felix, why should an artist or a political activist not be commercially astute? 287 | ---> 288 | 289 | 97. And now we have screwed up our heads were screwing about with monkeys heads as well): 290 | ---> 291 | 292 | 98. It searches for and indexes common data in a vast resource and, not surprisingly, ends up finding lots of that data. 293 | ---> 294 | 295 | 99. People need to try to be less offended by differing viewpoints. 296 | ---> 297 | 298 | 100. their use of memetics suggests an intentionalityy" in memes. 299 | ---> 300 | 301 | 101. A saying like childrenn are the pillars of Nation", at one time was just a saying without much ado. 302 | ---> 303 | 304 | 102. Good stuff indeed, Dave. 305 | ---> 306 | 307 | 103. Science can be used to gauge the value of some expressed morality, but&helip; only when one shares intention. 308 | ---> 309 | 310 | 104. I am of the opinion that my life belongs to the community, and as long as I live, it is my privilege to do for it whatever I can. 311 | ---> 312 | 313 | 105. Excellent speech. 314 | ---> 315 | 316 | 106. ' 'how dare you say you know what we need, you don't know what it's like' 'emiliano's dad stole from me, so Emiliano should shut the hell up' ii am, of course, paraphrasing) what gives people the title of 'true mexican'? 317 | ---> 318 | 319 | 107. For example - Lint Porter- " I can't confirm Steve Jones' numbers; but yes, it's safe to say that gay men are much more likely to be pedophiles. 320 | ---> 321 | 322 | 108. As we continue to maintain the pace of technology development, it will be hard to take heed to what Dan Barber is preaching. 323 | ---> 324 | 325 | 109. htp:/goglemapsapi.blogspot.com/208/01/put-your-map-on-map.html Now is that so hard Microsoft? 326 | ---> 327 | 328 | 110. I found it interesting how Daniel used “think they’re hapy” to describe a person who moved from Ohio to California, but accepted the happiness as fact for those with incomes over $60,0. 329 | ---> 330 | 331 | 111. I love the idea of technology that knows your emotional state and wants to help you, dims the lights for you when you're sleepy etc. 332 | ---> 333 | 334 | 112. A hero can be someone who saves someone from making bad descisions or heading down the wrong path in their lives. 335 | ---> 336 | 337 | 113. Really makes you think about genius and the creative process differently, at least for a moment. 338 | ---> 339 | 340 | 114. Guess you're not ready for a public debate". 341 | ---> 342 | 343 | 115. You're a real triumph who shows that things can be done. 344 | ---> 345 | 346 | 116. Sending teachers and money to build school is a way more efficient long term way to fight terrorism then sending an army. 347 | ---> 348 | 349 | 117. Pink is referring to were conducted, in part, by Dan Ariely, a Behavioral Economist from Mit, whose Ted talk addresses predictablyy irrational" human behavior. 350 | ---> 351 | 352 | 118. Each year the garbage & recycling industry generates billions of dollars in revenue and recycling is the least of their priorities. 353 | ---> 354 | 355 | 119. Thats completely honest. 356 | ---> 357 | 358 | 120. Just before that, my 73 year old father had degenerative arthritis and needed a new hip to alleviate pain. 359 | ---> 360 | 361 | 121. I suppose you consider them evil? 362 | ---> 363 | 364 | 122. I find the fat-free, efficient part of the philosophy useful for handling and managing my work, schedule and projects. 365 | ---> 366 | 367 | 123. And he who has deserved to drink from the ocean of life deserves to fill his cup from your little stream. 368 | ---> 369 | 370 | 124. Cooked food, clothing, and the girl's room are part of modern comforts. 371 | ---> 372 | 373 | 125. Facts about our happiness do not dictate moral conclusions, rather they must be interpreted with a number of moral principles in mind, such as e.g. 374 | ---> 375 | 376 | 126. When this sensing device is attached to the sticky note, it senses whatever is written on this and transfer it to the Pc thereby making it a digital data. 377 | ---> 378 | 379 | 127. I've been in a whel-chair for these 35 years and have never been sad or unhappy. 380 | ---> 381 | 382 | 128. I just want a hall (150' long)? 383 | ---> 384 | 385 | 129. Fascinating _ 386 | ---> 387 | 388 | 130. i am thinking now, thank you. 389 | ---> 390 | 391 | 131. I really like the analogy of carbon atoms in two different objects totally having opposite properties and behaviours. 392 | ---> 393 | 394 | 132. There is another option nobody seems to talk about because, what I think, a paradigm that is blocking vision: carr needs gas tank". 395 | ---> 396 | 397 | 133. When we talk about costs, pricing, and profits, we need to remember who is the one paying for it. 398 | ---> 399 | 400 | 134. Such a beautiful voice to deliver such a timely message as I get ready to top my last 'performance'. 401 | ---> 402 | 403 | 135. I think you would too. 404 | ---> 405 | 406 | 136. All the eccentricities aside, the main point about being passionate about learning came through. 407 | ---> 408 | 409 | 137. There is so much real magic going on in the universe around us, I wish Ted would have focused on that instead of this. 410 | ---> 411 | 412 | 138. But what a lot of you seem to not understand or simply ignore are that there are bad people out there that don't share your same values for life. 413 | ---> 414 | 415 | 139. Despite the fact that a surprising proportion of people actually adhere to the current paradigm of what a healthy diet is, we are getting sicker and fatter despite mainstream dietary recommendations. 416 | ---> 417 | 418 | 140. The only thing you'l find is a pacifier to suck or a security blanket to hold for those who want to believe there is life after death. 419 | ---> 420 | 421 | -------------------------------------------------------------------------------- /TED_comment_annotations/2/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/2 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | 6cf8c1dcb6f4c951027a6cadf32e786e 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 31519 62 | 63 | ilja 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | d41d8cd98f00b204e9800998ecf8427e 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 0 96 | 97 | sentence_test.txt 98 | file 99 | 100 | 101 | 102 | 103 | 2013-01-18T19:54:08.000000Z 104 | 011538bf103e354a0cc8d543bc0b4e71 105 | 2013-01-18T18:08:40.681354Z 106 | 1662 107 | snupdc 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 15078 130 | 131 | complete 132 | dir 133 | 134 | -------------------------------------------------------------------------------- /TED_comment_annotations/2/.svn/text-base/ilja.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/TED_comment_annotations/2/.svn/text-base/ilja.svn-base -------------------------------------------------------------------------------- /TED_comment_annotations/2/complete/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/2/complete 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | df34d17361b67058e2323d38869318bf 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 31561 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | 2c96232f4f3872f5dcba7a20f0ec02c8 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 15218 96 | 97 | -------------------------------------------------------------------------------- /TED_comment_annotations/3/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/3 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | 8a6af5675fa03e242fbd07128b65f48a 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 30035 62 | 63 | dimitri 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | d41d8cd98f00b204e9800998ecf8427e 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 0 96 | 97 | sentence_test.txt 98 | file 99 | 100 | 101 | 102 | 103 | 2013-01-18T19:54:08.000000Z 104 | 0d2e575a77990f2adda0594d795cb97b 105 | 2013-01-18T18:08:40.681354Z 106 | 1662 107 | snupdc 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 15069 130 | 131 | complete 132 | dir 133 | 134 | -------------------------------------------------------------------------------- /TED_comment_annotations/3/.svn/text-base/dimitri.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/TED_comment_annotations/3/.svn/text-base/dimitri.svn-base -------------------------------------------------------------------------------- /TED_comment_annotations/3/complete/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/3/complete 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | b68a90f0fac3709583b5b4ba34c05289 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 30095 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | 0c06884d6acff4d313b37a2625613440 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 15209 96 | 97 | -------------------------------------------------------------------------------- /TED_comment_annotations/4/.svn/entries: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /TED_comment_annotations/4/.svn/text-base/marc.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/TED_comment_annotations/4/.svn/text-base/marc.svn-base -------------------------------------------------------------------------------- /TED_comment_annotations/4/.svn/text-base/sentence_test.txt.svn-base: -------------------------------------------------------------------------------- 1 | 1. No magic, alien assistance or lost mystical powers - just plain patience and determination ignoringg the attendant slavery for the moment). 2 | ---> 3 | 4 | 2. Not only does he put focus on the dumbing down, dilution and perversion of scientific findings one sees presented in the media, but he explains the mechanisms by which the same findings become inaccessible or incomprehensible to the less probing masses. 5 | ---> 6 | 7 | 3. This is science at it's best and the future is bright. 8 | ---> 9 | 10 | 4. Is this really a good idea in the long term? 11 | ---> 12 | 13 | 5. The inventor did not mean to produce such kind of glue. 14 | ---> 15 | 16 | 6. People do not want to change now as long as oil is still affordable. 17 | ---> 18 | 19 | 7. You fail to realize what povertyy" really means in regards to education, connection, status, class, family and social clasism. 20 | ---> 21 | 22 | 8. clumsy to say the less) but I feel the full conclusion was not reached. 23 | ---> 24 | 25 | 9. Agricultural techniques may increase that amount but doing so often comes with environmental damage and a loss of biodiversity. 26 | ---> 27 | 28 | 10. Inspiring talk! 29 | ---> 30 | 31 | 11. Think what you want, but evolution is true and there's no conspiracy. 32 | ---> 33 | 34 | 12. The question of great power bringing great responsibility isn't new (spiderman, anyone? 35 | ---> 36 | 37 | 13. With out ideologues, there is no hope whatsoever. 38 | ---> 39 | 40 | 14. Beautiful, but for someone like me who's biggest problem is control"l over my experiences, it sounds like a sort of hell to not be able to see my boundaries! 41 | ---> 42 | 43 | 15. Direction of Magnetic force, and 4. 44 | ---> 45 | 46 | 16. i got a little bored listening. 47 | ---> 48 | 49 | 17. Helping Handicapped and/or elderly people. 50 | ---> 51 | 52 | 18. I do not mean to demean anyone. 53 | ---> 54 | 55 | 19. I do love this concept of design for outcome, nice talk! 56 | ---> 57 | 58 | 20. The most basic concept here is people thrive on autonomy. 59 | ---> 60 | 61 | 21. The negative attitude is ingrained in us by evolution, no less. 62 | ---> 63 | 64 | 22. Check out this newly created blog about bigg data", which in this wek's post discusses the ethical implications of this topic. 65 | ---> 66 | 67 | 23. I love this! 68 | ---> 69 | 70 | 24. I'm living in Japan, which consistently appears at the better end of the scale. 71 | ---> 72 | 73 | 25. In his eloquent reminiscence of his visit to the Kaba- Islam's religious shrine, Mr. Mustafe Akyol uses the word, islamicc Law" several times. 74 | ---> 75 | 76 | 26. Fantastic talk. 77 | ---> 78 | 79 | 27. This sounds amazing. 80 | ---> 81 | 82 | 28. More importantly, there is no government that would legitimize such businesses by issuing business permits or licenses, whose sole purpose is to police their role. 83 | ---> 84 | 85 | 29. Aime is an extraordinary speaker. 86 | ---> 87 | 88 | 30. I think it's pretty good to see someone stand out and endorse this kind of viewpoint providing a new way to see things. 89 | ---> 90 | 91 | 31. I love her voice and the cello. 92 | ---> 93 | 94 | 32. He doesn't have a clue about Functional Medicine and its uses in modern Europe. 95 | ---> 96 | 97 | 33. Where's the innovation? 98 | ---> 99 | 100 | 34. Looking at the data, I was struck not only by Thailand's rapid decrease in fertility rate but also by a similar decrease in fertility rate of its neighbors. 101 | ---> 102 | 103 | 35. Many of my friends died in car accidents which could have been all avoided by this technology! 104 | ---> 105 | 106 | 36. very nice work mr. klein. 107 | ---> 108 | 109 | 37. Muchas veces al ver un problema pensamos que las soluciones siempre son complejas, lo cual implica de cierta forma m´s tiempo y m´s dinero; sin embargo existen soluciones simples y de bajo costo que permiten resultados similares, y eso es lo que vemos en este video que est´ orientado a las clases de escazos recursos. 110 | ---> 111 | 112 | 38. the particles he mentions can be descriptions of different possible routs for vibrations of a very complex vortex of interacting stringss". 113 | ---> 114 | 115 | 39. Not worthy of Ted. 116 | ---> 117 | 118 | 40. not all of us may have the capital and resources that she had but i am pretty sure that most of us have the basic education needed to teach. 119 | ---> 120 | 121 | 41. An amazing speaker with knowledge is enough to get me excited about any topic. 122 | ---> 123 | 124 | 42. Most of all, it can inspire us to do things we never thought we could. 125 | ---> 126 | 127 | 43. I believe that this debate has shown that wind for one can be a big part in solving our energy problems. 128 | ---> 129 | 130 | 44. Bjorn reminds me of what I don't really like about organizations and events like Ted, Davos, the Cfr, etc. 131 | ---> 132 | 133 | 45. I made the conscience decision to stop listening to music for a while which was devastating because music is my passion by far. 134 | ---> 135 | 136 | 46. Many have denied in follow-up that they reject common descent (your 'macroevolution') and claim to have been misled or misrepresented. 137 | ---> 138 | 139 | 47. The project requirements were to construct a minimal structure from unusual materials. 140 | ---> 141 | 142 | 48. fascinating body of work and process of thinking 143 | ---> 144 | 145 | 49. It is a kind of revolution in the area of technology. 146 | ---> 147 | 148 | 50. @chris: Have you considered confirmation bias (chery-picking)? 149 | ---> 150 | 151 | 51. Are you saying it's impossible for these apes to have a similar consciousness? 152 | ---> 153 | 154 | 52. lastt phrase shamelessly copied from who knows who). 155 | ---> 156 | 157 | 53. If anything, this lecture should have seen the different type. 158 | ---> 159 | 160 | 54. But, its root cause, The Covert Enslavement of Humanity, is history's greatest injustice. 161 | ---> 162 | 163 | 55. Dear T Walker sadlyy, you will not understand what that sentence really means". 164 | ---> 165 | 166 | 56. Nick Bostrom; you can not prove a negative, self away beings will always search & need meaning, and from what we know from neuroscience, the brain seems to have evolved the ability to experience transcendence and belief. 167 | ---> 168 | 169 | 57. This is a bad idea that leads to a generation or more that doesn't understand the fundamentals of calculation. 170 | ---> 171 | 172 | 58. A brilliant professor ! 173 | ---> 174 | 175 | 59. Yet no one has been able to compare the effectiveness of solar orbital microwave power stations? 176 | ---> 177 | 178 | 60. As far as his introduction, it was a hook and it was interesting and thought provoking, just like what followed. 179 | ---> 180 | 181 | 61. Rahul Dewan: ii feel, the way to prevent terrorism is to 'raise the level of discourse' on God and Spirituality, and make it spread across all religious faiths". 182 | ---> 183 | 184 | 62. And equally like the 19th century, our governing powers and ruling elites don't quite have any answer of how to undo what can't really be undone. 185 | ---> 186 | 187 | 63. Commenting on Geof Mosher alongg with Tj Green) Iwould start b ystating the fact that evolutionn is war" is quite an understatment. 188 | ---> 189 | 190 | 64. I will address them personally if you wish me to do so. 191 | ---> 192 | 193 | 65. That's the wonderful right of freedom. 194 | ---> 195 | 196 | 66. Very nice. 197 | ---> 198 | 199 | 67. But could the opposite be true: that the gregariousness that is the driving force of human beings, is being virtualized by make-believe substitutes to actual social interaction? 200 | ---> 201 | 202 | 68. Guys use Vlc player, the mp4 480p version works perfectly on it, awesome quality too. 203 | ---> 204 | 205 | 69. Would it be impolite to ask where you stand in this argument? 206 | ---> 207 | 208 | 70. Just wow. 209 | ---> 210 | 211 | 71. However, had large numbers of Dutch with their hunting and military style rifles been able to start a sniping campaign against the Germans, Wi may have been shortened. 212 | ---> 213 | 214 | 72. Amazing - Good Work - He is Worth to not only to India but also to the Whole world His promise for Open Source is also unbelivable a lots of Thanks to him and sponsers 215 | ---> 216 | 217 | 73. It isn't good or bad; it's inevitable. 218 | ---> 219 | 220 | 74. A couple of years ago the idea was about privacy that if you are connected to the Internet then your privacy is eliminated and you don't have any right to claim it back because you're a part of the network and you're obviously exchanging info that crosses a lot of paths to reach you, but recently, telecom providers are the ones dominating and watching, closely, what everyone is doing. 221 | ---> 222 | 223 | 75. Wait for gasoline to hit $5 a gallon and it will. 224 | ---> 225 | 226 | 76. Ib is a rigorous international education program for high school students. 227 | ---> 228 | 229 | 77. As someone who has spent several years teaching, I can say from experience that children need discipline, a rigid structure and for the most part, they need to be told what to do. 230 | ---> 231 | 232 | 78. Horrible architect, decent marketer perhaps? 233 | ---> 234 | 235 | 79. They aren't reproducible on command, and they do not openly discussed the incidences because the subject is taboo. 236 | ---> 237 | 238 | 80. Examples: To say that conservatives are much more into ideological purity is absurd. 239 | ---> 240 | 241 | 81. David is talking about a job interview on steroids, and like all job interviews there is an understanding that things will be shown in the best light possible (to say the least). 242 | ---> 243 | 244 | 82. In order to ensure we aren't fluctuations, we need to make an observation and analyze it. 245 | ---> 246 | 247 | 83. They don’t have room or numbers and we are losing them. 248 | ---> 249 | 250 | 84. The point I'm trying to make is this: a great craftsman is Not by default a great artist. 251 | ---> 252 | 253 | 85. They are invited to raise their right hand and in unison say globall Truce Starts Now! 254 | ---> 255 | 256 | 86. I would say all the fundamentalisms floating around in peoples minds making war on each other are one of the great problems of our time. 257 | ---> 258 | 259 | 87. I will probably be one of the first to be swept away! 260 | ---> 261 | 262 | 88. Please don't even Think of taking it down! 263 | ---> 264 | 265 | 89. You see a lot of people, youngsters especially, who complain that they are depressed and alone. 266 | ---> 267 | 268 | 90. I'm dubious about the payback periods you state. 269 | ---> 270 | 271 | 91. Did his disgraceful comment on Israel fall on deaf ears? 272 | ---> 273 | 274 | 92. To that end, how would empathy fix any of these problems? 275 | ---> 276 | 277 | 93. follow his lead by taking a hard look at your own life and either get help or work on your own to become more 'gay'. 278 | ---> 279 | 280 | 94. Sorry, he's a pet peeve. 281 | ---> 282 | 283 | 95. human have no right and ability to make it! 284 | ---> 285 | 286 | 96. Felix, why should an artist or a political activist not be commercially astute? 287 | ---> 288 | 289 | 97. And now we have screwed up our heads were screwing about with monkeys heads as well): 290 | ---> 291 | 292 | 98. It searches for and indexes common data in a vast resource and, not surprisingly, ends up finding lots of that data. 293 | ---> 294 | 295 | 99. People need to try to be less offended by differing viewpoints. 296 | ---> 297 | 298 | 100. their use of memetics suggests an intentionalityy" in memes. 299 | ---> 300 | 301 | 101. Noah Feldman constantly refers to the supposed democratic beliefs of a vast majority of Muslims. 302 | ---> 303 | 304 | 102. I felt it helped me deal with issues in a good way and was the right attitude to have. 305 | ---> 306 | 307 | 103. This feed specific microbes that in tern secrete the nutrient that the plant needs. 308 | ---> 309 | 310 | 104. I know that's a bit simplistic, but still. 311 | ---> 312 | 313 | 105. another one thing is staffs, they work only for salary they wouldn't give clear leacture. 314 | ---> 315 | 316 | 106. Ingenious incorporation of the natural binding aspect of the mycelium from the mushroom plant to create a products)s that could partially compete with some single uses of Styrofoam. 317 | ---> 318 | 319 | 107. As for China, I mostly agree with Mr. Tabarok, but I'm not sure if the post-mao development would've been possible without Mao's prior contribution to health seee Hans Rosling's Ted talks). 320 | ---> 321 | 322 | 108. his talk is great. 323 | ---> 324 | 325 | 109. Now, to briefly address your list of supposed contradictions and fallacies. 326 | ---> 327 | 328 | 110. Lol - hilarious. 329 | ---> 330 | 331 | 111. But the point is missed what Dan Gilbert is trying to tell you how we proceed looking at some time of value because its illusion, mostly thats what we are buying "that Illusion! 332 | ---> 333 | 334 | 112. Stephen Colbert, Jon Stewart and Bill Maher do make things easier to discuss with humor but they talk about American issues and Americans watch them. 335 | ---> 336 | 337 | 113. xd It's been awhile since I smiled so much just from a video ^ I want more of this! 338 | ---> 339 | 340 | 114. In my view , school is really a good place to sort out the smart ones who survive the education system and create stupid people (to work as labour) . 341 | ---> 342 | 343 | 115. Having been a professional background vocalist in Hollywood for several decades, I att age 24) rejoined a pop band, which had drastically changed its repetoire during my absence, having added several vocally demanding rock tunes, which I would be required to sing to regain my old position. 344 | ---> 345 | 346 | 116. Thank goodness and for the interactive transcript feature and Lee Smolin. 347 | ---> 348 | 349 | 117. I initiated and developed this project some years ago, which makes it possible for disabled people to take a look beyond their horizons and experience the ultimate feeling of freedom and independence: htp:/w.youtube.com/watch?v=ljbmau7jc 350 | ---> 351 | 352 | 118. Pinker uses warped statistics ass pointed out in the blog you mention). 353 | ---> 354 | 355 | 119. it's also our fault too by allowing men to do so. 356 | ---> 357 | 358 | 120. many are institutionalised profit based or religious hagovers from another world ago. 359 | ---> 360 | 361 | 121. This is a political manipulation attempt to educate the children. 362 | ---> 363 | 364 | 122. It's a glorified 70-year-old hand cannon and it is not modern, civil, humane or precise. 365 | ---> 366 | 367 | 123. Peace! 368 | ---> 369 | 370 | 124. Of course this may not guarantee. 371 | ---> 372 | 373 | 125. This was some kind of parody, right? 374 | ---> 375 | 376 | 126. I do these things for a distraction and generally when I am bored. 377 | ---> 378 | 379 | 127. right or wrong I think this is an inevitable advancement you have to be fine with it. 380 | ---> 381 | 382 | 128. Would really love to go to a postsecret live event. 383 | ---> 384 | 385 | 129. " the words from a man, whom I think, seriously meant that. 386 | ---> 387 | 388 | 130. I can see that I wasn't as clear in my writing as I thought I was. 389 | ---> 390 | 391 | 131. tragic at the least and still playing out every day. 392 | ---> 393 | 394 | 132. I wonder how well this was received and how many of his countrymen saw this. 395 | ---> 396 | 397 | 133. I am no scientist but it would seem possible to manipulate such large amounts of data on societal reactions and interactions in a way that would produce highly reliable prognostications relative to very specific stimulus. 398 | ---> 399 | 400 | 134. While one size will not fit all, this looks to be a wonderful piece in the puzzle. 401 | ---> 402 | 403 | 135. any body please help 404 | ---> 405 | 406 | 136. This allows our brains to wire for efficiency. 407 | ---> 408 | 409 | 137. When I built the house, I built it according to mostt of) the building codes in Canada. 410 | ---> 411 | 412 | 138. 2)2 He created them male and female and blessed them and named them [both] Adam [man] at the time they were created. 413 | ---> 414 | 415 | 139. I definitely think, that there is no movie, that can be so epic and so truly touching that these real life - stuffs happening here in Ted. 416 | ---> 417 | 418 | 140. All reasearch is of course fantastic, and I take your points about satelites seriously, although suspect that most of us could do without sky tv, gps and various military derivatives if we're brutally honest. 419 | ---> 420 | 421 | -------------------------------------------------------------------------------- /TED_comment_annotations/4/complete/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/4/complete 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | 302705fd9ecbb863ddb52f325a106d94 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 33864 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | be6d1c8983bd2ba64c2c7e29f63ab1ed 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 14614 96 | 97 | -------------------------------------------------------------------------------- /TED_comment_annotations/4/complete/.svn/text-base/sentence_test.txt.svn-base: -------------------------------------------------------------------------------- 1 | 1. No magic, alien assistance or lost mystical powers - just plain patience and determination ignoringg the attendant slavery for the moment). 2 | ---> n 3 | 4 | 2. Not only does he put focus on the dumbing down, dilution and perversion of scientific findings one sees presented in the media, but he explains the mechanisms by which the same findings become inaccessible or incomprehensible to the less probing masses. 5 | ---> n 6 | 7 | 3. This is science at it's best and the future is bright. 8 | ---> p 9 | 10 | 4. Is this really a good idea in the long term? 11 | ---> n 12 | 13 | 5. The inventor did not mean to produce such kind of glue. 14 | ---> n 15 | 16 | 6. People do not want to change now as long as oil is still affordable. 17 | ---> n 18 | 19 | 7. You fail to realize what povertyy" really means in regards to education, connection, status, class, family and social clasism. 20 | ---> n 21 | 22 | 8. clumsy to say the less) but I feel the full conclusion was not reached. 23 | ---> n 24 | 25 | 9. Agricultural techniques may increase that amount but doing so often comes with environmental damage and a loss of biodiversity. 26 | ---> n 27 | 28 | 10. Inspiring talk! 29 | ---> p 30 | 31 | 11. Think what you want, but evolution is true and there's no conspiracy. 32 | ---> n 33 | 34 | 12. The question of great power bringing great responsibility isn't new (spiderman, anyone? 35 | ---> u 36 | 37 | 13. With out ideologues, there is no hope whatsoever. 38 | ---> p 39 | 40 | 14. Beautiful, but for someone like me who's biggest problem is control"l over my experiences, it sounds like a sort of hell to not be able to see my boundaries! 41 | ---> p 42 | 43 | 15. Direction of Magnetic force, and 4. 44 | ---> u 45 | 46 | 16. i got a little bored listening. 47 | ---> n 48 | 49 | 17. Helping Handicapped and/or elderly people. 50 | ---> p 51 | 52 | 18. I do not mean to demean anyone. 53 | ---> n 54 | 55 | 19. I do love this concept of design for outcome, nice talk! 56 | ---> p 57 | 58 | 20. The most basic concept here is people thrive on autonomy. 59 | ---> p 60 | 61 | 21. The negative attitude is ingrained in us by evolution, no less. 62 | ---> n 63 | 64 | 22. Check out this newly created blog about bigg data", which in this wek's post discusses the ethical implications of this topic. 65 | ---> p 66 | 67 | 23. I love this! 68 | ---> p 69 | 70 | 24. I'm living in Japan, which consistently appears at the better end of the scale. 71 | ---> p 72 | 73 | 25. In his eloquent reminiscence of his visit to the Kaba- Islam's religious shrine, Mr. Mustafe Akyol uses the word, islamicc Law" several times. 74 | ---> u 75 | 76 | 26. Fantastic talk. 77 | ---> p 78 | 79 | 27. This sounds amazing. 80 | ---> p 81 | 82 | 28. More importantly, there is no government that would legitimize such businesses by issuing business permits or licenses, whose sole purpose is to police their role. 83 | ---> n 84 | 85 | 29. Aime is an extraordinary speaker. 86 | ---> p 87 | 88 | 30. I think it's pretty good to see someone stand out and endorse this kind of viewpoint providing a new way to see things. 89 | ---> p 90 | 91 | 31. I love her voice and the cello. 92 | ---> p 93 | 94 | 32. He doesn't have a clue about Functional Medicine and its uses in modern Europe. 95 | ---> n 96 | 97 | 33. Where's the innovation? 98 | ---> n 99 | 100 | 34. Looking at the data, I was struck not only by Thailand's rapid decrease in fertility rate but also by a similar decrease in fertility rate of its neighbors. 101 | ---> n 102 | 103 | 35. Many of my friends died in car accidents which could have been all avoided by this technology! 104 | ---> p 105 | 106 | 36. very nice work mr. klein. 107 | ---> p 108 | 109 | 37. Muchas veces al ver un problema pensamos que las soluciones siempre son complejas, lo cual implica de cierta forma m´s tiempo y m´s dinero; sin embargo existen soluciones simples y de bajo costo que permiten resultados similares, y eso es lo que vemos en este video que est´ orientado a las clases de escazos recursos. 110 | ---> p 111 | 112 | 38. the particles he mentions can be descriptions of different possible routs for vibrations of a very complex vortex of interacting stringss". 113 | ---> p 114 | 115 | 39. Not worthy of Ted. 116 | ---> n 117 | 118 | 40. not all of us may have the capital and resources that she had but i am pretty sure that most of us have the basic education needed to teach. 119 | ---> p 120 | 121 | 41. An amazing speaker with knowledge is enough to get me excited about any topic. 122 | ---> p 123 | 124 | 42. Most of all, it can inspire us to do things we never thought we could. 125 | ---> p 126 | 127 | 43. I believe that this debate has shown that wind for one can be a big part in solving our energy problems. 128 | ---> p 129 | 130 | 44. Bjorn reminds me of what I don't really like about organizations and events like Ted, Davos, the Cfr, etc. 131 | ---> n 132 | 133 | 45. I made the conscience decision to stop listening to music for a while which was devastating because music is my passion by far. 134 | ---> p 135 | 136 | 46. Many have denied in follow-up that they reject common descent (your 'macroevolution') and claim to have been misled or misrepresented. 137 | ---> n 138 | 139 | 47. The project requirements were to construct a minimal structure from unusual materials. 140 | ---> n 141 | 142 | 48. fascinating body of work and process of thinking 143 | ---> p 144 | 145 | 49. It is a kind of revolution in the area of technology. 146 | ---> p 147 | 148 | 50. @chris: Have you considered confirmation bias (chery-picking)? 149 | ---> n 150 | 151 | 51. Are you saying it's impossible for these apes to have a similar consciousness? 152 | ---> n 153 | 154 | 52. lastt phrase shamelessly copied from who knows who). 155 | ---> n 156 | 157 | 53. If anything, this lecture should have seen the different type. 158 | ---> n 159 | 160 | 54. But, its root cause, The Covert Enslavement of Humanity, is history's greatest injustice. 161 | ---> n 162 | 163 | 55. Dear T Walker sadlyy, you will not understand what that sentence really means". 164 | ---> n 165 | 166 | 56. Nick Bostrom; you can not prove a negative, self away beings will always search & need meaning, and from what we know from neuroscience, the brain seems to have evolved the ability to experience transcendence and belief. 167 | ---> n 168 | 169 | 57. This is a bad idea that leads to a generation or more that doesn't understand the fundamentals of calculation. 170 | ---> n 171 | 172 | 58. A brilliant professor ! 173 | ---> p 174 | 175 | 59. Yet no one has been able to compare the effectiveness of solar orbital microwave power stations? 176 | ---> n 177 | 178 | 60. As far as his introduction, it was a hook and it was interesting and thought provoking, just like what followed. 179 | ---> p 180 | 181 | 61. Rahul Dewan: ii feel, the way to prevent terrorism is to 'raise the level of discourse' on God and Spirituality, and make it spread across all religious faiths". 182 | ---> p 183 | 184 | 62. And equally like the 19th century, our governing powers and ruling elites don't quite have any answer of how to undo what can't really be undone. 185 | ---> n 186 | 187 | 63. Commenting on Geof Mosher alongg with Tj Green) Iwould start b ystating the fact that evolutionn is war" is quite an understatment. 188 | ---> n 189 | 190 | 64. I will address them personally if you wish me to do so. 191 | ---> n 192 | 193 | 65. That's the wonderful right of freedom. 194 | ---> p 195 | 196 | 66. Very nice. 197 | ---> p 198 | 199 | 67. But could the opposite be true: that the gregariousness that is the driving force of human beings, is being virtualized by make-believe substitutes to actual social interaction? 200 | ---> p 201 | 202 | 68. Guys use Vlc player, the mp4 480p version works perfectly on it, awesome quality too. 203 | ---> u 204 | 205 | 69. Would it be impolite to ask where you stand in this argument? 206 | ---> n 207 | 208 | 70. Just wow. 209 | ---> p 210 | 211 | 71. However, had large numbers of Dutch with their hunting and military style rifles been able to start a sniping campaign against the Germans, Wi may have been shortened. 212 | ---> n 213 | 214 | 72. Amazing - Good Work - He is Worth to not only to India but also to the Whole world His promise for Open Source is also unbelivable a lots of Thanks to him and sponsers 215 | ---> p 216 | 217 | 73. It isn't good or bad; it's inevitable. 218 | ---> u 219 | 220 | 74. A couple of years ago the idea was about privacy that if you are connected to the Internet then your privacy is eliminated and you don't have any right to claim it back because you're a part of the network and you're obviously exchanging info that crosses a lot of paths to reach you, but recently, telecom providers are the ones dominating and watching, closely, what everyone is doing. 221 | ---> n 222 | 223 | 75. Wait for gasoline to hit $5 a gallon and it will. 224 | ---> n 225 | 226 | 76. Ib is a rigorous international education program for high school students. 227 | ---> p 228 | 229 | 77. As someone who has spent several years teaching, I can say from experience that children need discipline, a rigid structure and for the most part, they need to be told what to do. 230 | ---> n 231 | 232 | 78. Horrible architect, decent marketer perhaps? 233 | ---> n 234 | 235 | 79. They aren't reproducible on command, and they do not openly discussed the incidences because the subject is taboo. 236 | ---> n 237 | 238 | 80. Examples: To say that conservatives are much more into ideological purity is absurd. 239 | ---> n 240 | 241 | 81. David is talking about a job interview on steroids, and like all job interviews there is an understanding that things will be shown in the best light possible (to say the least). 242 | ---> p 243 | 244 | 82. In order to ensure we aren't fluctuations, we need to make an observation and analyze it. 245 | ---> n 246 | 247 | 83. They don’t have room or numbers and we are losing them. 248 | ---> n 249 | 250 | 84. The point I'm trying to make is this: a great craftsman is Not by default a great artist. 251 | ---> n 252 | 253 | 85. They are invited to raise their right hand and in unison say globall Truce Starts Now! 254 | ---> u 255 | 256 | 86. I would say all the fundamentalisms floating around in peoples minds making war on each other are one of the great problems of our time. 257 | ---> u 258 | 259 | 87. I will probably be one of the first to be swept away! 260 | ---> p 261 | 262 | 88. Please don't even Think of taking it down! 263 | ---> u 264 | 265 | 89. You see a lot of people, youngsters especially, who complain that they are depressed and alone. 266 | ---> p 267 | 268 | 90. I'm dubious about the payback periods you state. 269 | ---> n 270 | 271 | 91. Did his disgraceful comment on Israel fall on deaf ears? 272 | ---> n 273 | 274 | 92. To that end, how would empathy fix any of these problems? 275 | ---> n 276 | 277 | 93. follow his lead by taking a hard look at your own life and either get help or work on your own to become more 'gay'. 278 | ---> n 279 | 280 | 94. Sorry, he's a pet peeve. 281 | ---> n 282 | 283 | 95. human have no right and ability to make it! 284 | ---> n 285 | 286 | 96. Felix, why should an artist or a political activist not be commercially astute? 287 | ---> n 288 | 289 | 97. And now we have screwed up our heads were screwing about with monkeys heads as well): 290 | ---> n 291 | 292 | 98. It searches for and indexes common data in a vast resource and, not surprisingly, ends up finding lots of that data. 293 | ---> n 294 | 295 | 99. People need to try to be less offended by differing viewpoints. 296 | ---> p 297 | 298 | 100. their use of memetics suggests an intentionalityy" in memes. 299 | ---> u 300 | 301 | 101. Noah Feldman constantly refers to the supposed democratic beliefs of a vast majority of Muslims. 302 | ---> n 303 | 304 | 102. I felt it helped me deal with issues in a good way and was the right attitude to have. 305 | ---> p 306 | 307 | 103. This feed specific microbes that in tern secrete the nutrient that the plant needs. 308 | ---> p 309 | 310 | 104. I know that's a bit simplistic, but still. 311 | ---> u 312 | 313 | 105. another one thing is staffs, they work only for salary they wouldn't give clear leacture. 314 | ---> n 315 | 316 | 106. Ingenious incorporation of the natural binding aspect of the mycelium from the mushroom plant to create a products)s that could partially compete with some single uses of Styrofoam. 317 | ---> p 318 | 319 | 107. As for China, I mostly agree with Mr. Tabarok, but I'm not sure if the post-mao development would've been possible without Mao's prior contribution to health seee Hans Rosling's Ted talks). 320 | ---> p 321 | 322 | 108. his talk is great. 323 | ---> p 324 | 325 | 109. Now, to briefly address your list of supposed contradictions and fallacies. 326 | ---> n 327 | 328 | 110. Lol - hilarious. 329 | ---> p 330 | 331 | 111. But the point is missed what Dan Gilbert is trying to tell you how we proceed looking at some time of value because its illusion, mostly thats what we are buying "that Illusion! 332 | ---> p 333 | 334 | 112. Stephen Colbert, Jon Stewart and Bill Maher do make things easier to discuss with humor but they talk about American issues and Americans watch them. 335 | ---> n 336 | 337 | 113. xd It's been awhile since I smiled so much just from a video ^ I want more of this! 338 | ---> p 339 | 340 | 114. In my view , school is really a good place to sort out the smart ones who survive the education system and create stupid people (to work as labour) . 341 | ---> u 342 | 343 | 115. Having been a professional background vocalist in Hollywood for several decades, I att age 24) rejoined a pop band, which had drastically changed its repetoire during my absence, having added several vocally demanding rock tunes, which I would be required to sing to regain my old position. 344 | ---> n 345 | 346 | 116. Thank goodness and for the interactive transcript feature and Lee Smolin. 347 | ---> p 348 | 349 | 117. I initiated and developed this project some years ago, which makes it possible for disabled people to take a look beyond their horizons and experience the ultimate feeling of freedom and independence: htp:/w.youtube.com/watch?v=ljbmau7jc 350 | ---> p 351 | 352 | 118. Pinker uses warped statistics ass pointed out in the blog you mention). 353 | ---> n 354 | 355 | 119. it's also our fault too by allowing men to do so. 356 | ---> n 357 | 358 | 120. many are institutionalised profit based or religious hagovers from another world ago. 359 | ---> p 360 | 361 | 121. This is a political manipulation attempt to educate the children. 362 | ---> n 363 | 364 | 122. It's a glorified 70-year-old hand cannon and it is not modern, civil, humane or precise. 365 | ---> n 366 | 367 | 123. Peace! 368 | ---> p 369 | 370 | 124. Of course this may not guarantee. 371 | ---> n 372 | 373 | 125. This was some kind of parody, right? 374 | ---> n 375 | 376 | 126. I do these things for a distraction and generally when I am bored. 377 | ---> n 378 | 379 | 127. right or wrong I think this is an inevitable advancement you have to be fine with it. 380 | ---> p 381 | 382 | 128. Would really love to go to a postsecret live event. 383 | ---> p 384 | 385 | 129. " the words from a man, whom I think, seriously meant that. 386 | ---> n 387 | 388 | 130. I can see that I wasn't as clear in my writing as I thought I was. 389 | ---> u 390 | 391 | 131. tragic at the least and still playing out every day. 392 | ---> n 393 | 394 | 132. I wonder how well this was received and how many of his countrymen saw this. 395 | ---> n 396 | 397 | 133. I am no scientist but it would seem possible to manipulate such large amounts of data on societal reactions and interactions in a way that would produce highly reliable prognostications relative to very specific stimulus. 398 | ---> p 399 | 400 | 134. While one size will not fit all, this looks to be a wonderful piece in the puzzle. 401 | ---> p 402 | 403 | 135. any body please help 404 | ---> n 405 | 406 | 136. This allows our brains to wire for efficiency. 407 | ---> p 408 | 409 | 137. When I built the house, I built it according to mostt of) the building codes in Canada. 410 | ---> u 411 | 412 | 138. 2)2 He created them male and female and blessed them and named them [both] Adam [man] at the time they were created. 413 | ---> u 414 | 415 | 139. I definitely think, that there is no movie, that can be so epic and so truly touching that these real life - stuffs happening here in Ted. 416 | ---> p 417 | 418 | 140. All reasearch is of course fantastic, and I take your points about satelites seriously, although suspect that most of us could do without sky tv, gps and various military derivatives if we're brutally honest. 419 | ---> n 420 | 421 | -------------------------------------------------------------------------------- /TED_comment_annotations/4/complete/sentence_test.txt: -------------------------------------------------------------------------------- 1 | 1. No magic, alien assistance or lost mystical powers - just plain patience and determination ignoringg the attendant slavery for the moment). 2 | ---> n 3 | 4 | 2. Not only does he put focus on the dumbing down, dilution and perversion of scientific findings one sees presented in the media, but he explains the mechanisms by which the same findings become inaccessible or incomprehensible to the less probing masses. 5 | ---> n 6 | 7 | 3. This is science at it's best and the future is bright. 8 | ---> p 9 | 10 | 4. Is this really a good idea in the long term? 11 | ---> n 12 | 13 | 5. The inventor did not mean to produce such kind of glue. 14 | ---> n 15 | 16 | 6. People do not want to change now as long as oil is still affordable. 17 | ---> n 18 | 19 | 7. You fail to realize what povertyy" really means in regards to education, connection, status, class, family and social clasism. 20 | ---> n 21 | 22 | 8. clumsy to say the less) but I feel the full conclusion was not reached. 23 | ---> n 24 | 25 | 9. Agricultural techniques may increase that amount but doing so often comes with environmental damage and a loss of biodiversity. 26 | ---> n 27 | 28 | 10. Inspiring talk! 29 | ---> p 30 | 31 | 11. Think what you want, but evolution is true and there's no conspiracy. 32 | ---> n 33 | 34 | 12. The question of great power bringing great responsibility isn't new (spiderman, anyone? 35 | ---> u 36 | 37 | 13. With out ideologues, there is no hope whatsoever. 38 | ---> p 39 | 40 | 14. Beautiful, but for someone like me who's biggest problem is control"l over my experiences, it sounds like a sort of hell to not be able to see my boundaries! 41 | ---> p 42 | 43 | 15. Direction of Magnetic force, and 4. 44 | ---> u 45 | 46 | 16. i got a little bored listening. 47 | ---> n 48 | 49 | 17. Helping Handicapped and/or elderly people. 50 | ---> p 51 | 52 | 18. I do not mean to demean anyone. 53 | ---> n 54 | 55 | 19. I do love this concept of design for outcome, nice talk! 56 | ---> p 57 | 58 | 20. The most basic concept here is people thrive on autonomy. 59 | ---> p 60 | 61 | 21. The negative attitude is ingrained in us by evolution, no less. 62 | ---> n 63 | 64 | 22. Check out this newly created blog about bigg data", which in this wek's post discusses the ethical implications of this topic. 65 | ---> p 66 | 67 | 23. I love this! 68 | ---> p 69 | 70 | 24. I'm living in Japan, which consistently appears at the better end of the scale. 71 | ---> p 72 | 73 | 25. In his eloquent reminiscence of his visit to the Kaba- Islam's religious shrine, Mr. Mustafe Akyol uses the word, islamicc Law" several times. 74 | ---> u 75 | 76 | 26. Fantastic talk. 77 | ---> p 78 | 79 | 27. This sounds amazing. 80 | ---> p 81 | 82 | 28. More importantly, there is no government that would legitimize such businesses by issuing business permits or licenses, whose sole purpose is to police their role. 83 | ---> n 84 | 85 | 29. Aime is an extraordinary speaker. 86 | ---> p 87 | 88 | 30. I think it's pretty good to see someone stand out and endorse this kind of viewpoint providing a new way to see things. 89 | ---> p 90 | 91 | 31. I love her voice and the cello. 92 | ---> p 93 | 94 | 32. He doesn't have a clue about Functional Medicine and its uses in modern Europe. 95 | ---> n 96 | 97 | 33. Where's the innovation? 98 | ---> n 99 | 100 | 34. Looking at the data, I was struck not only by Thailand's rapid decrease in fertility rate but also by a similar decrease in fertility rate of its neighbors. 101 | ---> n 102 | 103 | 35. Many of my friends died in car accidents which could have been all avoided by this technology! 104 | ---> p 105 | 106 | 36. very nice work mr. klein. 107 | ---> p 108 | 109 | 37. Muchas veces al ver un problema pensamos que las soluciones siempre son complejas, lo cual implica de cierta forma m´s tiempo y m´s dinero; sin embargo existen soluciones simples y de bajo costo que permiten resultados similares, y eso es lo que vemos en este video que est´ orientado a las clases de escazos recursos. 110 | ---> p 111 | 112 | 38. the particles he mentions can be descriptions of different possible routs for vibrations of a very complex vortex of interacting stringss". 113 | ---> p 114 | 115 | 39. Not worthy of Ted. 116 | ---> n 117 | 118 | 40. not all of us may have the capital and resources that she had but i am pretty sure that most of us have the basic education needed to teach. 119 | ---> p 120 | 121 | 41. An amazing speaker with knowledge is enough to get me excited about any topic. 122 | ---> p 123 | 124 | 42. Most of all, it can inspire us to do things we never thought we could. 125 | ---> p 126 | 127 | 43. I believe that this debate has shown that wind for one can be a big part in solving our energy problems. 128 | ---> p 129 | 130 | 44. Bjorn reminds me of what I don't really like about organizations and events like Ted, Davos, the Cfr, etc. 131 | ---> n 132 | 133 | 45. I made the conscience decision to stop listening to music for a while which was devastating because music is my passion by far. 134 | ---> p 135 | 136 | 46. Many have denied in follow-up that they reject common descent (your 'macroevolution') and claim to have been misled or misrepresented. 137 | ---> n 138 | 139 | 47. The project requirements were to construct a minimal structure from unusual materials. 140 | ---> n 141 | 142 | 48. fascinating body of work and process of thinking 143 | ---> p 144 | 145 | 49. It is a kind of revolution in the area of technology. 146 | ---> p 147 | 148 | 50. @chris: Have you considered confirmation bias (chery-picking)? 149 | ---> n 150 | 151 | 51. Are you saying it's impossible for these apes to have a similar consciousness? 152 | ---> n 153 | 154 | 52. lastt phrase shamelessly copied from who knows who). 155 | ---> n 156 | 157 | 53. If anything, this lecture should have seen the different type. 158 | ---> n 159 | 160 | 54. But, its root cause, The Covert Enslavement of Humanity, is history's greatest injustice. 161 | ---> n 162 | 163 | 55. Dear T Walker sadlyy, you will not understand what that sentence really means". 164 | ---> n 165 | 166 | 56. Nick Bostrom; you can not prove a negative, self away beings will always search & need meaning, and from what we know from neuroscience, the brain seems to have evolved the ability to experience transcendence and belief. 167 | ---> n 168 | 169 | 57. This is a bad idea that leads to a generation or more that doesn't understand the fundamentals of calculation. 170 | ---> n 171 | 172 | 58. A brilliant professor ! 173 | ---> p 174 | 175 | 59. Yet no one has been able to compare the effectiveness of solar orbital microwave power stations? 176 | ---> n 177 | 178 | 60. As far as his introduction, it was a hook and it was interesting and thought provoking, just like what followed. 179 | ---> p 180 | 181 | 61. Rahul Dewan: ii feel, the way to prevent terrorism is to 'raise the level of discourse' on God and Spirituality, and make it spread across all religious faiths". 182 | ---> p 183 | 184 | 62. And equally like the 19th century, our governing powers and ruling elites don't quite have any answer of how to undo what can't really be undone. 185 | ---> n 186 | 187 | 63. Commenting on Geof Mosher alongg with Tj Green) Iwould start b ystating the fact that evolutionn is war" is quite an understatment. 188 | ---> n 189 | 190 | 64. I will address them personally if you wish me to do so. 191 | ---> n 192 | 193 | 65. That's the wonderful right of freedom. 194 | ---> p 195 | 196 | 66. Very nice. 197 | ---> p 198 | 199 | 67. But could the opposite be true: that the gregariousness that is the driving force of human beings, is being virtualized by make-believe substitutes to actual social interaction? 200 | ---> p 201 | 202 | 68. Guys use Vlc player, the mp4 480p version works perfectly on it, awesome quality too. 203 | ---> u 204 | 205 | 69. Would it be impolite to ask where you stand in this argument? 206 | ---> n 207 | 208 | 70. Just wow. 209 | ---> p 210 | 211 | 71. However, had large numbers of Dutch with their hunting and military style rifles been able to start a sniping campaign against the Germans, Wi may have been shortened. 212 | ---> n 213 | 214 | 72. Amazing - Good Work - He is Worth to not only to India but also to the Whole world His promise for Open Source is also unbelivable a lots of Thanks to him and sponsers 215 | ---> p 216 | 217 | 73. It isn't good or bad; it's inevitable. 218 | ---> u 219 | 220 | 74. A couple of years ago the idea was about privacy that if you are connected to the Internet then your privacy is eliminated and you don't have any right to claim it back because you're a part of the network and you're obviously exchanging info that crosses a lot of paths to reach you, but recently, telecom providers are the ones dominating and watching, closely, what everyone is doing. 221 | ---> n 222 | 223 | 75. Wait for gasoline to hit $5 a gallon and it will. 224 | ---> n 225 | 226 | 76. Ib is a rigorous international education program for high school students. 227 | ---> p 228 | 229 | 77. As someone who has spent several years teaching, I can say from experience that children need discipline, a rigid structure and for the most part, they need to be told what to do. 230 | ---> n 231 | 232 | 78. Horrible architect, decent marketer perhaps? 233 | ---> n 234 | 235 | 79. They aren't reproducible on command, and they do not openly discussed the incidences because the subject is taboo. 236 | ---> n 237 | 238 | 80. Examples: To say that conservatives are much more into ideological purity is absurd. 239 | ---> n 240 | 241 | 81. David is talking about a job interview on steroids, and like all job interviews there is an understanding that things will be shown in the best light possible (to say the least). 242 | ---> p 243 | 244 | 82. In order to ensure we aren't fluctuations, we need to make an observation and analyze it. 245 | ---> n 246 | 247 | 83. They don’t have room or numbers and we are losing them. 248 | ---> n 249 | 250 | 84. The point I'm trying to make is this: a great craftsman is Not by default a great artist. 251 | ---> n 252 | 253 | 85. They are invited to raise their right hand and in unison say globall Truce Starts Now! 254 | ---> u 255 | 256 | 86. I would say all the fundamentalisms floating around in peoples minds making war on each other are one of the great problems of our time. 257 | ---> u 258 | 259 | 87. I will probably be one of the first to be swept away! 260 | ---> p 261 | 262 | 88. Please don't even Think of taking it down! 263 | ---> u 264 | 265 | 89. You see a lot of people, youngsters especially, who complain that they are depressed and alone. 266 | ---> p 267 | 268 | 90. I'm dubious about the payback periods you state. 269 | ---> n 270 | 271 | 91. Did his disgraceful comment on Israel fall on deaf ears? 272 | ---> n 273 | 274 | 92. To that end, how would empathy fix any of these problems? 275 | ---> n 276 | 277 | 93. follow his lead by taking a hard look at your own life and either get help or work on your own to become more 'gay'. 278 | ---> n 279 | 280 | 94. Sorry, he's a pet peeve. 281 | ---> n 282 | 283 | 95. human have no right and ability to make it! 284 | ---> n 285 | 286 | 96. Felix, why should an artist or a political activist not be commercially astute? 287 | ---> n 288 | 289 | 97. And now we have screwed up our heads were screwing about with monkeys heads as well): 290 | ---> n 291 | 292 | 98. It searches for and indexes common data in a vast resource and, not surprisingly, ends up finding lots of that data. 293 | ---> n 294 | 295 | 99. People need to try to be less offended by differing viewpoints. 296 | ---> p 297 | 298 | 100. their use of memetics suggests an intentionalityy" in memes. 299 | ---> u 300 | 301 | 101. Noah Feldman constantly refers to the supposed democratic beliefs of a vast majority of Muslims. 302 | ---> n 303 | 304 | 102. I felt it helped me deal with issues in a good way and was the right attitude to have. 305 | ---> p 306 | 307 | 103. This feed specific microbes that in tern secrete the nutrient that the plant needs. 308 | ---> p 309 | 310 | 104. I know that's a bit simplistic, but still. 311 | ---> u 312 | 313 | 105. another one thing is staffs, they work only for salary they wouldn't give clear leacture. 314 | ---> n 315 | 316 | 106. Ingenious incorporation of the natural binding aspect of the mycelium from the mushroom plant to create a products)s that could partially compete with some single uses of Styrofoam. 317 | ---> p 318 | 319 | 107. As for China, I mostly agree with Mr. Tabarok, but I'm not sure if the post-mao development would've been possible without Mao's prior contribution to health seee Hans Rosling's Ted talks). 320 | ---> p 321 | 322 | 108. his talk is great. 323 | ---> p 324 | 325 | 109. Now, to briefly address your list of supposed contradictions and fallacies. 326 | ---> n 327 | 328 | 110. Lol - hilarious. 329 | ---> p 330 | 331 | 111. But the point is missed what Dan Gilbert is trying to tell you how we proceed looking at some time of value because its illusion, mostly thats what we are buying "that Illusion! 332 | ---> p 333 | 334 | 112. Stephen Colbert, Jon Stewart and Bill Maher do make things easier to discuss with humor but they talk about American issues and Americans watch them. 335 | ---> n 336 | 337 | 113. xd It's been awhile since I smiled so much just from a video ^ I want more of this! 338 | ---> p 339 | 340 | 114. In my view , school is really a good place to sort out the smart ones who survive the education system and create stupid people (to work as labour) . 341 | ---> u 342 | 343 | 115. Having been a professional background vocalist in Hollywood for several decades, I att age 24) rejoined a pop band, which had drastically changed its repetoire during my absence, having added several vocally demanding rock tunes, which I would be required to sing to regain my old position. 344 | ---> n 345 | 346 | 116. Thank goodness and for the interactive transcript feature and Lee Smolin. 347 | ---> p 348 | 349 | 117. I initiated and developed this project some years ago, which makes it possible for disabled people to take a look beyond their horizons and experience the ultimate feeling of freedom and independence: htp:/w.youtube.com/watch?v=ljbmau7jc 350 | ---> p 351 | 352 | 118. Pinker uses warped statistics ass pointed out in the blog you mention). 353 | ---> n 354 | 355 | 119. it's also our fault too by allowing men to do so. 356 | ---> n 357 | 358 | 120. many are institutionalised profit based or religious hagovers from another world ago. 359 | ---> p 360 | 361 | 121. This is a political manipulation attempt to educate the children. 362 | ---> n 363 | 364 | 122. It's a glorified 70-year-old hand cannon and it is not modern, civil, humane or precise. 365 | ---> n 366 | 367 | 123. Peace! 368 | ---> p 369 | 370 | 124. Of course this may not guarantee. 371 | ---> n 372 | 373 | 125. This was some kind of parody, right? 374 | ---> n 375 | 376 | 126. I do these things for a distraction and generally when I am bored. 377 | ---> n 378 | 379 | 127. right or wrong I think this is an inevitable advancement you have to be fine with it. 380 | ---> p 381 | 382 | 128. Would really love to go to a postsecret live event. 383 | ---> p 384 | 385 | 129. " the words from a man, whom I think, seriously meant that. 386 | ---> n 387 | 388 | 130. I can see that I wasn't as clear in my writing as I thought I was. 389 | ---> u 390 | 391 | 131. tragic at the least and still playing out every day. 392 | ---> n 393 | 394 | 132. I wonder how well this was received and how many of his countrymen saw this. 395 | ---> n 396 | 397 | 133. I am no scientist but it would seem possible to manipulate such large amounts of data on societal reactions and interactions in a way that would produce highly reliable prognostications relative to very specific stimulus. 398 | ---> p 399 | 400 | 134. While one size will not fit all, this looks to be a wonderful piece in the puzzle. 401 | ---> p 402 | 403 | 135. any body please help 404 | ---> n 405 | 406 | 136. This allows our brains to wire for efficiency. 407 | ---> p 408 | 409 | 137. When I built the house, I built it according to mostt of) the building codes in Canada. 410 | ---> u 411 | 412 | 138. 2)2 He created them male and female and blessed them and named them [both] Adam [man] at the time they were created. 413 | ---> u 414 | 415 | 139. I definitely think, that there is no movie, that can be so epic and so truly touching that these real life - stuffs happening here in Ted. 416 | ---> p 417 | 418 | 140. All reasearch is of course fantastic, and I take your points about satelites seriously, although suspect that most of us could do without sky tv, gps and various military derivatives if we're brutally honest. 419 | ---> n 420 | 421 | -------------------------------------------------------------------------------- /TED_comment_annotations/4/sentence_test.txt: -------------------------------------------------------------------------------- 1 | 1. No magic, alien assistance or lost mystical powers - just plain patience and determination ignoringg the attendant slavery for the moment). 2 | ---> 3 | 4 | 2. Not only does he put focus on the dumbing down, dilution and perversion of scientific findings one sees presented in the media, but he explains the mechanisms by which the same findings become inaccessible or incomprehensible to the less probing masses. 5 | ---> 6 | 7 | 3. This is science at it's best and the future is bright. 8 | ---> 9 | 10 | 4. Is this really a good idea in the long term? 11 | ---> 12 | 13 | 5. The inventor did not mean to produce such kind of glue. 14 | ---> 15 | 16 | 6. People do not want to change now as long as oil is still affordable. 17 | ---> 18 | 19 | 7. You fail to realize what povertyy" really means in regards to education, connection, status, class, family and social clasism. 20 | ---> 21 | 22 | 8. clumsy to say the less) but I feel the full conclusion was not reached. 23 | ---> 24 | 25 | 9. Agricultural techniques may increase that amount but doing so often comes with environmental damage and a loss of biodiversity. 26 | ---> 27 | 28 | 10. Inspiring talk! 29 | ---> 30 | 31 | 11. Think what you want, but evolution is true and there's no conspiracy. 32 | ---> 33 | 34 | 12. The question of great power bringing great responsibility isn't new (spiderman, anyone? 35 | ---> 36 | 37 | 13. With out ideologues, there is no hope whatsoever. 38 | ---> 39 | 40 | 14. Beautiful, but for someone like me who's biggest problem is control"l over my experiences, it sounds like a sort of hell to not be able to see my boundaries! 41 | ---> 42 | 43 | 15. Direction of Magnetic force, and 4. 44 | ---> 45 | 46 | 16. i got a little bored listening. 47 | ---> 48 | 49 | 17. Helping Handicapped and/or elderly people. 50 | ---> 51 | 52 | 18. I do not mean to demean anyone. 53 | ---> 54 | 55 | 19. I do love this concept of design for outcome, nice talk! 56 | ---> 57 | 58 | 20. The most basic concept here is people thrive on autonomy. 59 | ---> 60 | 61 | 21. The negative attitude is ingrained in us by evolution, no less. 62 | ---> 63 | 64 | 22. Check out this newly created blog about bigg data", which in this wek's post discusses the ethical implications of this topic. 65 | ---> 66 | 67 | 23. I love this! 68 | ---> 69 | 70 | 24. I'm living in Japan, which consistently appears at the better end of the scale. 71 | ---> 72 | 73 | 25. In his eloquent reminiscence of his visit to the Kaba- Islam's religious shrine, Mr. Mustafe Akyol uses the word, islamicc Law" several times. 74 | ---> 75 | 76 | 26. Fantastic talk. 77 | ---> 78 | 79 | 27. This sounds amazing. 80 | ---> 81 | 82 | 28. More importantly, there is no government that would legitimize such businesses by issuing business permits or licenses, whose sole purpose is to police their role. 83 | ---> 84 | 85 | 29. Aime is an extraordinary speaker. 86 | ---> 87 | 88 | 30. I think it's pretty good to see someone stand out and endorse this kind of viewpoint providing a new way to see things. 89 | ---> 90 | 91 | 31. I love her voice and the cello. 92 | ---> 93 | 94 | 32. He doesn't have a clue about Functional Medicine and its uses in modern Europe. 95 | ---> 96 | 97 | 33. Where's the innovation? 98 | ---> 99 | 100 | 34. Looking at the data, I was struck not only by Thailand's rapid decrease in fertility rate but also by a similar decrease in fertility rate of its neighbors. 101 | ---> 102 | 103 | 35. Many of my friends died in car accidents which could have been all avoided by this technology! 104 | ---> 105 | 106 | 36. very nice work mr. klein. 107 | ---> 108 | 109 | 37. Muchas veces al ver un problema pensamos que las soluciones siempre son complejas, lo cual implica de cierta forma m´s tiempo y m´s dinero; sin embargo existen soluciones simples y de bajo costo que permiten resultados similares, y eso es lo que vemos en este video que est´ orientado a las clases de escazos recursos. 110 | ---> 111 | 112 | 38. the particles he mentions can be descriptions of different possible routs for vibrations of a very complex vortex of interacting stringss". 113 | ---> 114 | 115 | 39. Not worthy of Ted. 116 | ---> 117 | 118 | 40. not all of us may have the capital and resources that she had but i am pretty sure that most of us have the basic education needed to teach. 119 | ---> 120 | 121 | 41. An amazing speaker with knowledge is enough to get me excited about any topic. 122 | ---> 123 | 124 | 42. Most of all, it can inspire us to do things we never thought we could. 125 | ---> 126 | 127 | 43. I believe that this debate has shown that wind for one can be a big part in solving our energy problems. 128 | ---> 129 | 130 | 44. Bjorn reminds me of what I don't really like about organizations and events like Ted, Davos, the Cfr, etc. 131 | ---> 132 | 133 | 45. I made the conscience decision to stop listening to music for a while which was devastating because music is my passion by far. 134 | ---> 135 | 136 | 46. Many have denied in follow-up that they reject common descent (your 'macroevolution') and claim to have been misled or misrepresented. 137 | ---> 138 | 139 | 47. The project requirements were to construct a minimal structure from unusual materials. 140 | ---> 141 | 142 | 48. fascinating body of work and process of thinking 143 | ---> 144 | 145 | 49. It is a kind of revolution in the area of technology. 146 | ---> 147 | 148 | 50. @chris: Have you considered confirmation bias (chery-picking)? 149 | ---> 150 | 151 | 51. Are you saying it's impossible for these apes to have a similar consciousness? 152 | ---> 153 | 154 | 52. lastt phrase shamelessly copied from who knows who). 155 | ---> 156 | 157 | 53. If anything, this lecture should have seen the different type. 158 | ---> 159 | 160 | 54. But, its root cause, The Covert Enslavement of Humanity, is history's greatest injustice. 161 | ---> 162 | 163 | 55. Dear T Walker sadlyy, you will not understand what that sentence really means". 164 | ---> 165 | 166 | 56. Nick Bostrom; you can not prove a negative, self away beings will always search & need meaning, and from what we know from neuroscience, the brain seems to have evolved the ability to experience transcendence and belief. 167 | ---> 168 | 169 | 57. This is a bad idea that leads to a generation or more that doesn't understand the fundamentals of calculation. 170 | ---> 171 | 172 | 58. A brilliant professor ! 173 | ---> 174 | 175 | 59. Yet no one has been able to compare the effectiveness of solar orbital microwave power stations? 176 | ---> 177 | 178 | 60. As far as his introduction, it was a hook and it was interesting and thought provoking, just like what followed. 179 | ---> 180 | 181 | 61. Rahul Dewan: ii feel, the way to prevent terrorism is to 'raise the level of discourse' on God and Spirituality, and make it spread across all religious faiths". 182 | ---> 183 | 184 | 62. And equally like the 19th century, our governing powers and ruling elites don't quite have any answer of how to undo what can't really be undone. 185 | ---> 186 | 187 | 63. Commenting on Geof Mosher alongg with Tj Green) Iwould start b ystating the fact that evolutionn is war" is quite an understatment. 188 | ---> 189 | 190 | 64. I will address them personally if you wish me to do so. 191 | ---> 192 | 193 | 65. That's the wonderful right of freedom. 194 | ---> 195 | 196 | 66. Very nice. 197 | ---> 198 | 199 | 67. But could the opposite be true: that the gregariousness that is the driving force of human beings, is being virtualized by make-believe substitutes to actual social interaction? 200 | ---> 201 | 202 | 68. Guys use Vlc player, the mp4 480p version works perfectly on it, awesome quality too. 203 | ---> 204 | 205 | 69. Would it be impolite to ask where you stand in this argument? 206 | ---> 207 | 208 | 70. Just wow. 209 | ---> 210 | 211 | 71. However, had large numbers of Dutch with their hunting and military style rifles been able to start a sniping campaign against the Germans, Wi may have been shortened. 212 | ---> 213 | 214 | 72. Amazing - Good Work - He is Worth to not only to India but also to the Whole world His promise for Open Source is also unbelivable a lots of Thanks to him and sponsers 215 | ---> 216 | 217 | 73. It isn't good or bad; it's inevitable. 218 | ---> 219 | 220 | 74. A couple of years ago the idea was about privacy that if you are connected to the Internet then your privacy is eliminated and you don't have any right to claim it back because you're a part of the network and you're obviously exchanging info that crosses a lot of paths to reach you, but recently, telecom providers are the ones dominating and watching, closely, what everyone is doing. 221 | ---> 222 | 223 | 75. Wait for gasoline to hit $5 a gallon and it will. 224 | ---> 225 | 226 | 76. Ib is a rigorous international education program for high school students. 227 | ---> 228 | 229 | 77. As someone who has spent several years teaching, I can say from experience that children need discipline, a rigid structure and for the most part, they need to be told what to do. 230 | ---> 231 | 232 | 78. Horrible architect, decent marketer perhaps? 233 | ---> 234 | 235 | 79. They aren't reproducible on command, and they do not openly discussed the incidences because the subject is taboo. 236 | ---> 237 | 238 | 80. Examples: To say that conservatives are much more into ideological purity is absurd. 239 | ---> 240 | 241 | 81. David is talking about a job interview on steroids, and like all job interviews there is an understanding that things will be shown in the best light possible (to say the least). 242 | ---> 243 | 244 | 82. In order to ensure we aren't fluctuations, we need to make an observation and analyze it. 245 | ---> 246 | 247 | 83. They don’t have room or numbers and we are losing them. 248 | ---> 249 | 250 | 84. The point I'm trying to make is this: a great craftsman is Not by default a great artist. 251 | ---> 252 | 253 | 85. They are invited to raise their right hand and in unison say globall Truce Starts Now! 254 | ---> 255 | 256 | 86. I would say all the fundamentalisms floating around in peoples minds making war on each other are one of the great problems of our time. 257 | ---> 258 | 259 | 87. I will probably be one of the first to be swept away! 260 | ---> 261 | 262 | 88. Please don't even Think of taking it down! 263 | ---> 264 | 265 | 89. You see a lot of people, youngsters especially, who complain that they are depressed and alone. 266 | ---> 267 | 268 | 90. I'm dubious about the payback periods you state. 269 | ---> 270 | 271 | 91. Did his disgraceful comment on Israel fall on deaf ears? 272 | ---> 273 | 274 | 92. To that end, how would empathy fix any of these problems? 275 | ---> 276 | 277 | 93. follow his lead by taking a hard look at your own life and either get help or work on your own to become more 'gay'. 278 | ---> 279 | 280 | 94. Sorry, he's a pet peeve. 281 | ---> 282 | 283 | 95. human have no right and ability to make it! 284 | ---> 285 | 286 | 96. Felix, why should an artist or a political activist not be commercially astute? 287 | ---> 288 | 289 | 97. And now we have screwed up our heads were screwing about with monkeys heads as well): 290 | ---> 291 | 292 | 98. It searches for and indexes common data in a vast resource and, not surprisingly, ends up finding lots of that data. 293 | ---> 294 | 295 | 99. People need to try to be less offended by differing viewpoints. 296 | ---> 297 | 298 | 100. their use of memetics suggests an intentionalityy" in memes. 299 | ---> 300 | 301 | 101. Noah Feldman constantly refers to the supposed democratic beliefs of a vast majority of Muslims. 302 | ---> 303 | 304 | 102. I felt it helped me deal with issues in a good way and was the right attitude to have. 305 | ---> 306 | 307 | 103. This feed specific microbes that in tern secrete the nutrient that the plant needs. 308 | ---> 309 | 310 | 104. I know that's a bit simplistic, but still. 311 | ---> 312 | 313 | 105. another one thing is staffs, they work only for salary they wouldn't give clear leacture. 314 | ---> 315 | 316 | 106. Ingenious incorporation of the natural binding aspect of the mycelium from the mushroom plant to create a products)s that could partially compete with some single uses of Styrofoam. 317 | ---> 318 | 319 | 107. As for China, I mostly agree with Mr. Tabarok, but I'm not sure if the post-mao development would've been possible without Mao's prior contribution to health seee Hans Rosling's Ted talks). 320 | ---> 321 | 322 | 108. his talk is great. 323 | ---> 324 | 325 | 109. Now, to briefly address your list of supposed contradictions and fallacies. 326 | ---> 327 | 328 | 110. Lol - hilarious. 329 | ---> 330 | 331 | 111. But the point is missed what Dan Gilbert is trying to tell you how we proceed looking at some time of value because its illusion, mostly thats what we are buying "that Illusion! 332 | ---> 333 | 334 | 112. Stephen Colbert, Jon Stewart and Bill Maher do make things easier to discuss with humor but they talk about American issues and Americans watch them. 335 | ---> 336 | 337 | 113. xd It's been awhile since I smiled so much just from a video ^ I want more of this! 338 | ---> 339 | 340 | 114. In my view , school is really a good place to sort out the smart ones who survive the education system and create stupid people (to work as labour) . 341 | ---> 342 | 343 | 115. Having been a professional background vocalist in Hollywood for several decades, I att age 24) rejoined a pop band, which had drastically changed its repetoire during my absence, having added several vocally demanding rock tunes, which I would be required to sing to regain my old position. 344 | ---> 345 | 346 | 116. Thank goodness and for the interactive transcript feature and Lee Smolin. 347 | ---> 348 | 349 | 117. I initiated and developed this project some years ago, which makes it possible for disabled people to take a look beyond their horizons and experience the ultimate feeling of freedom and independence: htp:/w.youtube.com/watch?v=ljbmau7jc 350 | ---> 351 | 352 | 118. Pinker uses warped statistics ass pointed out in the blog you mention). 353 | ---> 354 | 355 | 119. it's also our fault too by allowing men to do so. 356 | ---> 357 | 358 | 120. many are institutionalised profit based or religious hagovers from another world ago. 359 | ---> 360 | 361 | 121. This is a political manipulation attempt to educate the children. 362 | ---> 363 | 364 | 122. It's a glorified 70-year-old hand cannon and it is not modern, civil, humane or precise. 365 | ---> 366 | 367 | 123. Peace! 368 | ---> 369 | 370 | 124. Of course this may not guarantee. 371 | ---> 372 | 373 | 125. This was some kind of parody, right? 374 | ---> 375 | 376 | 126. I do these things for a distraction and generally when I am bored. 377 | ---> 378 | 379 | 127. right or wrong I think this is an inevitable advancement you have to be fine with it. 380 | ---> 381 | 382 | 128. Would really love to go to a postsecret live event. 383 | ---> 384 | 385 | 129. " the words from a man, whom I think, seriously meant that. 386 | ---> 387 | 388 | 130. I can see that I wasn't as clear in my writing as I thought I was. 389 | ---> 390 | 391 | 131. tragic at the least and still playing out every day. 392 | ---> 393 | 394 | 132. I wonder how well this was received and how many of his countrymen saw this. 395 | ---> 396 | 397 | 133. I am no scientist but it would seem possible to manipulate such large amounts of data on societal reactions and interactions in a way that would produce highly reliable prognostications relative to very specific stimulus. 398 | ---> 399 | 400 | 134. While one size will not fit all, this looks to be a wonderful piece in the puzzle. 401 | ---> 402 | 403 | 135. any body please help 404 | ---> 405 | 406 | 136. This allows our brains to wire for efficiency. 407 | ---> 408 | 409 | 137. When I built the house, I built it according to mostt of) the building codes in Canada. 410 | ---> 411 | 412 | 138. 2)2 He created them male and female and blessed them and named them [both] Adam [man] at the time they were created. 413 | ---> 414 | 415 | 139. I definitely think, that there is no movie, that can be so epic and so truly touching that these real life - stuffs happening here in Ted. 416 | ---> 417 | 418 | 140. All reasearch is of course fantastic, and I take your points about satelites seriously, although suspect that most of us could do without sky tv, gps and various military derivatives if we're brutally honest. 419 | ---> 420 | 421 | -------------------------------------------------------------------------------- /TED_comment_annotations/5/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/5 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | 9dac5c2b37dda0ef1690c72bc194a66a 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 35826 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | 0e4a83b879db676dc8822e18e196b941 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 15350 96 | 97 | complete 98 | dir 99 | 100 | nesli 101 | file 102 | 103 | 104 | 105 | 106 | 2013-01-18T19:54:08.000000Z 107 | d41d8cd98f00b204e9800998ecf8427e 108 | 2013-01-18T18:08:40.681354Z 109 | 1662 110 | snupdc 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 0 133 | 134 | -------------------------------------------------------------------------------- /TED_comment_annotations/5/.svn/text-base/nesli.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/TED_comment_annotations/5/.svn/text-base/nesli.svn-base -------------------------------------------------------------------------------- /TED_comment_annotations/5/complete/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/5/complete 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | 10df67bb1b966d3e5a86a45def9aa9dd 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 35870 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | 960b32687763c69f5dcf2b9b8e2950e4 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 15490 96 | 97 | -------------------------------------------------------------------------------- /TED_comment_annotations/6/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/6 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | fe2a2f87b74c9659be22ce8fc7cfcd79 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 34289 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | 8534c630c098c3b86035cb588f0ce6d1 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 15056 96 | 97 | complete 98 | dir 99 | 100 | subheim 101 | file 102 | 103 | 104 | 105 | 106 | 2013-01-18T19:54:08.000000Z 107 | d41d8cd98f00b204e9800998ecf8427e 108 | 2013-01-18T18:08:40.681354Z 109 | 1662 110 | snupdc 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 0 133 | 134 | -------------------------------------------------------------------------------- /TED_comment_annotations/6/.svn/text-base/subheim.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/TED_comment_annotations/6/.svn/text-base/subheim.svn-base -------------------------------------------------------------------------------- /TED_comment_annotations/6/complete/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 2043 5 | svn://deepcore.gr/dc/ted/v1/hlt_project/utils/full_corpus/subjects/testing/6/complete 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2013-01-18T18:08:40.681354Z 11 | 1662 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | comment_test.txt 30 | file 31 | 32 | 33 | 34 | 35 | 2013-01-18T19:54:08.000000Z 36 | 656bbac049c18c99a919ac37ab8955b7 37 | 2013-01-18T18:08:40.681354Z 38 | 1662 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 34592 62 | 63 | sentence_test.txt 64 | file 65 | 66 | 67 | 68 | 69 | 2013-01-18T19:54:08.000000Z 70 | e4b8259a46324d95d28285e9a9b0e839 71 | 2013-01-18T18:08:40.681354Z 72 | 1662 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 15196 96 | 97 | -------------------------------------------------------------------------------- /bootstrapping.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import re 15 | import sys 16 | import pickle 17 | from terminal_colors import Tcolors 18 | from pb_classifiers import PbSubj 19 | 20 | class Bootstrapping: 21 | """ 22 | Bootstrapping: Class performing the bootstrapping process for 23 | subjectivity and objectivity classification of sentences. The 24 | method learns linguistically rich extraction patterns for subjective 25 | (opinionated) expressions from unannotated data. The learned 26 | patterns are used to identify more subjective sentences that simple 27 | high precision classifiers can't recall. 28 | 29 | Related paper: 30 | E. Riloff and J. Wiebe. Learning extraction patterns for subjective 31 | expressions. In Proceedings of the 2003 conference on Empirical methods 32 | in natural language processing, EMNLP '03, pages 105--112, 2003. ACL. 33 | 34 | Learned patterns structure 35 | e.g. {" was killed" : {'type' : 'subj', 36 | 'display' : 'was killed', 37 | 'subj_freq' : 10, 38 | 'freq' : 20, 39 | 'prob' : 0.5}} 40 | 41 | """ 42 | 43 | def __init__(self, hp_obj, hp_subj, tagger, debug=False): 44 | # Syntactic forms for pattern extraction 45 | self.syntactic_forms = {"subj" : [["BE","VBN*|VBD*"], 46 | ["HAVE","BE","VB*"], 47 | ["VB*"], 48 | ["VB*","*","NN*|NP*|NC*"], 49 | ["VB*","TO","VB*"], 50 | ["HAVE","TO","BE"], 51 | ["HAVE","NN*"]], 52 | "dobj" : [["VB*"], 53 | ["TO","VB*"], 54 | ["VB*","TO","VB*"]], 55 | "np" : [["NN","IN"], 56 | ["VB*","NN","IN"], 57 | ["BE","VBN","IN"], 58 | ["TO","VB","TO"]] 59 | } 60 | self.filename = "stored/learned_patterns" 61 | try: 62 | self.learned_patterns = pickle.load(open(self.filename)) 63 | print Tcolors.ADD + Tcolors.OKBLUE + " Loaded existing pattern knowledge!" + Tcolors.ENDC 64 | except: 65 | print Tcolors.ACT + Tcolors.RED + " Existing pattern knowledge not found." + Tcolors.ENDC 66 | self.learned_patterns = {} 67 | 68 | # Part Of Speech Sequential Tagger (Unigram->Bigram->Trigram) 69 | self.tagger = tagger 70 | # Sentence to be classified 71 | self.subjective = False 72 | self.objective = False 73 | # High precision objective classifier 74 | self.hp_obj = hp_obj 75 | # High precision subjective classifier 76 | self.hp_subj = hp_subj 77 | # Pattern-Based Subjective Classifier 78 | self.pb_subj = PbSubj(self.tagger, debug=debug) 79 | # Learned patterns 80 | self.patterns = {} 81 | self.debug = debug 82 | 83 | def classify(self, sentence, previous="", next=""): 84 | """ 85 | Subjectivity classification using boostrapping method. 86 | """ 87 | # STEP 1: Classify sentence with HP Subjective classifier 88 | self.subjective = self.hp_subj.classify(sentence) 89 | # STEP 1: Get help from learned patterns 90 | if not self.subjective: 91 | if self.debug: print Tcolors.ACT + " Training pattern based classifier...\n" 92 | self.pb_subj.train(self.learned_patterns) 93 | found, self.subjective, obj = self.pb_subj.classify(sentence) 94 | 95 | if not self.subjective and not self.objective: 96 | # STEP 2: Classify sentence with HP Objective classifier 97 | self.objective = self.hp_obj.classify(sentence, previous, next) 98 | 99 | if self.subjective or self.objective: 100 | # STEP 3: Learn 101 | self.learn_patterns_from(sentence) 102 | else: 103 | # STEP 4: Classify based on learned patterns 104 | found, self.subjective, self.objective = self.pb_subj.classify(sentence) 105 | # Uncomment the two following to bootstrap further the subjective 106 | # sentences detected from the pattern-based classifier. 107 | # if self.subjective: 108 | # self.learn_patterns_from(sentence) 109 | if self.subjective: 110 | return 'subjective' 111 | elif self.objective: 112 | return 'objective' 113 | else: 114 | return None 115 | 116 | def learn_patterns_from(self, sentence): 117 | """ 118 | Learns extraction patterns associated with subjectivity 119 | from a given sentence. 120 | """ 121 | tagged_sentence = self.tagger.tag(sentence) 122 | tags = [] 123 | words = [] 124 | if self.debug: 125 | print Tcolors.ACT + " Performing part of speech (POS) tagging..." + Tcolors.WARNING 126 | print tagged_sentence 127 | print Tcolors.ENDC 128 | for (w,tag) in tagged_sentence: 129 | if tag is None: 130 | tag = "" 131 | tags.append(tag) 132 | words.append(w) 133 | 134 | self.trigger_patterns(tags, words) 135 | 136 | def match_until_next_nn(self, i, tags, words, form, key): 137 | """ 138 | The hard job for triggering the syntactic forms :-) 139 | """ 140 | LIMITER = 4 141 | BE = ['was','were','be','being','am','been','are','is'] 142 | HAVE = ['has','have','had'] 143 | matched = 0 144 | prev_matched = 0 145 | positions_matched = [] 146 | learned_pattern = [] 147 | star = False 148 | 149 | for j,ctag in enumerate(form): 150 | next = i + j + 1 151 | inner = 0 152 | found = False 153 | while(not found and next < len(tags)): 154 | next += inner 155 | if next < len(words) and ctag == "VB*" and words[next] in HAVE: 156 | next += 1 157 | if next < len(words) and ctag == "VB*" and words[next] in BE: 158 | next += 1 159 | elif next < len(words) and ctag == "VB*" and words[next] in BE: 160 | next += 1 161 | if ctag == "*": 162 | star = True 163 | elif ctag.find("*") > -1: 164 | ortags = ctag.split("|") 165 | for ortag in ortags: 166 | if next < len(tags) and tags[next].find(ortag.replace("*","")) > -1\ 167 | and next not in positions_matched: 168 | if star and inner < 2: 169 | matched += 1 170 | matched += 1 171 | positions_matched.append(next) 172 | found = True 173 | elif ctag == "BE": 174 | if next < len(tags) and (tags[next].find("VB") > -1 or tags[next].find("BE") > -1) \ 175 | and words[next] in BE and next not in positions_matched: 176 | matched += 1 177 | positions_matched.append(next) 178 | found = True 179 | elif ctag == "HAVE": 180 | if next < len(tags) and (tags[next].find("VB") > -1 or tags[next].find("HV") > -1)\ 181 | and words[next] in HAVE and next not in positions_matched: 182 | matched += 1 183 | positions_matched.append(next) 184 | found = True 185 | elif next < len(tags) and tags[next].find(ctag) > -1\ 186 | and next not in positions_matched: 187 | matched += 1 188 | positions_matched.append(next) 189 | found = True 190 | else: 191 | found = True 192 | inner += 1 193 | 194 | if key == "subj": 195 | learned_pattern = [""] 196 | for pos in positions_matched: 197 | learned_pattern.append(words[pos]) 198 | if key != "subj": 199 | learned_pattern.append("<" + key +">") 200 | 201 | learned_pattern = " ".join(learned_pattern) 202 | 203 | if matched == len(form): 204 | if self.debug: 205 | print Tcolors.ACT + Tcolors.RED + " Form triggered: ", form, Tcolors.ENDC 206 | print "Pattern learned:", learned_pattern 207 | return True, learned_pattern 208 | else: 209 | return False, None 210 | 211 | def proccess_learned_pattern(self, pattern): 212 | """ 213 | Add pattern to learned patterns if it doesn't exist else 214 | update its probability. 215 | """ 216 | if pattern.find("subj") > -1: 217 | key = "subj" 218 | elif pattern.find("dobj") > -1: 219 | key = "dobj" 220 | else: 221 | key = "np" 222 | cur_subj_freq = 0 223 | if self.subjective: 224 | cur_subj_freq = 1 225 | pkey = pattern 226 | pkey = re.sub(r" | | ","",pkey) 227 | if self.learned_patterns.has_key(pattern): 228 | subj_freq = self.learned_patterns[pattern]['subj_freq'] + cur_subj_freq 229 | freq = self.learned_patterns[pattern]['freq'] + 1 230 | prob = (float)(subj_freq)/(float)(freq) 231 | self.learned_patterns[pattern]['prob'] = prob 232 | self.learned_patterns[pattern]['subj_freq'] = subj_freq 233 | self.learned_patterns[pattern]['freq'] = freq 234 | if self.debug: print Tcolors.ADD + Tcolors.HEADER + " Updating pattern:", pattern, Tcolors.ENDC 235 | else: 236 | subj_freq = 0 237 | freq = 1 238 | subj_freq += cur_subj_freq 239 | prob = (float)(subj_freq)/(float)(freq) 240 | self.learned_patterns[pattern] = {'type': key, 241 | 'display': pkey, 242 | 'freq' : freq, 243 | 'subj_freq' : subj_freq, 244 | 'prob' : prob} 245 | if self.debug: print Tcolors.ADD + Tcolors.CYAN + " Learning pattern:", pattern, Tcolors.ENDC 246 | 247 | def store_knowledge(self): 248 | """ 249 | Stored learned patterns for future usage. 250 | """ 251 | output = open(self.filename, 'wb') 252 | pickle.dump(self.learned_patterns, output) 253 | 254 | 255 | def trigger_patterns(self, tags, words): 256 | """ 257 | Method that triggers syntactic forms and returns the learned 258 | patterns from the triggering. 259 | """ 260 | patterns = [] 261 | if self.debug: print Tcolors.ACT + " Triggering subjective syntactic forms..." 262 | for key in self.syntactic_forms.keys(): 263 | syntactic_forms = self.syntactic_forms[key] 264 | if self.debug: print Tcolors.PROC + Tcolors.GRAY + " Checking form group " + key + "..." + Tcolors.ENDC 265 | 266 | for form in syntactic_forms: 267 | for i,tag in enumerate(tags): 268 | if tag.find("NN") > -1 or tag.find("NP") > -1 \ 269 | or tag.find("PR") > -1: 270 | triggered, pattern = self.match_until_next_nn(i, tags, words, form, key) 271 | if pattern is not None and pattern not in patterns: 272 | if self.debug: print Tcolors.ACT + Tcolors.RED + " Form triggered: ", form, Tcolors.ENDC 273 | patterns.append(pattern) 274 | for pattern in patterns: 275 | self.proccess_learned_pattern(pattern) 276 | if self.debug: 277 | print Tcolors.OKBLUE 278 | print self.learned_patterns 279 | print Tcolors.ENDC 280 | self.store_knowledge() 281 | 282 | def train(self, data): 283 | """ 284 | Method to train the pattern-based classifier 285 | """ 286 | for sentence in data: 287 | self.classify(sentence) 288 | 289 | def clear_learned_data(self): 290 | self.learned_patterns = {} 291 | 292 | 293 | if __name__ == "__main__": 294 | from hp_classifiers import HpObj, HpSubj 295 | from pos import SequentialTagger 296 | hp_obj = HpObj() 297 | hp_subj = HpSubj() 298 | tagger = SequentialTagger() 299 | bootstrapping = Bootstrapping(hp_obj, hp_subj, tagger) 300 | if self.debug: 301 | print bootstrapping.classify(sys.argv[1]) 302 | 303 | 304 | -------------------------------------------------------------------------------- /datasets/.svn/entries: -------------------------------------------------------------------------------- 1 | 10 2 | 3 | dir 4 | 583 5 | svn://deepcore.gr/dc/projects/icrawler/icrawler/mining/learning/datasets 6 | svn://deepcore.gr/dc 7 | 8 | 9 | 10 | 2011-02-01T03:02:12.956261Z 11 | 576 12 | snupdc 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 5120806d-c870-0410-adaa-e3d35f40fc17 28 | 29 | segmentation_corpus.data 30 | file 31 | 32 | 33 | 34 | 35 | 2011-02-01T03:10:39.216122Z 36 | 2f7d9a17cdff7f6e23493e364c7769df 37 | 2011-02-01T03:02:12.956261Z 38 | 576 39 | snupdc 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 12681 62 | 63 | subjclueslen1-HLTEMNLP05.tff 64 | file 65 | 66 | 67 | 68 | 69 | 2011-01-30T01:50:27.916001Z 70 | 407e41c19d80bf84b264e6443fcff16c 71 | 2011-01-30T03:58:08.885718Z 72 | 568 73 | snupdc 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 662976 96 | 97 | emoticons_patch.py 98 | file 99 | 100 | 101 | 102 | 103 | 2011-02-01T03:10:39.216122Z 104 | 4f311e51ccfc4756e0a84dda712b7396 105 | 2011-02-01T03:02:12.956261Z 106 | 576 107 | snupdc 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 2194 130 | 131 | emoticons.data 132 | file 133 | 134 | 135 | 136 | 137 | 2011-01-30T01:51:34.232001Z 138 | d50ca7abe90b55099fa4726ce5f18e7f 139 | 2011-01-30T03:58:08.885718Z 140 | 568 141 | snupdc 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 3950 164 | 165 | -------------------------------------------------------------------------------- /datasets/.svn/text-base/emoticons.data.svn-base: -------------------------------------------------------------------------------- 1 | positive 2 | :-) :) :o) :] :3 :c) :> =] 8) =) :} :^) 3 | :D C: :-D :D 8D xD XD =D =3 <=3 <=8 :-9 4 | :-* :* <3 <333 =^_^= =>.>= =<_<= =>.<= 5 | \,,/ \m/ \m/\>..... B) B-) 8) 8-) 8 | negative 9 | e.e O.e O.e <=3 <=8 8===D 8===B --!-- :-( :( :c :< :[ :{ 10 | D: D8 D; D= DX v.v :-O :O O_O o_o 8O OwO O-O 0_o O_o O3O o0o ;o_o; o...o 0w0 11 | c.c C.C :-/ :/ :\ =/ =\ :S :| :-X :X :-# :# 12 | :'( ;*( T_T TT_TT T.T Q.Q Q_Q ;_; >:) >;) >:-) 13 | D:< >:( >:C D-:< >:-( :-@ ;( `_´ _>) (¬_¬) (-_-) (^_^') ^_^_^') ^^" ^^^_.^') ^^_^^; ^&^^.^;& ^^^; ^^^7 15 | q(;^;)p (O_o) (._.) (,_,) (;_;) (T_T) (T~T) (ToT) (T^T) (X_X) x_x 16 | bad_words 17 | ahole 18 | anus 19 | ash0le 20 | ash0les 21 | asholes 22 | ass 23 | Ass Monkey 24 | Assface 25 | assh0le 26 | assh0lez 27 | asshole 28 | assholes 29 | assholz 30 | asswipe 31 | azzhole 32 | bassterds 33 | bastard 34 | bastards 35 | bastardz 36 | basterds 37 | basterdz 38 | Biatch 39 | bitch 40 | bitches 41 | Blow Job 42 | boffing 43 | butthole 44 | buttwipe 45 | c0ck 46 | c0cks 47 | c0k 48 | Carpet Muncher 49 | cawk 50 | cawks 51 | Clit 52 | cnts 53 | cntz 54 | cock 55 | cockhead 56 | cock-head 57 | cocks 58 | CockSucker 59 | cock-sucker 60 | crap 61 | cum 62 | cunt 63 | cunts 64 | cuntz 65 | dick 66 | dild0 67 | dild0s 68 | dildo 69 | dildos 70 | dilld0 71 | dilld0s 72 | dominatricks 73 | dominatrics 74 | dominatrix 75 | dyke 76 | enema 77 | f u c k 78 | f u c k e r 79 | fag 80 | fag1t 81 | faget 82 | fagg1t 83 | faggit 84 | faggot 85 | fagit 86 | fags 87 | fagz 88 | faig 89 | faigs 90 | fart 91 | flipping the bird 92 | fuck 93 | fucker 94 | fuckin 95 | fucking 96 | fucks 97 | Fudge Packer 98 | fuk 99 | Fukah 100 | Fuken 101 | fuker 102 | Fukin 103 | Fukk 104 | Fukkah 105 | Fukken 106 | Fukker 107 | Fukkin 108 | g00k 109 | gay 110 | gayboy 111 | gaygirl 112 | gays 113 | gayz 114 | God-damned 115 | h00r 116 | h0ar 117 | h0re 118 | hells 119 | hoar 120 | hoor 121 | hoore 122 | jackoff 123 | jap 124 | japs 125 | jerk-off 126 | jisim 127 | jiss 128 | jizm 129 | jizz 130 | knob 131 | knobs 132 | knobz 133 | kunt 134 | kunts 135 | kuntz 136 | Lesbian 137 | Lezzian 138 | Lipshits 139 | Lipshitz 140 | masochist 141 | masokist 142 | massterbait 143 | masstrbait 144 | masstrbate 145 | masterbaiter 146 | masterbate 147 | masterbates 148 | Motha Fucker 149 | Motha Fuker 150 | Motha Fukkah 151 | Motha Fukker 152 | Mother Fucker 153 | Mother Fukah 154 | Mother Fuker 155 | Mother Fukkah 156 | Mother Fukker 157 | mother-fucker 158 | Mutha Fucker 159 | Mutha Fukah 160 | Mutha Fuker 161 | Mutha Fukkah 162 | Mutha Fukker 163 | n1gr 164 | nastt 165 | nigger; 166 | nigur; 167 | niiger; 168 | niigr; 169 | orafis 170 | orgasim; 171 | orgasm 172 | orgasum 173 | oriface 174 | orifice 175 | orifiss 176 | packi 177 | packie 178 | packy 179 | paki 180 | pakie 181 | paky 182 | pecker 183 | peeenus 184 | peeenusss 185 | peenus 186 | peinus 187 | pen1s 188 | penas 189 | penis 190 | penis-breath 191 | penus 192 | penuus 193 | Phuc 194 | Phuck 195 | Phuk 196 | Phuker 197 | Phukker 198 | polac 199 | polack 200 | polak 201 | Poonani 202 | pr1c 203 | pr1ck 204 | pr1k 205 | pusse 206 | pussee 207 | pussy 208 | puuke 209 | puuker 210 | queer 211 | queers 212 | queerz 213 | qweers 214 | qweerz 215 | qweir 216 | recktum 217 | rectum 218 | retard 219 | sadist 220 | scank 221 | schlong 222 | screwing 223 | semen 224 | sex 225 | sexy 226 | Sh!t 227 | sh1t 228 | sh1ter 229 | sh1ts 230 | sh1tter 231 | sh1tz 232 | shit 233 | shits 234 | shitter 235 | Shitty 236 | Shity 237 | shitz 238 | Shyt 239 | Shyte 240 | Shytty 241 | Shyty 242 | skanck 243 | skank 244 | skankee 245 | skankey 246 | skanks 247 | Skanky 248 | slut 249 | sluts 250 | Slutty 251 | slutz 252 | son-of-a-bitch 253 | tit 254 | turd 255 | va1jina 256 | vag1na 257 | vagiina 258 | vagina 259 | vaj1na 260 | vajina 261 | vullva 262 | vulva 263 | w0p 264 | wh00r 265 | wh0re 266 | whore 267 | xrated 268 | xxx 269 | b!+ch 270 | bitch 271 | blowjob 272 | clit 273 | arschloch 274 | fuck 275 | shit 276 | ass 277 | asshole 278 | b!tch 279 | b17ch 280 | b1tch 281 | bastard 282 | bi+ch 283 | boiolas 284 | buceta 285 | c0ck 286 | cawk 287 | chink 288 | cipa 289 | clits 290 | cock 291 | cum 292 | cunt 293 | dildo 294 | dirsa 295 | ejakulate 296 | fatass 297 | fcuk 298 | fuk 299 | fux0r 300 | hoer 301 | hore 302 | jism 303 | kawk 304 | l3itch 305 | l3i+ch 306 | lesbian 307 | masturbate 308 | masterbat* 309 | masterbat3 310 | motherfucker 311 | s.o.b. 312 | mofo 313 | nazi 314 | nigga 315 | nigger 316 | nutsack 317 | phuck 318 | pimpis 319 | pusse 320 | pussy 321 | scrotum 322 | sh!t 323 | shemale 324 | shi+ 325 | sh!+ 326 | slut 327 | smut 328 | teets 329 | tits 330 | boobs 331 | b00bs 332 | teez 333 | testical 334 | testicle 335 | titt 336 | w00se 337 | jackoff 338 | wank 339 | whoar 340 | whore 341 | *damn 342 | *dyke 343 | *fuck* 344 | *shit* 345 | @$$ 346 | amcik 347 | andskota 348 | arse* 349 | assrammer 350 | ayir 351 | bi7ch 352 | bitch* 353 | bollock* 354 | breasts 355 | butt-pirate 356 | cabron 357 | cazzo 358 | chraa 359 | chuj 360 | Cock* 361 | cunt* 362 | d4mn 363 | daygo 364 | dego 365 | dick* 366 | dike* 367 | dupa 368 | dziwka 369 | ejackulate 370 | Ekrem* 371 | Ekto 372 | enculer 373 | faen 374 | fag* 375 | fanculo 376 | fanny 377 | feces 378 | feg 379 | Felcher 380 | ficken 381 | fitt* 382 | Flikker 383 | foreskin 384 | Fotze 385 | Fu(* 386 | fuk* 387 | futkretzn 388 | gay 389 | gook 390 | guiena 391 | h0r 392 | h4x0r 393 | hell 394 | helvete 395 | hoer* 396 | honkey 397 | Huevon 398 | hui 399 | injun 400 | jizz 401 | kanker* 402 | kike 403 | klootzak 404 | kraut 405 | knulle 406 | kuk 407 | kuksuger 408 | Kurac 409 | kurwa 410 | kusi* 411 | kyrpa* 412 | lesbo 413 | mamhoon 414 | masturbat* 415 | merd* 416 | mibun 417 | monkleigh 418 | mouliewop 419 | muie 420 | mulkku 421 | muschi 422 | nazis 423 | nepesaurio 424 | nigger* 425 | orospu 426 | paska* 427 | perse 428 | picka 429 | pierdol* 430 | pillu* 431 | pimmel 432 | piss* 433 | pizda 434 | poontsee 435 | poop 436 | porn 437 | p0rn 438 | pr0n 439 | preteen 440 | pula 441 | pule 442 | puta 443 | puto 444 | qahbeh 445 | queef* 446 | rautenberg 447 | schaffer 448 | scheiss* 449 | schlampe 450 | schmuck 451 | screw 452 | sh!t* 453 | sharmuta 454 | sharmute 455 | shipal 456 | shiz 457 | skribz 458 | skurwysyn 459 | sphencter 460 | spic 461 | spierdalaj 462 | splooge 463 | stfu 464 | suka 465 | b00b* 466 | testicle* 467 | titt* 468 | twat 469 | vittu 470 | wank* 471 | wetback* 472 | wichser 473 | wop* 474 | yed 475 | zabourah 476 | -------------------------------------------------------------------------------- /datasets/.svn/text-base/emoticons_patch.py.svn-base: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from scrapy.conf import settings 4 | PATH = settings.get("PATH")+"icrawler/icrawler/mining/learning/" 5 | 6 | p_flag = False 7 | n_flag = False 8 | b_flag = False 9 | 10 | positives = {} #{"word1":{"typ":"", "pos1":"", "priorpolarity":}} 11 | negatives = {} #{"word1":{"type":"", "pos1":"", "priorpolarity":}} 12 | emoticons_file = open(PATH + "datasets/emoticons.data","r") 13 | lines = emoticons_file.readlines() 14 | 15 | def patch_emoticons(): 16 | global p_flag, n_flag, b_flag 17 | 18 | for line in lines: 19 | if line.startswith("positive"): 20 | "Parsing Positives...." 21 | p_flag = True 22 | n_flag = False 23 | 24 | elif line.startswith("negative"): 25 | "Parsing Negatives...." 26 | n_flag = True 27 | p_flag = False 28 | 29 | elif line.startswith("bad_words"): 30 | n_flag = False 31 | p_flag = False 32 | b_flag = True 33 | 34 | 35 | if p_flag and not line.startswith("positive"): 36 | p_data = line.split(" ") 37 | for data in p_data: 38 | positives[data] = {"type":"strongsub", "emoticon" : True, "pos1":"anypos", "priorpolarity":"positive"} 39 | 40 | if n_flag and not line.startswith("negative"): 41 | n_data = line.split(" ") 42 | for data in n_data: 43 | negatives[data] = {"type":"strongsub", "emoticon" : True, "pos1":"anypos", "priorpolarity":"negative"} 44 | 45 | if b_flag and not line.startswith("bad_words"): 46 | word = line.replace("\n", "") 47 | negatives[word] = {"type":"strongsub", "pos1":"anypos", "priorpolarity":"negative"} 48 | 49 | #print "POS", len(positives.keys()) 50 | #print "NEG", len(negatives.keys()) 51 | 52 | totals = dict(positives, **negatives) 53 | 54 | #print "TOTALS ", len(totals) 55 | return totals 56 | 57 | def parse_dataset(filename): 58 | 59 | parsed_data = [] 60 | dataset = open(os.path.abspath("") +"/"+ filename,"r") 61 | lines = dataset.readlines() 62 | 63 | for line in lines: 64 | parsed_data.append(line.replace("\n", "")) 65 | 66 | #print len(parsed_data) 67 | return parsed_data 68 | 69 | #patch_emoticons() 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /datasets/.svn/text-base/segmentation_corpus.data.svn-base: -------------------------------------------------------------------------------- 1 | article 2 | http://abcnews.go.com/International/abc-news-christiane-amanpour-reports-protestors-fight-freedom/story?id=12806642 3 | http://www.nytimes.com/roomfordebate/2011/01/31/mubaraks-role-and-mideast-peace 4 | http://www.mirror.co.uk/news/top-stories/2011/02/01/fernando-torres-and-andy-carroll-the-two-multi-million-pound-strikers-who-are-worlds-apart-115875-22890025/ 5 | http://ibnlive.in.com/generalnewsfeed/news/wall-st-gains-on-earnings-economy-egypt-fears-ease/559860.html 6 | http://soccernet.espn.go.com/news/story/_/id/873998/spurs-miss-out-on-adam-by-minutes?cc=5739 7 | http://www.ajc.com/news/nation-world/judge-obamas-health-overhaul-822829.html 8 | http://www.montrealgazette.com/travel/Ottawa+offers+flights+Canadians+urged+leave+Egypt/4193713/story.html 9 | http://www.bbc.co.uk/news/business-12314656 10 | http://www.bbc.co.uk/news/world-africa-12328506 11 | http://sports.espn.go.com/dallas/nfl/news/story?id=6077650 12 | http://blogs.forbes.com/christopherhelman/2011/01/31/will-egypts-revolution-mean-oil-armageddon/ 13 | http://www.palestinechronicle.com/view_article_details.php?id=16603 14 | http://www.thestar.com/opinion/editorialopinion/article/931187--egypt-connected-to-revolution 15 | http://www.euractiv.com/en/global-europe/egypt-revolution-hijacks-eu-ministers-agenda-news-501730 16 | http://www.theglobeandmail.com/news/opinions/opinion/lessons-from-a-revolution-irans-that-is/article1889288/ 17 | http://www.shalomlife.com/eng/14557/Is_Syria's_Bashar_al-Assad_Next/ 18 | http://www.fastcompany.com/1722492/how-social-media-accelerated-the-uprising-in-egypt 19 | http://nation.com.pk/pakistan-news-newspaper-daily-english-online/Politics/31-Jan-2011/Altaf-sounds-Tunisia-Egypt 20 | http://www.afrol.com/articles/37168 21 | http://www.theprovince.com/news/This+start+revolution/4192112/story.html 22 | http://money.cnn.com/2011/01/31/technology/google_executive_missing_egypt/index.htm 23 | http://www.nytimes.com/2011/02/01/world/middleeast/01palestinians.html?_r=1&partner=rss&emc=rss 24 | http://www.ibtimes.com/articles/106733/20110130/egypt-ian-lustick-protests-hosni-mubarak.htm 25 | http://www.usnews.com/opinion/blogs/mary-kate-cary/2011/01/28/egypt-cuts-the-internet-in-the-face-of-revolution 26 | http://threatpost.com/en_us/blogs/facebook-owned-42-egyptian-web-traffic-blackout-013111 27 | http://www.grist.org/article/2011-01-31-the-egyptian-government-has-been-able-to-shut-down-the-internet- 28 | http://www.orlandosentinel.com/news/opinion/os-ed-egypt-unrest-myword-020111-20110131,0,2543131.story 29 | http://globalgrind.com/channel/news/content/1903560/the-revolution-of-the-facebook-generation-by-michael-de-la-rocha/ 30 | http://www.csmonitor.com/Commentary/Editorial-Board-Blog/2011/0131/How-history-might-really-be-made-in-Egypt 31 | http://communities.washingtontimes.com/neighborhood/word-national-interest/2011/jan/31/pity-mubaraks-successor/ 32 | http://www.cbc.ca/canada/windsor/story/2011/01/31/wdr-egypt-reaction.html 33 | http://www.ennaharonline.com/en/international/5681.html 34 | http://www.freep.com/article/20110131/NEWS06/110131023/U-M-MSU-students-affected-by-Egypt-unrest 35 | http://www.currentintelligence.net/gulfstream/2011/1/31/the-great-arab-revolution-and-the-gulf-states.html 36 | http://omadeon.wordpress.com/2011/01/31/omar-suleiman-chief-torturer/ 37 | http://www.expatinvesting.org/egypt-revolution-analysis/ 38 | http://newzeal.blogspot.com/2011/01/revolution-in-egypt-out-of-frying-pan.html 39 | http://politeching.wordpress.com/2011/01/25/egypts-revolution-powered-by-the-internet/ 40 | http://www.huffingtonpost.com/magda-abufadil/media-key-to-covering-egy_b_815920.html 41 | http://www.workers.org/2011/world/wwp_statement_0203/ 42 | http://abcclio.blogspot.com/2011/01/egypt-revolution-at-hand.html 43 | http://kasamaproject.org/2011/01/29/the-egyptian-revolution-a-very-fine-thing/ 44 | http://www.egyptianrevolution.com/2011/01/egypts-security-apparatus/ 45 | http://www.socialist.net/egypt-revolution-knows-no-frontiers.htm 46 | http://www.islamidavet.com/english/2011/01/28/egypt-revolution-hard-to-predict/ 47 | http://www.lewrockwell.com/blog/lewrw/archives/76683.html 48 | http://politicalsex.me/unrest-in-egypt-revolution-sounds-exhausting/ 49 | http://www.voxy.co.nz/politics/egyptian-revolution-finally-breaks-out-down-mubarak/1273/79978 50 | http://theglobalawakening.wordpress.com/2011/01/31/egypt-defense-min-threatens-people/ 51 | http://flagpole.com/Weekly/WorldView/WorldView-28Jan11 52 | http://www.isaintel.com/2011/01/06/remnants-of-a-greek-past-image-from-the-future/ 53 | http://www.anarkismo.net/article/18536 54 | http://daily.swarthmore.edu/announcements/2011/nationalism-anarchism-reform-understanding-political-islam-from-the-inside-out/ 55 | http://www.bakersfield.com/opinion/letters/x864760197/They-owe-victims-an-apology 56 | http://www.deccanherald.com/content/127462/creative-boundary.html 57 | http://www.brecorder.com/news/top-stories/1142748:news.html 58 | http://maceiteagain.wordpress.com/2011/01/28/280111-anarchism-an-introduction/ 59 | http://blogs.ubc.ca/ross/2011/01/%E2%80%9Canarchism%E2%80%A6is-a-living-force-within-our-life%E2%80%A6%E2%80%9D-anarchism-education-and-new-possibilities/ 60 | http://revolutionradio.org/?p=11320 61 | http://www.whoplanswhom.com/blog/2011/socialist-misonceptions-about-market-anarchism/ 62 | http://francoistremblay.wordpress.com/2010/12/13/the-case-for-socialist-intellectual-ownership/ 63 | http://dancull.wordpress.com/2011/01/28/these-seeds-wont-be-uprooted/ 64 | http://antigerman.wordpress.com/2009/11/27/capitalism-a-love-story/ 65 | http://www.carsuk.net/lamborghini-lp550-2-new-entry-level-lamborghini-gallardo/ 66 | http://www.autonewscast.com/2011/01/26/lamborghini-presents-the-gallardo-lp-560-4-bicolore/ 67 | http://www.hot-screensaver.com/2011/01/05/lamborghini-aventador-to-replace-murcielago-in-2011/ 68 | http://www.nybooks.com/blogs/nyrblog/2011/jan/05/end-medvedev-revolution/ 69 | http://www.ottawacitizen.com/news/Little+will+come+Egypt+middle+class+revolution/4197834/story.html 70 | http://www.telegraph.co.uk/news/wikileaks-files/libya-wikileaks/8294609/INVITATION-FOR-THE-SECRETARY-TO-ATTEND-40TH-ANNIVERSARY-OF-QADHAFIS-REVOLUTION.html 71 | http://economictimes.indiatimes.com/news/news-by-industry/services/advertising/online-revolution-may-not-be-televised-nikesh-arora/articleshow/7400360.cms 72 | http://www.esquire.com/blogs/politics/julian-assange-wikileaks-revolution-5072412 73 | http://www.thewrap.com/movies/column-post/tunisian-producer-making-thriller-about-man-who-inspired-jasmine-revolution-24342 74 | http://www.iranian.com/main/2011/jan/shishaki-time 75 | http://sports.yahoo.com/nfl/news?slug=lc-ingamebetting013111 76 | http://www.theatlantic.com/technology/archive/2011/01/twitter-it-wont-start-a-revolution-but-it-can-feed-one/70530/ 77 | http://liveshots.blogs.foxnews.com/2011/01/31/a-secular-revolution-for-now/ 78 | http://www.allaccess.com/net-news/archive/story/86644/miranda-lambert-announces-the-revolution-continues 79 | http://www.newsweek.com/2011/01/30/el-shaheed-the-mysterious-anonymous-behind-egypt-s-revolt.html 80 | http://www.washingtonpost.com/wp-dyn/content/article/2011/01/30/AR2011013003175.html 81 | http://english.peopledaily.com.cn/90001/98705/100621/7277916.html 82 | http://www.kuwaittimes.net/read_news.php?newsid=MTE3MTU4NTk3NA== 83 | http://mwcnews.net/focus/analysis/8354-egyptian-revolution.html 84 | http://www.monitor.co.ug/SpecialReports/Elections/-/859108/1098344/-/jnywf8/-/ 85 | http://www.abugidainfo.com/?p=17136 86 | http://www.sfbg.com/pixel_vision/2011/01/31/aerial-revolution 87 | http://lagunaniguel.patch.com/articles/sc-journalist-egyptians-take-charge-of-the-country 88 | http://www.thoughtleader.co.za/candiceholdsworth/2011/01/31/just-about-time-for-a-revolution/ 89 | http://print.dailymirror.lk/news/front-page-news/34443.html 90 | http://blog.dawn.com/2011/01/31/you-say-you-want-a-revolution/ 91 | http://www.mideastyouth.com/2011/01/31/tunisia%E2%80%99s-intifada-an-uprising-or-a-revolution/ 92 | http://links.org.au/node/2128 93 | http://www.presstv.ir/detail/162823.html 94 | http://www.jpost.com/Features/InThespotlight/Article.aspx?id=205799 95 | http://www.rodiaki.gr/article.php?id=82981&catid=1&subcatid=5 96 | http://www.athinorama.gr/digital/games/articles/default.aspx?id=10006 97 | http://www.sport-fm.gr/article/454202 98 | http://www.newsbeast.gr/culture/arthro/115907/null/ 99 | http://www.theglobeandmail.com/news/politics/ottawa-notebook/will-house-resume-with-a-big-bang/article1888486/ 100 | http://www.newser.com/story/110885/japanese-volcano-may-be-headed-for-big-bang.html 101 | http://www.washingtonpost.com/wp-dyn/content/article/2011/01/27/AR2011012707212.html 102 | http://www.daemonstv.com/2011/01/21/the-big-bang-theory-the-love-car-displacement-review/ 103 | http://www.iol.co.za/tonight/tv-radio/heights-and-lows-for-arm-of-the-law-in-the-hood-1.1019028 104 | http://www.reuters.com/article/2011/01/14/us-ratings-idUSTRE70D6C020110114 105 | http://tubepotato.net/miley-cyrus-could-take-part-in-%E2%80%9Cthe-big-bang%E2%80%9D-movie/33836 106 | http://www.mcccagora.com/a-e/monroe-big-band-makes-big-bang-at-meyer-theater-1.1921045 107 | http://timesofindia.indiatimes.com/home/science/Missing-link-between-Big-Bang-and-star-creation-found/articleshow/7233583.cms 108 | http://www.greatandhra.com/viewnews.php?id=26660&cat=1&scat=4 109 | http://uk.eonline.com/uberblog/watch_with_kristin/b219735_scoop_big_bang_theoryrsquos_johnny.html 110 | http://www.thetowntalk.com/article/20110121/SPORTS/101210344 111 | http://www.spottedbylocals.com/barcelona/big-bang-bar/ 112 | http://www.milforddailynews.com/mysource/boomers/health/x1896054311/Gary-Brown-The-chocolate-diet 113 | http://homedailynews.com/a-car-with-chocolate-layer/4478/ 114 | http://www.mysuburbanlife.com/glenellyn/features/x896128704/Vinegar-mixes-with-chocolate-at-The-Olive-Tap-in-Downers-Grove 115 | http://blog.seattlepi.com/recipesyndicate/archives/237642.asp 116 | http://www.eciks.org/english/lajme.php?action=total_news&main_id=1101 117 | http://ny.eater.com/archives/2011/01/boxed_city_bakery_hot_chocolate_jacques_torres_opens.php 118 | http://dailytrojan.com/2011/01/30/chocolate-milk-more-than-a-simple-snack/ 119 | http://blogs.desmoinesregister.com/dmr/index.php/2011/01/31/chocolate-prices-to-rise/ 120 | http://www.communitypress-online.com/ArticleDisplay.aspx?e=2955313 121 | http://www.csmonitor.com/The-Culture/Food/Stir-It-Up/2011/0127/It-s-National-Chocolate-Cake-Day 122 | http://www.sify.com/news/chocolate-gift-causes-heart-attacks-to-school-boy-news-international-lb2sEiadfcb.html 123 | http://montgomeryvillage.patch.com/articles/a-week-without-pizza 124 | http://www.thegrocer.co.uk/articles.aspx?page=independentarticle&ID=215470 125 | http://blog.seattlepi.com/nowinesnobs/archives/237231.asp 126 | http://www.irontontribune.com/2011/01/29/chocolate-walk-set-for-saturday-in-portsmouth/ 127 | http://www.acorn-online.com/joomla15/lewisboroledger/news/localnews/83906-a-chocolate-valentine-to-benefit-league.html 128 | http://www.internetbits.com/girl-scouts-cut-cookie-varieties/57320/ 129 | http://www.bemidjipioneer.com/event/article/id/100025477/group/homepage/ 130 | http://www.examiner.net/opinions/opinions_columnists/x536400824/Bolton-Do-you-like-chocolate-You-may-be-fueling-violence 131 | http://www.charlotteobserver.com/2011/01/30/2015080/snow-days-sliding-hot-chocolate.html 132 | http://www.coloradoan.com/article/20110131/BUSINESS/101310309/Espresso-People-comes-to-town 133 | http://www.morningstarpublishing.com/articles/2011/01/31/grand_traverse_insider/news/benzie_area/doc4d47176d011fc889886936.txt 134 | http://www.mediaforfreedom.com/readarticle.php?AID=17161 135 | http://www.sigmalive.com/news/scitech/346031 136 | http://en.rian.ru/columnists/20110131/162392378.html 137 | http://www.newvision.co.ug/D/8/16/745324 138 | http://english.ruvr.ru/2011/01/21/40676788.html 139 | 140 | comments 141 | http://www.fool.com/investing/international/2011/01/31/egypt-the-cost-of-revolution.aspx 142 | http://www.huffingtonpost.com/haroon-moghul/4-reasons-why-egypts-revo_b_815848.html 143 | http://www.businessspectator.com.au/bs.nsf/Article/egypt-revolution-cairo-food-prices-oil-pd20110201-DMRQ4?OpenDocument&src=sph 144 | http://pajamasmedia.com/michaelledeen/2011/01/28/revolution-by-whom-for-what/ 145 | http://www.cbc.ca/canada/story/2011/01/31/f-egypt-canadian-voices.html 146 | http://bigthink.com/ideas/26699 147 | http://www.pri.org/business/social-entrepreneurs/facebook-helps-foment-revolution-in-egypt-and-tunisia2586.html 148 | http://www.huffingtonpost.com/haroon-moghul/4-reasons-why-egypts-revo_b_815848.html 149 | http://bensix.wordpress.com/2011/01/26/chaotic-thoughts-on-anarchism/ 150 | http://libcom.org/history/anarchism-syndicalism-brighton-late-19th-early-20th-centuries 151 | http://gigaom.com/2011/01/14/was-what-happened-in-tunisia-a-twitter-revolution/ 152 | http://www.naplesnews.com/blogs/naples-tea-party/2011/jan/31/time_____4___tea____/ 153 | http://media.www.gwhatchet.com/media/storage/paper332/news/2011/01/31/News/Witnessing.A.Revolution-3972096.shtml 154 | http://www.politicsdaily.com/2011/01/31/why-conservatives-differ-on-remaking-the-world/ 155 | http://www.connectmidmichigan.com/news/story.aspx?id=573825 156 | http://www.care2.com/greenliving/chocolate-may-reduce-risk-of-heart-failure.html 157 | http://www.vcstar.com/news/2011/jan/30/emptying-lenins-tomb/ 158 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/datasets/__init__.pyc -------------------------------------------------------------------------------- /datasets/emoticons.data: -------------------------------------------------------------------------------- 1 | positive 2 | :-) :) :o) :] :3 :c) :> =] 8) =) :} :^) 3 | :D C: :-D :D 8D xD XD =D =3 <=3 <=8 :-9 4 | :-* :* <3 <333 =^_^= =>.>= =<_<= =>.<= 5 | \,,/ \m/ \m/\>..... B) B-) 8) 8-) 8 | negative 9 | e.e O.e O.e <=3 <=8 8===D 8===B --!-- :-( :( :c :< :[ :{ 10 | D: D8 D; D= DX v.v :-O :O O_O o_o 8O OwO O-O 0_o O_o O3O o0o ;o_o; o...o 0w0 11 | c.c C.C :-/ :/ :\ =/ =\ :S :| :-X :X :-# :# 12 | :'( ;*( T_T TT_TT T.T Q.Q Q_Q ;_; >:) >;) >:-) 13 | D:< >:( >:C D-:< >:-( :-@ ;( `_´ _>) (¬_¬) (-_-) (^_^') ^_^_^') ^^" ^^^_.^') ^^_^^; ^&^^.^;& ^^^; ^^^7 15 | q(;^;)p (O_o) (._.) (,_,) (;_;) (T_T) (T~T) (ToT) (T^T) (X_X) x_x 16 | bad_words 17 | ahole 18 | anus 19 | ash0le 20 | ash0les 21 | asholes 22 | ass 23 | Ass Monkey 24 | Assface 25 | assh0le 26 | assh0lez 27 | asshole 28 | assholes 29 | assholz 30 | asswipe 31 | azzhole 32 | bassterds 33 | bastard 34 | bastards 35 | bastardz 36 | basterds 37 | basterdz 38 | Biatch 39 | bitch 40 | bitches 41 | Blow Job 42 | boffing 43 | butthole 44 | buttwipe 45 | c0ck 46 | c0cks 47 | c0k 48 | Carpet Muncher 49 | cawk 50 | cawks 51 | Clit 52 | cnts 53 | cntz 54 | cock 55 | cockhead 56 | cock-head 57 | cocks 58 | CockSucker 59 | cock-sucker 60 | crap 61 | cum 62 | cunt 63 | cunts 64 | cuntz 65 | dick 66 | dild0 67 | dild0s 68 | dildo 69 | dildos 70 | dilld0 71 | dilld0s 72 | dominatricks 73 | dominatrics 74 | dominatrix 75 | dyke 76 | enema 77 | f u c k 78 | f u c k e r 79 | fag 80 | fag1t 81 | faget 82 | fagg1t 83 | faggit 84 | faggot 85 | fagit 86 | fags 87 | fagz 88 | faig 89 | faigs 90 | fart 91 | flipping the bird 92 | fuck 93 | fucker 94 | fuckin 95 | fucking 96 | fucks 97 | Fudge Packer 98 | fuk 99 | Fukah 100 | Fuken 101 | fuker 102 | Fukin 103 | Fukk 104 | Fukkah 105 | Fukken 106 | Fukker 107 | Fukkin 108 | g00k 109 | gay 110 | gayboy 111 | gaygirl 112 | gays 113 | gayz 114 | God-damned 115 | h00r 116 | h0ar 117 | h0re 118 | hells 119 | hoar 120 | hoor 121 | hoore 122 | jackoff 123 | jap 124 | japs 125 | jerk-off 126 | jisim 127 | jiss 128 | jizm 129 | jizz 130 | knob 131 | knobs 132 | knobz 133 | kunt 134 | kunts 135 | kuntz 136 | Lesbian 137 | Lezzian 138 | Lipshits 139 | Lipshitz 140 | masochist 141 | masokist 142 | massterbait 143 | masstrbait 144 | masstrbate 145 | masterbaiter 146 | masterbate 147 | masterbates 148 | Motha Fucker 149 | Motha Fuker 150 | Motha Fukkah 151 | Motha Fukker 152 | Mother Fucker 153 | Mother Fukah 154 | Mother Fuker 155 | Mother Fukkah 156 | Mother Fukker 157 | mother-fucker 158 | Mutha Fucker 159 | Mutha Fukah 160 | Mutha Fuker 161 | Mutha Fukkah 162 | Mutha Fukker 163 | n1gr 164 | nastt 165 | nigger; 166 | nigur; 167 | niiger; 168 | niigr; 169 | orafis 170 | orgasim; 171 | orgasm 172 | orgasum 173 | oriface 174 | orifice 175 | orifiss 176 | packi 177 | packie 178 | packy 179 | paki 180 | pakie 181 | paky 182 | pecker 183 | peeenus 184 | peeenusss 185 | peenus 186 | peinus 187 | pen1s 188 | penas 189 | penis 190 | penis-breath 191 | penus 192 | penuus 193 | Phuc 194 | Phuck 195 | Phuk 196 | Phuker 197 | Phukker 198 | polac 199 | polack 200 | polak 201 | Poonani 202 | pr1c 203 | pr1ck 204 | pr1k 205 | pusse 206 | pussee 207 | pussy 208 | puuke 209 | puuker 210 | queer 211 | queers 212 | queerz 213 | qweers 214 | qweerz 215 | qweir 216 | recktum 217 | rectum 218 | retard 219 | sadist 220 | scank 221 | schlong 222 | screwing 223 | semen 224 | sex 225 | sexy 226 | Sh!t 227 | sh1t 228 | sh1ter 229 | sh1ts 230 | sh1tter 231 | sh1tz 232 | shit 233 | shits 234 | shitter 235 | Shitty 236 | Shity 237 | shitz 238 | Shyt 239 | Shyte 240 | Shytty 241 | Shyty 242 | skanck 243 | skank 244 | skankee 245 | skankey 246 | skanks 247 | Skanky 248 | slut 249 | sluts 250 | Slutty 251 | slutz 252 | son-of-a-bitch 253 | tit 254 | turd 255 | va1jina 256 | vag1na 257 | vagiina 258 | vagina 259 | vaj1na 260 | vajina 261 | vullva 262 | vulva 263 | w0p 264 | wh00r 265 | wh0re 266 | whore 267 | xrated 268 | xxx 269 | b!+ch 270 | bitch 271 | blowjob 272 | clit 273 | arschloch 274 | fuck 275 | shit 276 | ass 277 | asshole 278 | b!tch 279 | b17ch 280 | b1tch 281 | bastard 282 | bi+ch 283 | boiolas 284 | buceta 285 | c0ck 286 | cawk 287 | chink 288 | cipa 289 | clits 290 | cock 291 | cum 292 | cunt 293 | dildo 294 | dirsa 295 | ejakulate 296 | fatass 297 | fcuk 298 | fuk 299 | fux0r 300 | hoer 301 | hore 302 | jism 303 | kawk 304 | l3itch 305 | l3i+ch 306 | lesbian 307 | masturbate 308 | masterbat* 309 | masterbat3 310 | motherfucker 311 | s.o.b. 312 | mofo 313 | nazi 314 | nigga 315 | nigger 316 | nutsack 317 | phuck 318 | pimpis 319 | pusse 320 | pussy 321 | scrotum 322 | sh!t 323 | shemale 324 | shi+ 325 | sh!+ 326 | slut 327 | smut 328 | teets 329 | tits 330 | boobs 331 | b00bs 332 | teez 333 | testical 334 | testicle 335 | titt 336 | w00se 337 | jackoff 338 | wank 339 | whoar 340 | whore 341 | *damn 342 | *dyke 343 | *fuck* 344 | *shit* 345 | @$$ 346 | amcik 347 | andskota 348 | arse* 349 | assrammer 350 | ayir 351 | bi7ch 352 | bitch* 353 | bollock* 354 | breasts 355 | butt-pirate 356 | cabron 357 | cazzo 358 | chraa 359 | chuj 360 | Cock* 361 | cunt* 362 | d4mn 363 | daygo 364 | dego 365 | dick* 366 | dike* 367 | dupa 368 | dziwka 369 | ejackulate 370 | Ekrem* 371 | Ekto 372 | enculer 373 | faen 374 | fag* 375 | fanculo 376 | fanny 377 | feces 378 | feg 379 | Felcher 380 | ficken 381 | fitt* 382 | Flikker 383 | foreskin 384 | Fotze 385 | Fu(* 386 | fuk* 387 | futkretzn 388 | gay 389 | gook 390 | guiena 391 | h0r 392 | h4x0r 393 | hell 394 | helvete 395 | hoer* 396 | honkey 397 | Huevon 398 | hui 399 | injun 400 | jizz 401 | kanker* 402 | kike 403 | klootzak 404 | kraut 405 | knulle 406 | kuk 407 | kuksuger 408 | Kurac 409 | kurwa 410 | kusi* 411 | kyrpa* 412 | lesbo 413 | mamhoon 414 | masturbat* 415 | merd* 416 | mibun 417 | monkleigh 418 | mouliewop 419 | muie 420 | mulkku 421 | muschi 422 | nazis 423 | nepesaurio 424 | nigger* 425 | orospu 426 | paska* 427 | perse 428 | picka 429 | pierdol* 430 | pillu* 431 | pimmel 432 | piss* 433 | pizda 434 | poontsee 435 | poop 436 | porn 437 | p0rn 438 | pr0n 439 | preteen 440 | pula 441 | pule 442 | puta 443 | puto 444 | qahbeh 445 | queef* 446 | rautenberg 447 | schaffer 448 | scheiss* 449 | schlampe 450 | schmuck 451 | screw 452 | sh!t* 453 | sharmuta 454 | sharmute 455 | shipal 456 | shiz 457 | skribz 458 | skurwysyn 459 | sphencter 460 | spic 461 | spierdalaj 462 | splooge 463 | stfu 464 | suka 465 | b00b* 466 | testicle* 467 | titt* 468 | twat 469 | vittu 470 | wank* 471 | wetback* 472 | wichser 473 | wop* 474 | yed 475 | zabourah 476 | -------------------------------------------------------------------------------- /datasets/emoticons_patch.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import os 15 | import sys 16 | 17 | p_flag = False 18 | n_flag = False 19 | b_flag = False 20 | 21 | positives = {} #{"word1":{"typ":"", "pos1":"", "priorpolarity":}} 22 | negatives = {} #{"word1":{"type":"", "pos1":"", "priorpolarity":}} 23 | emoticons_file = open("datasets/emoticons.data","r") 24 | lines = emoticons_file.readlines() 25 | 26 | def patch_emoticons(): 27 | global p_flag, n_flag, b_flag 28 | 29 | for line in lines: 30 | if line.startswith("positive"): 31 | print "Parsing Positives...." 32 | p_flag = True 33 | n_flag = False 34 | 35 | elif line.startswith("negative"): 36 | print "Parsing Negatives...." 37 | n_flag = True 38 | p_flag = False 39 | 40 | elif line.startswith("bad_words"): 41 | n_flag = False 42 | p_flag = False 43 | b_flag = True 44 | 45 | if p_flag and not line.startswith("positive"): 46 | p_data = line.split(" ") 47 | for data in p_data: 48 | positives[data] = {"type":"strongsub", "emoticon" : True, "pos1":"anypos", "priorpolarity":"positive"} 49 | 50 | if n_flag and not line.startswith("negative"): 51 | n_data = line.split(" ") 52 | for data in n_data: 53 | negatives[data] = {"type":"strongsub", "emoticon" : True, "pos1":"anypos", "priorpolarity":"negative"} 54 | 55 | if b_flag and not line.startswith("bad_words"): 56 | word = line.replace("\n", "") 57 | negatives[word] = {"type":"strongsub", "pos1":"anypos", "priorpolarity":"negative"} 58 | totals = dict(positives, **negatives) 59 | return totals 60 | 61 | def parse_dataset(filename): 62 | parsed_data = [] 63 | dataset = open(os.path.abspath("") +"/"+ filename,"r") 64 | lines = dataset.readlines() 65 | 66 | for line in lines: 67 | parsed_data.append(line.replace("\n", "")) 68 | return parsed_data 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /datasets/emoticons_patch.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/datasets/emoticons_patch.pyc -------------------------------------------------------------------------------- /datasets/segmentation_corpus.data: -------------------------------------------------------------------------------- 1 | article 2 | http://abcnews.go.com/International/abc-news-christiane-amanpour-reports-protestors-fight-freedom/story?id=12806642 3 | http://www.nytimes.com/roomfordebate/2011/01/31/mubaraks-role-and-mideast-peace 4 | http://www.mirror.co.uk/news/top-stories/2011/02/01/fernando-torres-and-andy-carroll-the-two-multi-million-pound-strikers-who-are-worlds-apart-115875-22890025/ 5 | http://ibnlive.in.com/generalnewsfeed/news/wall-st-gains-on-earnings-economy-egypt-fears-ease/559860.html 6 | http://soccernet.espn.go.com/news/story/_/id/873998/spurs-miss-out-on-adam-by-minutes?cc=5739 7 | http://www.ajc.com/news/nation-world/judge-obamas-health-overhaul-822829.html 8 | http://www.montrealgazette.com/travel/Ottawa+offers+flights+Canadians+urged+leave+Egypt/4193713/story.html 9 | http://www.bbc.co.uk/news/business-12314656 10 | http://www.bbc.co.uk/news/world-africa-12328506 11 | http://sports.espn.go.com/dallas/nfl/news/story?id=6077650 12 | http://blogs.forbes.com/christopherhelman/2011/01/31/will-egypts-revolution-mean-oil-armageddon/ 13 | http://www.palestinechronicle.com/view_article_details.php?id=16603 14 | http://www.thestar.com/opinion/editorialopinion/article/931187--egypt-connected-to-revolution 15 | http://www.euractiv.com/en/global-europe/egypt-revolution-hijacks-eu-ministers-agenda-news-501730 16 | http://www.theglobeandmail.com/news/opinions/opinion/lessons-from-a-revolution-irans-that-is/article1889288/ 17 | http://www.shalomlife.com/eng/14557/Is_Syria's_Bashar_al-Assad_Next/ 18 | http://www.fastcompany.com/1722492/how-social-media-accelerated-the-uprising-in-egypt 19 | http://nation.com.pk/pakistan-news-newspaper-daily-english-online/Politics/31-Jan-2011/Altaf-sounds-Tunisia-Egypt 20 | http://www.afrol.com/articles/37168 21 | http://www.theprovince.com/news/This+start+revolution/4192112/story.html 22 | http://money.cnn.com/2011/01/31/technology/google_executive_missing_egypt/index.htm 23 | http://www.nytimes.com/2011/02/01/world/middleeast/01palestinians.html?_r=1&partner=rss&emc=rss 24 | http://www.ibtimes.com/articles/106733/20110130/egypt-ian-lustick-protests-hosni-mubarak.htm 25 | http://www.usnews.com/opinion/blogs/mary-kate-cary/2011/01/28/egypt-cuts-the-internet-in-the-face-of-revolution 26 | http://threatpost.com/en_us/blogs/facebook-owned-42-egyptian-web-traffic-blackout-013111 27 | http://www.grist.org/article/2011-01-31-the-egyptian-government-has-been-able-to-shut-down-the-internet- 28 | http://www.orlandosentinel.com/news/opinion/os-ed-egypt-unrest-myword-020111-20110131,0,2543131.story 29 | http://globalgrind.com/channel/news/content/1903560/the-revolution-of-the-facebook-generation-by-michael-de-la-rocha/ 30 | http://www.csmonitor.com/Commentary/Editorial-Board-Blog/2011/0131/How-history-might-really-be-made-in-Egypt 31 | http://communities.washingtontimes.com/neighborhood/word-national-interest/2011/jan/31/pity-mubaraks-successor/ 32 | http://www.cbc.ca/canada/windsor/story/2011/01/31/wdr-egypt-reaction.html 33 | http://www.ennaharonline.com/en/international/5681.html 34 | http://www.freep.com/article/20110131/NEWS06/110131023/U-M-MSU-students-affected-by-Egypt-unrest 35 | http://www.currentintelligence.net/gulfstream/2011/1/31/the-great-arab-revolution-and-the-gulf-states.html 36 | http://omadeon.wordpress.com/2011/01/31/omar-suleiman-chief-torturer/ 37 | http://www.expatinvesting.org/egypt-revolution-analysis/ 38 | http://newzeal.blogspot.com/2011/01/revolution-in-egypt-out-of-frying-pan.html 39 | http://politeching.wordpress.com/2011/01/25/egypts-revolution-powered-by-the-internet/ 40 | http://www.huffingtonpost.com/magda-abufadil/media-key-to-covering-egy_b_815920.html 41 | http://www.workers.org/2011/world/wwp_statement_0203/ 42 | http://abcclio.blogspot.com/2011/01/egypt-revolution-at-hand.html 43 | http://kasamaproject.org/2011/01/29/the-egyptian-revolution-a-very-fine-thing/ 44 | http://www.egyptianrevolution.com/2011/01/egypts-security-apparatus/ 45 | http://www.socialist.net/egypt-revolution-knows-no-frontiers.htm 46 | http://www.islamidavet.com/english/2011/01/28/egypt-revolution-hard-to-predict/ 47 | http://www.lewrockwell.com/blog/lewrw/archives/76683.html 48 | http://politicalsex.me/unrest-in-egypt-revolution-sounds-exhausting/ 49 | http://www.voxy.co.nz/politics/egyptian-revolution-finally-breaks-out-down-mubarak/1273/79978 50 | http://theglobalawakening.wordpress.com/2011/01/31/egypt-defense-min-threatens-people/ 51 | http://flagpole.com/Weekly/WorldView/WorldView-28Jan11 52 | http://www.isaintel.com/2011/01/06/remnants-of-a-greek-past-image-from-the-future/ 53 | http://www.anarkismo.net/article/18536 54 | http://daily.swarthmore.edu/announcements/2011/nationalism-anarchism-reform-understanding-political-islam-from-the-inside-out/ 55 | http://www.bakersfield.com/opinion/letters/x864760197/They-owe-victims-an-apology 56 | http://www.deccanherald.com/content/127462/creative-boundary.html 57 | http://www.brecorder.com/news/top-stories/1142748:news.html 58 | http://maceiteagain.wordpress.com/2011/01/28/280111-anarchism-an-introduction/ 59 | http://blogs.ubc.ca/ross/2011/01/%E2%80%9Canarchism%E2%80%A6is-a-living-force-within-our-life%E2%80%A6%E2%80%9D-anarchism-education-and-new-possibilities/ 60 | http://revolutionradio.org/?p=11320 61 | http://www.whoplanswhom.com/blog/2011/socialist-misonceptions-about-market-anarchism/ 62 | http://francoistremblay.wordpress.com/2010/12/13/the-case-for-socialist-intellectual-ownership/ 63 | http://dancull.wordpress.com/2011/01/28/these-seeds-wont-be-uprooted/ 64 | http://antigerman.wordpress.com/2009/11/27/capitalism-a-love-story/ 65 | http://www.carsuk.net/lamborghini-lp550-2-new-entry-level-lamborghini-gallardo/ 66 | http://www.autonewscast.com/2011/01/26/lamborghini-presents-the-gallardo-lp-560-4-bicolore/ 67 | http://www.hot-screensaver.com/2011/01/05/lamborghini-aventador-to-replace-murcielago-in-2011/ 68 | http://www.nybooks.com/blogs/nyrblog/2011/jan/05/end-medvedev-revolution/ 69 | http://www.ottawacitizen.com/news/Little+will+come+Egypt+middle+class+revolution/4197834/story.html 70 | http://www.telegraph.co.uk/news/wikileaks-files/libya-wikileaks/8294609/INVITATION-FOR-THE-SECRETARY-TO-ATTEND-40TH-ANNIVERSARY-OF-QADHAFIS-REVOLUTION.html 71 | http://economictimes.indiatimes.com/news/news-by-industry/services/advertising/online-revolution-may-not-be-televised-nikesh-arora/articleshow/7400360.cms 72 | http://www.esquire.com/blogs/politics/julian-assange-wikileaks-revolution-5072412 73 | http://www.thewrap.com/movies/column-post/tunisian-producer-making-thriller-about-man-who-inspired-jasmine-revolution-24342 74 | http://www.iranian.com/main/2011/jan/shishaki-time 75 | http://sports.yahoo.com/nfl/news?slug=lc-ingamebetting013111 76 | http://www.theatlantic.com/technology/archive/2011/01/twitter-it-wont-start-a-revolution-but-it-can-feed-one/70530/ 77 | http://liveshots.blogs.foxnews.com/2011/01/31/a-secular-revolution-for-now/ 78 | http://www.allaccess.com/net-news/archive/story/86644/miranda-lambert-announces-the-revolution-continues 79 | http://www.newsweek.com/2011/01/30/el-shaheed-the-mysterious-anonymous-behind-egypt-s-revolt.html 80 | http://www.washingtonpost.com/wp-dyn/content/article/2011/01/30/AR2011013003175.html 81 | http://english.peopledaily.com.cn/90001/98705/100621/7277916.html 82 | http://www.kuwaittimes.net/read_news.php?newsid=MTE3MTU4NTk3NA== 83 | http://mwcnews.net/focus/analysis/8354-egyptian-revolution.html 84 | http://www.monitor.co.ug/SpecialReports/Elections/-/859108/1098344/-/jnywf8/-/ 85 | http://www.abugidainfo.com/?p=17136 86 | http://www.sfbg.com/pixel_vision/2011/01/31/aerial-revolution 87 | http://lagunaniguel.patch.com/articles/sc-journalist-egyptians-take-charge-of-the-country 88 | http://www.thoughtleader.co.za/candiceholdsworth/2011/01/31/just-about-time-for-a-revolution/ 89 | http://print.dailymirror.lk/news/front-page-news/34443.html 90 | http://blog.dawn.com/2011/01/31/you-say-you-want-a-revolution/ 91 | http://www.mideastyouth.com/2011/01/31/tunisia%E2%80%99s-intifada-an-uprising-or-a-revolution/ 92 | http://links.org.au/node/2128 93 | http://www.presstv.ir/detail/162823.html 94 | http://www.jpost.com/Features/InThespotlight/Article.aspx?id=205799 95 | http://www.rodiaki.gr/article.php?id=82981&catid=1&subcatid=5 96 | http://www.athinorama.gr/digital/games/articles/default.aspx?id=10006 97 | http://www.sport-fm.gr/article/454202 98 | http://www.newsbeast.gr/culture/arthro/115907/null/ 99 | http://www.theglobeandmail.com/news/politics/ottawa-notebook/will-house-resume-with-a-big-bang/article1888486/ 100 | http://www.newser.com/story/110885/japanese-volcano-may-be-headed-for-big-bang.html 101 | http://www.washingtonpost.com/wp-dyn/content/article/2011/01/27/AR2011012707212.html 102 | http://www.daemonstv.com/2011/01/21/the-big-bang-theory-the-love-car-displacement-review/ 103 | http://www.iol.co.za/tonight/tv-radio/heights-and-lows-for-arm-of-the-law-in-the-hood-1.1019028 104 | http://www.reuters.com/article/2011/01/14/us-ratings-idUSTRE70D6C020110114 105 | http://tubepotato.net/miley-cyrus-could-take-part-in-%E2%80%9Cthe-big-bang%E2%80%9D-movie/33836 106 | http://www.mcccagora.com/a-e/monroe-big-band-makes-big-bang-at-meyer-theater-1.1921045 107 | http://timesofindia.indiatimes.com/home/science/Missing-link-between-Big-Bang-and-star-creation-found/articleshow/7233583.cms 108 | http://www.greatandhra.com/viewnews.php?id=26660&cat=1&scat=4 109 | http://uk.eonline.com/uberblog/watch_with_kristin/b219735_scoop_big_bang_theoryrsquos_johnny.html 110 | http://www.thetowntalk.com/article/20110121/SPORTS/101210344 111 | http://www.spottedbylocals.com/barcelona/big-bang-bar/ 112 | http://www.milforddailynews.com/mysource/boomers/health/x1896054311/Gary-Brown-The-chocolate-diet 113 | http://homedailynews.com/a-car-with-chocolate-layer/4478/ 114 | http://www.mysuburbanlife.com/glenellyn/features/x896128704/Vinegar-mixes-with-chocolate-at-The-Olive-Tap-in-Downers-Grove 115 | http://blog.seattlepi.com/recipesyndicate/archives/237642.asp 116 | http://www.eciks.org/english/lajme.php?action=total_news&main_id=1101 117 | http://ny.eater.com/archives/2011/01/boxed_city_bakery_hot_chocolate_jacques_torres_opens.php 118 | http://dailytrojan.com/2011/01/30/chocolate-milk-more-than-a-simple-snack/ 119 | http://blogs.desmoinesregister.com/dmr/index.php/2011/01/31/chocolate-prices-to-rise/ 120 | http://www.communitypress-online.com/ArticleDisplay.aspx?e=2955313 121 | http://www.csmonitor.com/The-Culture/Food/Stir-It-Up/2011/0127/It-s-National-Chocolate-Cake-Day 122 | http://www.sify.com/news/chocolate-gift-causes-heart-attacks-to-school-boy-news-international-lb2sEiadfcb.html 123 | http://montgomeryvillage.patch.com/articles/a-week-without-pizza 124 | http://www.thegrocer.co.uk/articles.aspx?page=independentarticle&ID=215470 125 | http://blog.seattlepi.com/nowinesnobs/archives/237231.asp 126 | http://www.irontontribune.com/2011/01/29/chocolate-walk-set-for-saturday-in-portsmouth/ 127 | http://www.acorn-online.com/joomla15/lewisboroledger/news/localnews/83906-a-chocolate-valentine-to-benefit-league.html 128 | http://www.internetbits.com/girl-scouts-cut-cookie-varieties/57320/ 129 | http://www.bemidjipioneer.com/event/article/id/100025477/group/homepage/ 130 | http://www.examiner.net/opinions/opinions_columnists/x536400824/Bolton-Do-you-like-chocolate-You-may-be-fueling-violence 131 | http://www.charlotteobserver.com/2011/01/30/2015080/snow-days-sliding-hot-chocolate.html 132 | http://www.coloradoan.com/article/20110131/BUSINESS/101310309/Espresso-People-comes-to-town 133 | http://www.morningstarpublishing.com/articles/2011/01/31/grand_traverse_insider/news/benzie_area/doc4d47176d011fc889886936.txt 134 | http://www.mediaforfreedom.com/readarticle.php?AID=17161 135 | http://www.sigmalive.com/news/scitech/346031 136 | http://en.rian.ru/columnists/20110131/162392378.html 137 | http://www.newvision.co.ug/D/8/16/745324 138 | http://english.ruvr.ru/2011/01/21/40676788.html 139 | 140 | comments 141 | http://www.fool.com/investing/international/2011/01/31/egypt-the-cost-of-revolution.aspx 142 | http://www.huffingtonpost.com/haroon-moghul/4-reasons-why-egypts-revo_b_815848.html 143 | http://www.businessspectator.com.au/bs.nsf/Article/egypt-revolution-cairo-food-prices-oil-pd20110201-DMRQ4?OpenDocument&src=sph 144 | http://pajamasmedia.com/michaelledeen/2011/01/28/revolution-by-whom-for-what/ 145 | http://www.cbc.ca/canada/story/2011/01/31/f-egypt-canadian-voices.html 146 | http://bigthink.com/ideas/26699 147 | http://www.pri.org/business/social-entrepreneurs/facebook-helps-foment-revolution-in-egypt-and-tunisia2586.html 148 | http://www.huffingtonpost.com/haroon-moghul/4-reasons-why-egypts-revo_b_815848.html 149 | http://bensix.wordpress.com/2011/01/26/chaotic-thoughts-on-anarchism/ 150 | http://libcom.org/history/anarchism-syndicalism-brighton-late-19th-early-20th-centuries 151 | http://gigaom.com/2011/01/14/was-what-happened-in-tunisia-a-twitter-revolution/ 152 | http://www.naplesnews.com/blogs/naples-tea-party/2011/jan/31/time_____4___tea____/ 153 | http://media.www.gwhatchet.com/media/storage/paper332/news/2011/01/31/News/Witnessing.A.Revolution-3972096.shtml 154 | http://www.politicsdaily.com/2011/01/31/why-conservatives-differ-on-remaking-the-world/ 155 | http://www.connectmidmichigan.com/news/story.aspx?id=573825 156 | http://www.care2.com/greenliving/chocolate-may-reduce-risk-of-heart-failure.html 157 | http://www.vcstar.com/news/2011/jan/30/emptying-lenins-tomb/ 158 | -------------------------------------------------------------------------------- /hp_classifiers.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import sys 15 | import nltk 16 | from lexicon import Lexicon 17 | from stemming.porter2 import stem 18 | 19 | class HpSubj: 20 | """ 21 | High precision subjective sentence classifier which uses an annotated 22 | lexicon of words as features. It classifies a sentence as subjective 23 | if it contains two or more of the strong subjective clues. 24 | """ 25 | 26 | def __init__(self, debug=False): 27 | self.dictionary = Lexicon().words 28 | self.debug = debug 29 | 30 | def classify(self, sentence): 31 | wdict = self.dictionary 32 | words = nltk.word_tokenize(sentence) 33 | strong_subjective_words_count = 0 34 | subjective = False 35 | for word in words: 36 | word = word.lower() 37 | check = [word, stem(word)] 38 | for w in check: 39 | if wdict.has_key(w) and wdict[w]['type'] == 'strongsubj': 40 | strong_subjective_words_count += 1 41 | if strong_subjective_words_count >= 2: 42 | subjective = True 43 | break 44 | return subjective 45 | 46 | 47 | class HpObj: 48 | """ 49 | High precision objective sentence classifier which uses an annotated 50 | lexicon as training data. It classifies a sentence as objective if it 51 | doesn't contain along with its previous and next sentence, not even 52 | one strong subjective clue and at most one weak subjective clue. 53 | """ 54 | 55 | def __init__(self, debug=False): 56 | 57 | self.lexicon = Lexicon() 58 | self.dictionary = self.lexicon.words 59 | self.debug = debug 60 | 61 | def classify(self, current, previous="", next=""): 62 | if self.debug: 63 | print 64 | print "current:", current 65 | print "previous:", previous 66 | print "next:", next 67 | print 68 | wdict = self.dictionary 69 | words = nltk.word_tokenize(current) 70 | prev_words = nltk.word_tokenize(previous) 71 | next_words = nltk.word_tokenize(next) 72 | words += prev_words + next_words 73 | strong_subjective_words_count = 0 74 | weak_subjective_words_count = 0 75 | objective = True 76 | 77 | for word in words: 78 | word = word.lower() 79 | check = [word, stem(word)] 80 | for w in check: 81 | if wdict.has_key(w): 82 | if wdict[w]['type'] == 'strongsubj': 83 | strong_subjective_words_count += 1 84 | if strong_subjective_words_count > 0: 85 | objective = False 86 | break 87 | elif wdict[w]['type'] == 'weaksubj': 88 | weak_subjective_words_count += 1 89 | if weak_subjective_words_count > 1: 90 | objective = False 91 | break 92 | return objective 93 | 94 | 95 | if __name__ == '__main__': 96 | hpo = HpObj() 97 | hps = HpSubj() 98 | print "Objective: " + (str)(hpo.classify(sys.argv[1])) 99 | print "Subjective: " + (str)(hps.classify(sys.argv[1])) 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /images/bootstrap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/images/bootstrap.png -------------------------------------------------------------------------------- /lexicon.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import pickle 15 | import os 16 | import sys 17 | from datasets.emoticons_patch import patch_emoticons 18 | 19 | class Lexicon: 20 | """ 21 | Lexicon class loads an annotated dataset of words 22 | that have strong/weak subjectivity and is used to 23 | train the high precision objective and subjective 24 | classifiers. 25 | """ 26 | 27 | def __init__(self): 28 | self.filename = "stored/lexicon" 29 | try: 30 | self.words = pickle.load(open(self.filename)) 31 | except: 32 | self.words = {} 33 | self.load() 34 | output = open(self.filename, 'wb') 35 | pickle.dump(self.words, output) 36 | 37 | def load(self): 38 | """ 39 | The method loads the annotated corpus and extracts the structure 40 | with easy access for the classifiers. 41 | """ 42 | dictionary_file = open("datasets/subjclueslen1-HLTEMNLP05.tff","r") 43 | lines = dictionary_file.readlines() 44 | 45 | for line in lines: 46 | attributes = line.split(" ") 47 | for index,attr in enumerate(attributes): 48 | if attr.find('word1') > -1: 49 | word_value = attr.split("=")[1] 50 | attributes[index] = [] 51 | break 52 | if self.words.has_key(word_value): 53 | for attr in attributes: 54 | if attr != []: 55 | arr = attr.split("=") 56 | key = arr[0] 57 | if key == "pos1": 58 | pos = self.words[word_value][key] 59 | self.words[word_value][key].append(arr[1]) 60 | break 61 | else: 62 | self.words[word_value] = {} 63 | for attr in attributes: 64 | if attr != []: 65 | arr = attr.split("=") 66 | key = arr[0] 67 | if len(arr) > 1: 68 | value = arr[1] 69 | if key == "pos1": 70 | self.words[word_value][key] = [value.replace("\n", "")] 71 | else: 72 | self.words[word_value][key] = value.replace("\n", "") 73 | 74 | self.words = dict(patch_emoticons(), **self.words) 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /lexicon/negative-words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/lexicon/negative-words.txt -------------------------------------------------------------------------------- /pb_classifiers.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import os 15 | import sys 16 | import nltk 17 | import random 18 | import datetime 19 | from terminal_colors import Tcolors 20 | sys.path.append(os.path.abspath("") + "/../") 21 | 22 | class PbSubj: 23 | """ 24 | PbSubj: Pattern-Based subjective sentence classifier which classifies a 25 | sentence as subjective with a probability of the top-matched 26 | pattern among a list of strongly associated with subjectivity 27 | patterns. The selection of these patterns is made using two thresholds 28 | t1 and t2. The patterns whom frequency is greater than t1 and the 29 | subjective frequency greater than t2 are selected. 30 | """ 31 | 32 | def __init__(self, tagger, debug=False): 33 | self.tagger = tagger 34 | # Patterns learned from the pattern learner 35 | self.learned_patterns = {} 36 | # Strong subjective patterns 37 | self.ss_patterns = {} 38 | self.sorted_ss_patterns = None 39 | self.t1_threshold = 5 # 3 40 | self.t2_threshold = 1 # 0.9 41 | self.pl_threshold = 25 42 | self.limit = 1 43 | self.debug = debug 44 | 45 | def classify(self, sentence): 46 | """ 47 | Classify sentence based on the probabilities of the strongly 48 | associated patterns with subjectivity. 49 | """ 50 | found = False 51 | matched_pattern = None 52 | # POS tagging 53 | tagged_sentence = self.tagger.tag(sentence) 54 | words = [] 55 | tags = [] 56 | for (word, tag) in tagged_sentence: 57 | words.append(word) 58 | if tag is None: 59 | tag = "" 60 | tags.append(tag) 61 | 62 | for (pattern, value) in self.sorted_ss_patterns: 63 | display = value['display'] 64 | pattern_type = value['type'] 65 | try: 66 | pos_in_sentence = sentence.find(display) 67 | except: 68 | pos_in_sentence = -1 69 | 70 | if pos_in_sentence > -1: 71 | matched_pattern = value 72 | if pattern_type == "subj": 73 | found = self.search_for_subject(display, words, tags) 74 | elif pattern_type in ["dobj", "np"]: 75 | remaining_sentence = sentence[pos_in_sentence:] 76 | found = self.search_for_object(display, words, tags) 77 | if found: 78 | break 79 | if not found: 80 | objective = False 81 | subjective = False 82 | else: 83 | if self.debug: print matched_pattern 84 | random.seed() 85 | if random.uniform(0,1) <= matched_pattern['prob']: 86 | subjective = True 87 | objective = False 88 | if self.debug: print "Probability: " + (str)(matched_pattern['prob']) 89 | else: 90 | objective = True 91 | subjective = False 92 | if self.debug: print "Probability: " + (str)(1 - matched_pattern['prob']) 93 | return found, subjective, objective 94 | 95 | 96 | def find_needle_in_haystack(self, needle, haystack): 97 | """ 98 | This method finds the position of the intersection on 99 | the haystack array (if there is one). 100 | """ 101 | r = [] 102 | L = len(needle) 103 | for i in range(len(haystack)): 104 | if haystack[i:i+L] == needle: 105 | r.append(i) 106 | return r 107 | 108 | 109 | def search_for_object(self, pattern, words, tags): 110 | pattern_words = pattern.split() 111 | position = self.find_needle_in_haystack(pattern_words, words) 112 | if len(position) > 0: 113 | position = position[0] + len(pattern_words) 114 | for i, tag in enumerate(tags[position:]): 115 | if i < self.limit and (tag.find("NN") > -1 or tag.find("NP") > -1\ 116 | or tag.find("PR") > -1): 117 | return True 118 | return False 119 | 120 | 121 | def search_for_subject(self, pattern, words, tags): 122 | pattern_words = pattern.split() 123 | position = self.find_needle_in_haystack(pattern_words, words) 124 | if len(position) > 0: 125 | position = position[0] - 1 126 | for i, tag in enumerate(tags[position:]): 127 | if i < self.limit and (tag.find("NN") > -1 or tag.find("NP") > -1\ 128 | or tag.find("PR") > -1): 129 | return True 130 | return False 131 | 132 | 133 | def select_strong_subjective_patterns(self): 134 | """ 135 | Selection of the strongly associated with subjectivity patterns 136 | using the thresholds t1 and t2. 137 | """ 138 | self.ss_patterns = {} 139 | for pattern in self.learned_patterns.keys(): 140 | freq = self.learned_patterns[pattern]['freq'] 141 | prob = self.learned_patterns[pattern]['prob'] 142 | if freq >= self.t1_threshold and prob >= self.t2_threshold: 143 | self.ss_patterns[pattern] = self.learned_patterns[pattern] 144 | # delete some patterns with low frequency and probability for efficiency 145 | elif freq > 5 and freq < ((self.t1_threshold*3) / 4): 146 | del(self.learned_patterns[pattern]) 147 | 148 | sorted_ss = sorted(self.ss_patterns.iteritems(),key=lambda x: x[1]['prob'], reverse=True) 149 | self.sorted_ss_patterns = sorted_ss 150 | for (s,v) in sorted_ss: 151 | title = (Tcolors.OKGREEN+s+Tcolors.ENDC+" ").ljust(70,'-') 152 | pbs = (str)(v['freq'])+"/" + Tcolors.CYAN + (str)(v['prob']) + Tcolors.ENDC 153 | if self.debug: print title + "------------> " + pbs 154 | if self.debug: print 155 | if len(sorted_ss) > self.pl_threshold: 156 | self.t1_threshold += 1 157 | 158 | def train(self, learned_patterns): 159 | """ 160 | Train classifier with the learned patterns derived from 161 | the pattern learner. 162 | """ 163 | self.learned_patterns = learned_patterns 164 | self.select_strong_subjective_patterns() 165 | 166 | 167 | -------------------------------------------------------------------------------- /polarity.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | from __future__ import division 15 | import nltk 16 | import string 17 | import sys 18 | from terminal_colors import Tcolors 19 | from stemming.porter2 import stem 20 | sys.path.append(sys.path[0] + "/../") 21 | 22 | 23 | class PolarityClassifier: 24 | """ 25 | PolarityClassifier: Rule-based polarity classification of sentences 26 | according to the following paper: 27 | 28 | T. Wilson, J. Wiebe, and P. Hoffmann. Recognizing contextual polarity 29 | in phrase-level sentiment analysis. In Proceedings of the conference 30 | on Human Language Technology and Empirical Methods in Natural Language 31 | Processing, HLT '05, pages 347--354, 2005. 32 | 33 | Enhancements: We have incorporated emoticons and slung dictionary 34 | apart from the MPQA lexicon that is used in the paper. 35 | """ 36 | 37 | def __init__(self, tagger, lexicon, debug=False): 38 | self.lexicon = lexicon.words 39 | self.sentence = None 40 | self.words = [] 41 | self.feature_words = {} 42 | self.polar_expressions = [] 43 | self.polar_with_tags = {} 44 | self.polar_with_score = {} 45 | self.strong_polar_expressions = [] 46 | self.negation_words = ["not", "no", "but"] 47 | self.tagger = tagger 48 | self.words_pos_tags = [] 49 | self.stokens = [] 50 | self.emotions_score = [] 51 | self.emoticons = [] 52 | self.debug = debug 53 | 54 | def apply_emotions(self): 55 | """ 56 | Compute emotion scores based on emoticon list. 57 | """ 58 | score = 0 59 | for token in self.stokens: 60 | if self.lexicon.has_key(token) \ 61 | and self.lexicon[token].has_key('emoticon'): 62 | if self.lexicon[token]['priorpolarity'] == "negative": 63 | score = -2 64 | else: 65 | score = 2 66 | self.emoticons.append(token) 67 | self.emotions_score.append(score) 68 | 69 | def apply_weights(self): 70 | """ 71 | Adjust (2*n times) scores for polar expressions. 72 | """ 73 | # Strong emotion heuristic based on punctuation 74 | strong_emotion = 1 75 | if self.sentence.endswith("!"): 76 | strong_emotion = 1.5 77 | elif self.sentence.endswith("?"): 78 | strong_emotion = 0.5 79 | 80 | for i,polar in enumerate(self.polar_expressions): 81 | #first rule: if strong double score 82 | if polar in self.strong_polar_expressions: 83 | self.polar_with_score[polar] *= 2 84 | #second rule: if intensified polar double score 85 | if self.intensified_polar(polar): 86 | self.polar_with_score[polar] *= 2 87 | #third rule: if polar expr is adjective double score 88 | if self.polar_with_tags[polar] == "adj": 89 | self.polar_with_score[polar] *= 2 90 | 91 | if i + 1 == len(self.polar_expressions): 92 | self.polar_with_score[polar] *= strong_emotion 93 | 94 | def check_precedings(self, polar, words): 95 | """ 96 | Find the indexes of polar words and 5 preceding of it. 97 | """ 98 | if words.index(polar) >= 6: 99 | return True 100 | else: 101 | return False 102 | 103 | def classify(self, sentence): 104 | """ 105 | Sum up the contextual scores of polar expressions and classify the sentence. 106 | """ 107 | self.sentence = sentence 108 | # Extracting Features from sentence 109 | self.extract_features() 110 | # Performing Word Sense Disambiguation 111 | # Extracting Polar Expressions 112 | self.word_sense_disambiguation() 113 | # Checking for negation words 114 | self.negation_modeling() 115 | # Adjusting weights to Polar Expressions 116 | self.apply_weights() 117 | self.apply_emotions() 118 | # Performing polarity classification 119 | [prediction, score] = self.predict_class() 120 | 121 | 122 | if self.debug: 123 | print "\n[*] --------------------RESULTS----------------------" 124 | print Tcolors.ADD + " FEATURE WORDS:", self.feature_words 125 | print Tcolors.ADD + " POLAR EXPRESSIONS FOUND:", self.polar_expressions 126 | print Tcolors.ADD + " POLAR WEIGHTS:", self.polar_with_score 127 | print Tcolors.ADD + " EMOTICONS:", self.emoticons 128 | print Tcolors.ADD + " EMOTION WEIGHTS:", self.emotions_score 129 | print Tcolors.ADD + " PREDICTION: ", prediction, ", WITH CONFIDENCE: ", score 130 | print Tcolors.ADD + " NORMALIZED CONFIDENCE: ", score/len(self.words) 131 | 132 | self.words = [w for w in self.words if w != ''] 133 | return prediction, score, score/len(self.words) #normalizedScore 134 | 135 | def extract_features(self): 136 | """ 137 | Match positive and negative words of a sentence with a score of +1 or -1 138 | respectively, if found in the lexicon. 139 | """ 140 | self.words = self.tokenize_words(self.sentence) 141 | words = self.words 142 | for word in words: 143 | word = word.lower() 144 | if word in self.lexicon: 145 | if self.lexicon[word]['priorpolarity'] == "positive": 146 | if not self.feature_words.has_key(word): 147 | self.feature_words[word] = 1 148 | else: 149 | self.feature_words[word] += 1 150 | elif self.lexicon[word]['priorpolarity'] == "negative": 151 | if not self.feature_words.has_key(word): 152 | self.feature_words[word] = -1 153 | else: 154 | self.feature_words[word] -= 1 155 | else: 156 | self.feature_words[word] = 0 157 | 158 | def intensified_polar(self, polar): 159 | if self.words.index(polar) > 0: 160 | previous_word = self.words[self.words.index(polar) - 1] 161 | 162 | if self.lexicon.has_key(previous_word) and self.lexicon[previous_word]["type"]=="strongsubj" \ 163 | and 'adj' in self.lexicon[previous_word]["pos1"]: 164 | return True 165 | return False 166 | 167 | def match_tags(self, pos_tag): 168 | if pos_tag: 169 | if pos_tag.startswith("VB"): 170 | return "verb" 171 | elif pos_tag.startswith("JJ"): 172 | return "adj" 173 | elif pos_tag.startswith("NN"): 174 | return "noun" 175 | elif pos_tag.startswith("RB"): 176 | return "adverb" 177 | else: 178 | return 'anypos' 179 | else: 180 | return "anypos" 181 | 182 | def negation_modeling(self): 183 | """ 184 | Examine negation words and reassign polarity. 185 | """ 186 | for polar in self.polar_expressions: 187 | has_precedings = self.check_precedings(polar, self.words) 188 | for neg in self.negation_words: 189 | if has_precedings: 190 | if (neg in self.words[(self.words.index(polar)-5):self.words.index(polar)])\ 191 | or self.polarity_shifting(polar, self.words[(self.words.index(polar)-5):self.words.index(polar)]): 192 | self.polar_with_score[polar] = self.polar_with_score[polar]*(-1) 193 | break 194 | else: 195 | if neg in self.words[0:self.words.index(polar)] or \ 196 | self.polarity_shifting(polar, self.words[0:self.words.index(polar)]): 197 | self.polar_with_score[polar] = self.polar_with_score[polar]*(-1) 198 | 199 | def predict_class(self): 200 | summary = 0 201 | summary = sum([value for value in self.polar_with_score.values()]) 202 | summary += sum(self.emotions_score) 203 | 204 | if summary > 0: 205 | return "positive", summary 206 | elif summary < 0: 207 | return "negative", summary 208 | else: 209 | return "neutral", summary 210 | 211 | def polarity_shifting(self, polar, words): 212 | for word in words: 213 | if (self.lexicon.has_key(word) and self.lexicon[word]["type"]=="strongsubj" \ 214 | and self.lexicon[word]["priorpolarity"] != self.lexicon[polar]["priorpolarity"] \ 215 | and self.lexicon[word]["priorpolarity"]!="neutral") or \ 216 | word.endswith("n't"): 217 | # reverse polarity of polar 218 | if self.debug: 219 | print "[!] NEGATION WORD FOUND: ",word 220 | return True 221 | 222 | return False 223 | 224 | def tokenize_words(self, sentence): 225 | words = nltk.word_tokenize(sentence.lower()) 226 | self.stokens = sentence.split() 227 | for i,word in enumerate(words): 228 | if word[len(word)-1] in string.punctuation: 229 | words[words.index(word)] = word[0:(len(word)-1)] 230 | 231 | self.words_pos_tags = self.tagger.tag(sentence) 232 | for (word, TAG) in self.words_pos_tags: 233 | word = word.lower() 234 | # Handle punctuation in word_tokenization 235 | if TAG and TAG.startswith("VB"): 236 | if word in words: 237 | words[words.index(word)] = stem(word) 238 | return words 239 | 240 | def word_sense_disambiguation(self): 241 | """ 242 | Disambiguate words in a sentence if only if the POS tag of the word matches 243 | the POS tag in the lexicon 244 | """ 245 | for (word, TAG) in self.words_pos_tags: 246 | # Handle punctuation in word_tokenization 247 | if word[len(word)-1] in string.punctuation: 248 | word = word[0:(len(word)-1)] 249 | matched_tag = self.match_tags(TAG) 250 | word = word.lower() 251 | words = [word, stem(word)] 252 | for w in words: 253 | if self.feature_words.has_key(w) and (matched_tag in self.lexicon[w]["pos1"] \ 254 | or self.lexicon[w]["pos1"][0] == "anypos" \ 255 | or matched_tag == "anypos"): 256 | self.polar_expressions.append(w) 257 | self.polar_with_score[w] = self.feature_words[w] 258 | self.polar_with_tags[w] = matched_tag 259 | if self.lexicon[w]['type'] == "strongsubj": 260 | self.strong_polar_expressions.append(w) 261 | 262 | self.polar_expressions = list(set(self.polar_expressions)) 263 | 264 | 265 | if __name__ == '__main__': 266 | polarity = PolarityClassifier(sys.argv[1]) 267 | polarity.classify() 268 | 269 | 270 | 271 | 272 | -------------------------------------------------------------------------------- /pos.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import os 15 | import sys 16 | import pickle 17 | import nltk.corpus, nltk.tag, itertools 18 | from terminal_colors import Tcolors 19 | 20 | class SequentialTagger: 21 | """ 22 | Sequential tagger: It uses a sequential tagging method for tagging 23 | untagged sentences. Three tag classifiers are used in sequential 24 | order (Unigram, Bigram and Trigram) that are trained with brown 25 | corpus. Experiments have been made to select this specific UBT 26 | sequence that seems to have better precision than the other 27 | combinations. 28 | """ 29 | def __init__(self): 30 | 31 | self.filename = "stored/ubt_tagger.classifier" 32 | try: 33 | self.ubt_tagger = pickle.load(open(self.filename)) 34 | print Tcolors.ADD + Tcolors.OKBLUE + " Loaded existing UBT tagger!" + Tcolors.ENDC 35 | except: 36 | print Tcolors.ACT + Tcolors.RED + " Existing UBT tagger not found." + Tcolors.ENDC 37 | print "Path:", "stored/ubt_tagger.classifier" 38 | print "Training..." 39 | brown_review_all = nltk.corpus.brown.tagged_sents() 40 | brown_review_sents = nltk.corpus.brown.tagged_sents(categories=['reviews']) 41 | brown_lore_sents = nltk.corpus.brown.tagged_sents(categories=['lore']) 42 | brown_romance_sents = nltk.corpus.brown.tagged_sents(categories=['romance']) 43 | 44 | brown_train = list(itertools.chain(brown_review_sents[:1000], 45 | brown_lore_sents[:1000], 46 | brown_romance_sents[:1000])) 47 | brown_test = list(itertools.chain(brown_review_sents[1000:2000], 48 | brown_lore_sents[1000:2000], 49 | brown_romance_sents[1000:2000])) 50 | 51 | conll_sents = nltk.corpus.conll2000.tagged_sents() 52 | conll2_sents = nltk.corpus.conll2002.tagged_sents() 53 | conll_train = list(conll_sents[:4000]) 54 | conll_test = list(conll_sents[4000:8000]) 55 | 56 | treebank_sents = nltk.corpus.treebank.tagged_sents() 57 | treebank_train = list(treebank_sents[:1500]) 58 | treebank_test = list(treebank_sents[1500:3000]) 59 | train_sents = conll_sents + conll2_sents + treebank_sents + brown_train 60 | test_sents = conll_test 61 | 62 | ubt_tagger = self.backoff_tagger(train_sents, [nltk.tag.UnigramTagger, 63 | nltk.tag.BigramTagger, 64 | nltk.tag.TrigramTagger]) 65 | self.ubt_tagger = ubt_tagger 66 | output = open(self.filename,'wb') 67 | pickle.dump(self.ubt_tagger,output) 68 | output.close() 69 | 70 | def backoff_tagger(self, tagged_sents, tagger_classes, backoff=None): 71 | """ 72 | Creation the sequential tagger 73 | """ 74 | if not backoff: 75 | backoff = tagger_classes[0](tagged_sents) 76 | del tagger_classes[0] 77 | 78 | for cls in tagger_classes: 79 | tagger = cls(tagged_sents, backoff=backoff) 80 | backoff = tagger 81 | 82 | return backoff 83 | 84 | def tag(self, sentence): 85 | """ 86 | Method for tagging untagged sentences. 87 | """ 88 | words = nltk.word_tokenize(sentence) 89 | return self.ubt_tagger.tag(words) 90 | -------------------------------------------------------------------------------- /replacer.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import re 15 | from nltk.corpus import wordnet 16 | 17 | class RepeatReplacer(object): 18 | """ 19 | RepeatReplacer: Replaces letters that appear in irregular 20 | repetition inside words. 21 | """ 22 | 23 | def __init__(self, lexicon): 24 | self.lexicon = lexicon.words 25 | self.repeat_regexp = re.compile(r'(.*)(.)\2(.*)') 26 | self.repl = r'\1\2\3' 27 | 28 | def replace(self, word): 29 | check = re.sub(r'\!|;|\||\.|\?|,|:|"|\)|\(','',word) 30 | if self.lexicon.has_key(word) and self.lexicon[word].has_key('emoticon'): 31 | return word 32 | if wordnet.synsets(check): 33 | if word == check: 34 | return word 35 | else: 36 | return check + "".join(set(word[len(check):])) 37 | repl_word = self.repeat_regexp.sub(self.repl, word) 38 | if repl_word != word: 39 | return self.replace(repl_word) 40 | else: 41 | return repl_word 42 | 43 | 44 | if __name__ == '__main__': 45 | rr = RepeatReplacer() 46 | example = "sorrryyyyyyyyyyy" 47 | print "Before: " + example 48 | rr.replace(example) 49 | print "After: " + example 50 | -------------------------------------------------------------------------------- /sentiment.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import os 15 | import sys 16 | import nltk 17 | import pickle 18 | from bootstrapping import Bootstrapping 19 | from pos import SequentialTagger 20 | from hp_classifiers import HpObj, HpSubj 21 | from polarity import PolarityClassifier 22 | from replacer import RepeatReplacer 23 | from terminal_colors import Tcolors 24 | 25 | DEBUG = False 26 | 27 | class Sentiment: 28 | """ 29 | Sentiment: Analyses the global sentiment of given text regions 30 | that are decomposed to sentences, using bootstrapping methods for 31 | subjectivity and polarity classification. All sub modules except 32 | from POS tagging are learning by experience. 33 | """ 34 | 35 | def __init__(self): 36 | self.pos_tagger = SequentialTagger() 37 | self.hp_obj = HpObj(debug=DEBUG) 38 | self.hp_subj = HpSubj(debug=DEBUG) 39 | self.lexicon = self.hp_obj.lexicon 40 | self.bootstrapping = Bootstrapping(self.hp_obj, self.hp_subj, self.pos_tagger, debug=DEBUG) 41 | self.sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle') 42 | self.total_sentences = ["good","bad"] 43 | self.total_sentiments = ["positive","negative"] 44 | 45 | def analyze(self, clean_text_areas): 46 | """ 47 | Analysis of text regions using the following order: Each sentence per 48 | region is passed from the subjectivity classification using bootstrapping 49 | method and then if it turns out to be subjective it is passed 50 | from the polarity classification using bootstrapping method also. 51 | Finally, it results to a decision for the sentiment of the sentence 52 | and the overall sentiment of the regions. 53 | """ 54 | if len(clean_text_areas) > 0: 55 | for clean_text in clean_text_areas: 56 | # Sentence detection 57 | clean_text = self.normalize(clean_text) 58 | try: 59 | sentences = self.sentence_tokenizer.tokenize(clean_text) 60 | except: 61 | return {} 62 | sentiments = [] 63 | scores = [] 64 | nscores = [] 65 | results = {'positive':{'count' : 0, 'score' : 0, 'nscore' : 0}, 66 | 'neutral':{'count' : 0, 'score' : 0, 'nscore' : 0}, 67 | 'negative':{'count' : 0, 'score' : 0, 'nscore' : 0}} 68 | 69 | print 70 | print Tcolors.ACT + " Checking block of text:" 71 | for i, sentence in enumerate(sentences): 72 | print "[" + str(i+1) + "] " + sentence 73 | for i, sentence in enumerate(sentences): 74 | # Proceed to subjectivity classification (bootstrapping procedure). 75 | # (This step could be skipped in case you deal with subjective sentences only.) 76 | sentiment = "" 77 | previous = "" 78 | next = "" 79 | score = 0 80 | nscore = 0 81 | if i == 0 and i + 1 < len(sentences): 82 | next = sentences[i+1] 83 | elif i != 0 and i < len(sentences): 84 | if i + 1 != len(sentences): 85 | next = sentences[i+1] 86 | previous = sentences[i-1] 87 | 88 | if DEBUG: print Tcolors.ACT + " Analyzing subjectivity..." 89 | result = self.bootstrapping.classify(sentence, previous, next) 90 | if result is None: 91 | res = 'Not found!' 92 | else: 93 | res = result 94 | if DEBUG: 95 | print Tcolors.RES + Tcolors.OKGREEN + " " + res + Tcolors.ENDC 96 | print 97 | 98 | # If sentence is subjective 99 | if result == 'subjective' or result is None: 100 | # Proceed to polarity classification 101 | if DEBUG: print Tcolors.ACT + " Analyzing sentiment..." 102 | polarity_classifier = PolarityClassifier(self.pos_tagger, self.lexicon, debug=DEBUG) 103 | sentiment, score, nscore = polarity_classifier.classify(sentence) 104 | if DEBUG: print Tcolors.RES + Tcolors.OKGREEN + " " + sentiment + Tcolors.ENDC 105 | # If sentence is objective 106 | elif result == 'objective': 107 | sentiment = 'neutral' 108 | 109 | # Collect high-confidence training instances for SVM classifier. 110 | # After the training, SVM can be used to classify new sentences. 111 | #if sentiment != "neutral" and sentiment != "": 112 | #if sentiment != "neutral" and abs(nscore) >= 0.4: 113 | # self.total_sentences.append(sentence) 114 | # self.total_sentiments.append(sentiment) 115 | 116 | # Store results to memory 117 | sentiments.append(sentiment) 118 | scores.append(score) 119 | nscores.append(nscore) 120 | 121 | # Update score 122 | if results.has_key(sentiment): 123 | results[sentiment]['nscore'] += nscore 124 | results[sentiment]['score'] += score 125 | results[sentiment]['count'] += 1 126 | 127 | print 128 | print Tcolors.ACT + " Overall sentiment analysis:" 129 | print Tcolors.BGH 130 | print " Parts: ", len(sentences) 131 | print " Sentiments: ", sentiments 132 | print " Scores: ", scores 133 | print " Results: ", "},\n\t ".join((str)(results).split("}, ")) 134 | print Tcolors.C 135 | 136 | pcount = results['positive']['count'] 137 | ncount = results['negative']['count'] 138 | total = len(sentences) 139 | print Tcolors.BG 140 | print " subjective".ljust(16,"-") + "> %.2f" % ((float)(pcount + ncount)*100 / total) + "%" 141 | print " objective".ljust(16,"-") + "> %.2f" % (100 - ((float)(pcount + ncount)*100 / total)) + "%" 142 | print Tcolors.C 143 | print Tcolors.BGGRAY 144 | for sense in results.keys(): 145 | count = results[sense]['count'] 146 | percentage = (float)(count) * 100 / (len(sentences)) 147 | print " " +sense.ljust(15,"-")+"> %.2f" % (percentage) + "%" 148 | 149 | print Tcolors.C 150 | ssum = sum(scores) 151 | confidence = " (%.2f, %.2f)" % (ssum,sum(nscores)) 152 | final_sent = "" 153 | pos = True 154 | if results["negative"]["count"] > len(sentences)*1.0/3: 155 | pos = False 156 | 157 | # Print total sentiment score and normalized sentiment score 158 | if ssum > 0 and pos: 159 | print Tcolors.RES + Tcolors.OKGREEN + " positive" + confidence + Tcolors.C 160 | final_sent = "positive" 161 | elif ssum == 0: 162 | print Tcolors.RES + Tcolors.OKGREEN + " neutral" + confidence + Tcolors.C 163 | final_sent = "neutral" 164 | else: 165 | print Tcolors.RES + Tcolors.OKGREEN + " negative" + confidence + Tcolors.C 166 | final_sent = "negative" 167 | print Tcolors.C 168 | 169 | # Store results 170 | total_result_hash = {'sentences' : sentences, 171 | 'sentiments': sentiments, 172 | 'scores' : scores, 173 | 'nscores' : nscores, 174 | 'results' : results, 175 | 'final' : {final_sent:{'score':ssum,'nscore':sum(nscores)}}} 176 | # Train SVM classifier 177 | # self.train_svm() 178 | return total_result_hash 179 | 180 | def normalize(self, text): 181 | """ 182 | Make some word improvements before feeding to the sentence tokenizer. 183 | """ 184 | rr = RepeatReplacer(self.lexicon) 185 | normalized_text = [] 186 | final = None 187 | try: 188 | for word in text.split(): 189 | normal = rr.replace(word.lower()) 190 | if word[0].isupper(): 191 | normal = normal[0].upper() + normal[1:] 192 | 193 | normalized_text.append(normal) 194 | final = " ".join(normalized_text) 195 | except: 196 | final = text 197 | 198 | return final 199 | 200 | def train_svm(self): 201 | """ 202 | Train SVM and store data with pickle. 203 | """ 204 | self.svm.train(self.total_sentences, self.total_sentiments) 205 | t_output = open(self.svm_train_filename,'wb') 206 | l_output = open(self.svm_label_filename,'wb') 207 | pickle.dump(self.total_sentences,t_output) 208 | pickle.dump(self.total_sentiments,l_output) 209 | t_output.close() 210 | l_output.close() 211 | 212 | 213 | if __name__ == '__main__': 214 | sentiment = Sentiment() 215 | if len(sys.argv) > 1: 216 | sentiment.analyze([sys.argv[1]]) 217 | else: 218 | sentiment.analyze([u"I was blown away by some of the comments here posted by people who is either uneducated, ignorant, self-righteous or all-of-the-above...I'm irritated and saddened as I read these \"finger-pointing\" or \"I'm right and you're wrong\" type of posts! Grow up folks! You're not in grade school...learn to embrace what is positive and move forward to do what is right... I have to give much love and respect to Ronny...your work is AMAZING!!! You cannot fathom how good I feel after I watched this video...regardless of history, politics, or whatever forces that makes what the mid-east today...for what you did and many of the followers in Iran and Palestine ...I BELIEVE TOMORROW WILL BE BETTER!!!!!! My name is Christopher Lee, I'm a nurse in Los Angeles and I {HEART} YOU ALL (especially to all of you beautiful and sweet ladies across the way)!!!!!"]) 219 | -------------------------------------------------------------------------------- /stored/learned_patterns: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'learning ' 3 | p1 4 | (dp2 5 | S'type' 6 | p3 7 | S'dobj' 8 | p4 9 | sS'freq' 10 | p5 11 | I8 12 | sS'subj_freq' 13 | p6 14 | I8 15 | sS'display' 16 | p7 17 | S'learning' 18 | p8 19 | sS'prob' 20 | p9 21 | F1.0 22 | ssVwatched 23 | p10 24 | (dp11 25 | S'subj_freq' 26 | p12 27 | I25 28 | sS'freq' 29 | p13 30 | I25 31 | sS'type' 32 | p14 33 | S'dobj' 34 | p15 35 | sS'display' 36 | p16 37 | Vwatched 38 | p17 39 | sS'prob' 40 | p18 41 | F1.0 42 | ssV tracks 43 | p19 44 | (dp20 45 | S'type' 46 | p21 47 | S'subj' 48 | p22 49 | sS'freq' 50 | p23 51 | I9 52 | sS'subj_freq' 53 | p24 54 | I9 55 | sS'display' 56 | p25 57 | Vtracks 58 | p26 59 | sS'prob' 60 | p27 61 | F1.0 62 | ssVimagined 63 | p28 64 | (dp29 65 | g21 66 | S'dobj' 67 | p30 68 | sg23 69 | I9 70 | sg24 71 | I9 72 | sg25 73 | Vimagined 74 | p31 75 | sg27 76 | F1.0 77 | ssVbring 78 | p32 79 | (dp33 80 | g21 81 | g30 82 | sg23 83 | I9 84 | sg24 85 | I9 86 | sg25 87 | Vbring 88 | p34 89 | sg27 90 | F1.0 91 | ssVfeatured 92 | p35 93 | (dp36 94 | g21 95 | g30 96 | sg23 97 | I9 98 | sg24 99 | I9 100 | sg25 101 | Vfeatured 102 | p37 103 | sg27 104 | F1.0 105 | ssV posted people 106 | p38 107 | (dp39 108 | g12 109 | I34 110 | sg13 111 | I34 112 | sg14 113 | S'subj' 114 | p40 115 | sg16 116 | Vposted people 117 | p41 118 | sg18 119 | F1.0 120 | ssV get seat 121 | p42 122 | (dp43 123 | g21 124 | g22 125 | sg23 126 | I9 127 | sg24 128 | I9 129 | sg25 130 | Vget seat 131 | p44 132 | sg27 133 | F1.0 134 | ssS" 'm" 135 | p45 136 | (dp46 137 | S'subj_freq' 138 | p47 139 | I7 140 | sS'freq' 141 | p48 142 | I7 143 | sS'type' 144 | p49 145 | S'subj' 146 | p50 147 | sS'display' 148 | p51 149 | S"'m" 150 | p52 151 | sS'prob' 152 | p53 153 | F1.0 154 | ssVdid 155 | p54 156 | (dp55 157 | g12 158 | I25 159 | sg13 160 | I25 161 | sg14 162 | g15 163 | sg16 164 | Vdid 165 | p56 166 | sg18 167 | F1.0 168 | ssVkeep 169 | p57 170 | (dp58 171 | g21 172 | g30 173 | sg23 174 | I9 175 | sg24 176 | I9 177 | sg25 178 | Vkeep 179 | p59 180 | sg27 181 | F1.0 182 | ssV featured 183 | p60 184 | (dp61 185 | g21 186 | g22 187 | sg23 188 | I9 189 | sg24 190 | I9 191 | sg25 192 | Vfeatured 193 | p62 194 | sg27 195 | F1.0 196 | ssS' found' 197 | p63 198 | (dp64 199 | S'subj_freq' 200 | p65 201 | I4 202 | sS'freq' 203 | p66 204 | I4 205 | sS'type' 206 | p67 207 | S'subj' 208 | p68 209 | sS'display' 210 | p69 211 | S'found' 212 | p70 213 | sS'prob' 214 | p71 215 | F1.0 216 | ssVconveyed 217 | p72 218 | (dp73 219 | g21 220 | g30 221 | sg23 222 | I9 223 | sg24 224 | I9 225 | sg25 226 | Vconveyed 227 | p74 228 | sg27 229 | F1.0 230 | ssVCalled 231 | p75 232 | (dp76 233 | g21 234 | g30 235 | sg23 236 | I23 237 | sg24 238 | I23 239 | sg25 240 | VCalled 241 | p77 242 | sg27 243 | F1.0 244 | ssVwait 245 | p78 246 | (dp79 247 | g21 248 | g30 249 | sg23 250 | I7 251 | sg24 252 | I7 253 | sg25 254 | Vwait 255 | p80 256 | sg27 257 | F1.0 258 | ssVforces 259 | p81 260 | (dp82 261 | g12 262 | I25 263 | sg13 264 | I25 265 | sg14 266 | g15 267 | sg16 268 | Vforces 269 | p83 270 | sg18 271 | F1.0 272 | ssV did 273 | p84 274 | (dp85 275 | g12 276 | I25 277 | sg13 278 | I25 279 | sg14 280 | g40 281 | sg16 282 | Vdid 283 | p86 284 | sg18 285 | F1.0 286 | ssV blown 287 | p87 288 | (dp88 289 | g12 290 | I34 291 | sg13 292 | I34 293 | sg14 294 | g40 295 | sg16 296 | Vblown 297 | p89 298 | sg18 299 | F1.0 300 | ssS"'m " 301 | p90 302 | (dp91 303 | g47 304 | I7 305 | sg48 306 | I7 307 | sg49 308 | S'dobj' 309 | p92 310 | sg51 311 | S"'m" 312 | p93 313 | sg53 314 | F1.0 315 | ssV keep 316 | p94 317 | (dp95 318 | g21 319 | g22 320 | sg23 321 | I9 322 | sg24 323 | I9 324 | sg25 325 | Vkeep 326 | p96 327 | sg27 328 | F1.0 329 | ssVposted 330 | p97 331 | (dp98 332 | g12 333 | I34 334 | sg13 335 | I34 336 | sg14 337 | g15 338 | sg16 339 | Vposted 340 | p99 341 | sg18 342 | F1.0 343 | ssV Called L 344 | p100 345 | (dp101 346 | g21 347 | g22 348 | sg23 349 | I7 350 | sg24 351 | I7 352 | sg25 353 | VCalled L 354 | p102 355 | sg27 356 | F1.0 357 | ssVreleased 358 | p103 359 | (dp104 360 | g21 361 | g30 362 | sg23 363 | I9 364 | sg24 365 | I9 366 | sg25 367 | Vreleased 368 | p105 369 | sg27 370 | F1.0 371 | ssS'found ' 372 | p106 373 | (dp107 374 | g65 375 | I4 376 | sg66 377 | I4 378 | sg67 379 | S'dobj' 380 | p108 381 | sg69 382 | S'found' 383 | p109 384 | sg71 385 | F1.0 386 | ssVblown 387 | p110 388 | (dp111 389 | g12 390 | I34 391 | sg13 392 | I34 393 | sg14 394 | g15 395 | sg16 396 | Vblown 397 | p112 398 | sg18 399 | F1.0 400 | ssVCompiled 401 | p113 402 | (dp114 403 | g21 404 | g30 405 | sg23 406 | I9 407 | sg24 408 | I9 409 | sg25 410 | VCompiled 411 | p115 412 | sg27 413 | F1.0 414 | ssV get 415 | p116 416 | (dp117 417 | g21 418 | g22 419 | sg23 420 | I9 421 | sg24 422 | I9 423 | sg25 424 | Vget 425 | p118 426 | sg27 427 | F1.0 428 | ssV give 429 | p119 430 | (dp120 431 | g12 432 | I37 433 | sg13 434 | I37 435 | sg14 436 | g40 437 | sg16 438 | Vgive 439 | p121 440 | sg18 441 | F1.0 442 | ssV conveyed 443 | p122 444 | (dp123 445 | g21 446 | g22 447 | sg23 448 | I9 449 | sg24 450 | I9 451 | sg25 452 | Vconveyed 453 | p124 454 | sg27 455 | F1.0 456 | ssV forces 457 | p125 458 | (dp126 459 | g12 460 | I25 461 | sg13 462 | I25 463 | sg14 464 | g40 465 | sg16 466 | Vforces 467 | p127 468 | sg18 469 | F1.0 470 | ssV released 471 | p128 472 | (dp129 473 | g21 474 | g22 475 | sg23 476 | I9 477 | sg24 478 | I9 479 | sg25 480 | Vreleased 481 | p130 482 | sg27 483 | F1.0 484 | ssV was blown 485 | p131 486 | (dp132 487 | g12 488 | I34 489 | sg13 490 | I34 491 | sg14 492 | g40 493 | sg16 494 | Vwas blown 495 | p133 496 | sg18 497 | F1.0 498 | ssV capture 499 | p134 500 | (dp135 501 | g21 502 | g22 503 | sg23 504 | I9 505 | sg24 506 | I9 507 | sg25 508 | Vcapture 509 | p136 510 | sg27 511 | F1.0 512 | ssVtracks 513 | p137 514 | (dp138 515 | g21 516 | g30 517 | sg23 518 | I9 519 | sg24 520 | I9 521 | sg25 522 | Vtracks 523 | p139 524 | sg27 525 | F1.0 526 | ssS' found presentation' 527 | p140 528 | (dp141 529 | g65 530 | I4 531 | sg66 532 | I4 533 | sg67 534 | g68 535 | sg69 536 | S'found presentation' 537 | p142 538 | sg71 539 | F1.0 540 | ssS' think' 541 | p143 542 | (dp144 543 | S'type' 544 | p145 545 | S'subj' 546 | p146 547 | sS'freq' 548 | p147 549 | I4 550 | sS'subj_freq' 551 | p148 552 | I4 553 | sS'display' 554 | p149 555 | S'think' 556 | p150 557 | sS'prob' 558 | p151 559 | F1.0 560 | ssVgive 561 | p152 562 | (dp153 563 | g12 564 | I37 565 | sg13 566 | I37 567 | sg14 568 | g15 569 | sg16 570 | Vgive 571 | p154 572 | sg18 573 | F1.0 574 | ssS'think ' 575 | p155 576 | (dp156 577 | g145 578 | S'dobj' 579 | p157 580 | sg147 581 | I4 582 | sg148 583 | I4 584 | sg149 585 | S'think' 586 | p158 587 | sg151 588 | F1.0 589 | ssV posted 590 | p159 591 | (dp160 592 | g12 593 | I34 594 | sg13 595 | I34 596 | sg14 597 | g40 598 | sg16 599 | Vposted 600 | p161 601 | sg18 602 | F1.0 603 | ssV Called D 604 | p162 605 | (dp163 606 | g21 607 | g22 608 | sg23 609 | I7 610 | sg24 611 | I7 612 | sg25 613 | VCalled D 614 | p164 615 | sg27 616 | F1.0 617 | ssV Called 618 | p165 619 | (dp166 620 | g21 621 | g22 622 | sg23 623 | I23 624 | sg24 625 | I23 626 | sg25 627 | VCalled 628 | p167 629 | sg27 630 | F1.0 631 | ssS'sharing ' 632 | p168 633 | (dp169 634 | g21 635 | g30 636 | sg23 637 | I14 638 | sg24 639 | I0 640 | sg25 641 | S'sharing' 642 | p170 643 | sg27 644 | F0.0 645 | ssS' learning' 646 | p171 647 | (dp172 648 | g3 649 | S'subj' 650 | p173 651 | sg5 652 | I8 653 | sg6 654 | I8 655 | sg7 656 | S'learning' 657 | p174 658 | sg9 659 | F1.0 660 | ssVget 661 | p175 662 | (dp176 663 | g21 664 | g30 665 | sg23 666 | I9 667 | sg24 668 | I9 669 | sg25 670 | Vget 671 | p177 672 | sg27 673 | F1.0 674 | ssV wait 675 | p178 676 | (dp179 677 | g21 678 | g22 679 | sg23 680 | I7 681 | sg24 682 | I7 683 | sg25 684 | Vwait 685 | p180 686 | sg27 687 | F1.0 688 | ssV Called Ad 689 | p181 690 | (dp182 691 | g21 692 | g22 693 | sg23 694 | I14 695 | sg24 696 | I14 697 | sg25 698 | VCalled Ad 699 | p183 700 | sg27 701 | F1.0 702 | ssVread 703 | p184 704 | (dp185 705 | g12 706 | I34 707 | sg13 708 | I34 709 | sg14 710 | g15 711 | sg16 712 | Vread 713 | p186 714 | sg18 715 | F1.0 716 | ssV imagined 717 | p187 718 | (dp188 719 | g21 720 | g22 721 | sg23 722 | I9 723 | sg24 724 | I9 725 | sg25 726 | Vimagined 727 | p189 728 | sg27 729 | F1.0 730 | ssV watched 731 | p190 732 | (dp191 733 | g12 734 | I25 735 | sg13 736 | I25 737 | sg14 738 | g40 739 | sg16 740 | Vwatched 741 | p192 742 | sg18 743 | F1.0 744 | ssS' sharing' 745 | p193 746 | (dp194 747 | g21 748 | g22 749 | sg23 750 | I14 751 | sg24 752 | I0 753 | sg25 754 | S'sharing' 755 | p195 756 | sg27 757 | F0.0 758 | ssV bring 759 | p196 760 | (dp197 761 | g21 762 | g22 763 | sg23 764 | I9 765 | sg24 766 | I9 767 | sg25 768 | Vbring 769 | p198 770 | sg27 771 | F1.0 772 | ssVcapture 773 | p199 774 | (dp200 775 | g21 776 | g30 777 | sg23 778 | I9 779 | sg24 780 | I9 781 | sg25 782 | Vcapture 783 | p201 784 | sg27 785 | F1.0 786 | ssV Compiled 787 | p202 788 | (dp203 789 | g21 790 | g22 791 | sg23 792 | I9 793 | sg24 794 | I9 795 | sg25 796 | VCompiled 797 | p204 798 | sg27 799 | F1.0 800 | ssV imagined worlds 801 | p205 802 | (dp206 803 | g21 804 | g22 805 | sg23 806 | I9 807 | sg24 808 | I9 809 | sg25 810 | Vimagined worlds 811 | p207 812 | sg27 813 | F1.0 814 | ssV Called Land 815 | p208 816 | (dp209 817 | g21 818 | g22 819 | sg23 820 | I23 821 | sg24 822 | I23 823 | sg25 824 | VCalled Land 825 | p210 826 | sg27 827 | F1.0 828 | ssV read 829 | p211 830 | (dp212 831 | g12 832 | I34 833 | sg13 834 | I34 835 | sg14 836 | g40 837 | sg16 838 | Vread 839 | p213 840 | sg18 841 | F1.0 842 | ss. -------------------------------------------------------------------------------- /stored/ubt_tagger.classifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nik0spapp/usent/52fe720964001344897db2ad9a98cb53a64be717/stored/ubt_tagger.classifier -------------------------------------------------------------------------------- /svm.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | import nltk 15 | import sys 16 | import os 17 | import pickle 18 | import numpy as np 19 | from PyML import svm, ker, featsel 20 | from PyML.containers.vectorDatasets import SparseDataSet, VectorDataSet 21 | from PyML.classifiers.composite import Chain, FeatureSelect 22 | from scrapy.conf import settings 23 | from terminal_colors import Tcolors 24 | from PyML.classifiers.svm import loadSVM 25 | 26 | class SvmClassifier: 27 | """ 28 | SVM classifier: Performing training and prediction of sentiment class. 29 | """ 30 | def __init__(self, lexicon, C=1, num_features=100): 31 | self.training_set = None 32 | self.classes = None 33 | self.test_set = None 34 | self.results = None 35 | self.kernel = ker.Linear() 36 | self.C = C 37 | self.feature_data = PATH + "/learning/stored/feature.data" 38 | self.label_data = PATH + "/learning/stored/svm_label.data" 39 | self.lexicon = lexicon 40 | self.num_features = len(self.lexicon.words.keys()) 41 | try: 42 | print "Loading existing SVM..." 43 | features = pickle.load(open(self.feature_data)) 44 | labels = pickle.load(open(self.label_data)) 45 | sparsedata = SparseDataSet(features, L=labels) 46 | self.svm_classifier = loadSVM(PATH + "/learning/stored/svm.classifier",sparsedata) 47 | except Exception as e: 48 | print e 49 | print "Existing SVM not found!" 50 | self.svm_classifier = svm.SVM(self.kernel) 51 | self.accuracy = None 52 | self.predicted_labels = None 53 | score = featsel.FeatureScore('golub') 54 | self.filter = featsel.Filter(score) 55 | self.feature_selector = FeatureSelect(self.svm_classifier, self.filter) 56 | self.chain = Chain([self.feature_selector, self.svm_classifier]) 57 | 58 | def classify(self, sentences, labels): 59 | self.test_set = self.compute_features(sentences) 60 | print 61 | print Tcolors.ACT + " Classifying instance with SVM: " + Tcolors.RED + sentences[0] + Tcolors.C 62 | print Tcolors.HEADER 63 | test_data = SparseDataSet(self.test_set, L=labels) 64 | self.results = self.svm_classifier.test(test_data) 65 | print Tcolors.C 66 | return self.results 67 | 68 | def compute_features(self, sentences): 69 | features = [] 70 | for i,sent in enumerate(sentences): 71 | sent = sent.lower() 72 | words = nltk.word_tokenize(sent) 73 | feature = np.zeros(self.num_features) 74 | for word in words: 75 | if word.lower() in self.lexicon.words.keys(): 76 | feature[self.lexicon.words.keys().index(word)] = 1 77 | features.append(feature) 78 | return features 79 | 80 | def initialize_lexicon(self): 81 | pass 82 | 83 | def print_stats(self): 84 | print "[*] SVM Classifier ACCURACY: ", self.accuracy 85 | print "[*] SVM Classifier PREDICTED_LABEL: ", self.predicted_labels[0] 86 | 87 | def stats(self): 88 | self.accuracy = self.results.getSuccessRate() 89 | self.predicted_labels = self.results.getPredictedLabels() 90 | 91 | def save(self,data,features,labels): 92 | output = open(self.feature_data ,'wb') 93 | pickle.dump(features,output) 94 | output.close() 95 | output = open(self.label_data,'wb') 96 | pickle.dump(labels,output) 97 | output.close() 98 | self.svm_classifier.save(PATH + "/learning/stored/svm.classifier") 99 | 100 | def train(self, training_set, labels): 101 | print Tcolors.ACT + " Training SVM with chaining..." 102 | features = self.compute_features(training_set) 103 | data = SparseDataSet(features, L=labels) 104 | print Tcolors.CYAN 105 | self.training_set = data 106 | self.svm_classifier.train(data) 107 | self.save(data,features,labels) 108 | print Tcolors.C 109 | 110 | 111 | -------------------------------------------------------------------------------- /terminal_colors.py: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | # Licence: Creative Commons (see COPYRIGHT) # 3 | # Authors: Nikolaos Pappas, Georgios Katsimpras # 4 | # {nik0spapp, gkatsimpras}@gmail.com # 5 | # Supervisor: Efstathios stamatatos # 6 | # stamatatos@aegean.gr # 7 | # University of the Aegean # 8 | # Department of Information and Communication Systems Engineering # 9 | # Information Management Track (MSc) # 10 | # Karlovasi, Samos # 11 | # Greece # 12 | #################################################################### 13 | 14 | class Tcolors: 15 | HEADER = '\033[1;95m' 16 | OKBLUE = '\033[94m' 17 | OKGREEN = '\033[1;92m' 18 | WARNING = '\033[1;93m' 19 | W = '\033[1;37m' 20 | GRAY = W 21 | BGGRAY = '\033[1;37;40m' 22 | BG = '\033[1;30;47m' 23 | BGH = '\033[1;40;41m' 24 | FAIL = '\033[91m' 25 | RED = '\033[1;91m' 26 | ENDC = '\033[0m' 27 | CYAN = '\033[1;36m' 28 | INF = '\033[1;90m' 29 | C = ENDC 30 | 31 | ACT = W + "["+RED+"*"+ENDC+W+"]" + C 32 | PROC = W + "["+OKBLUE+"*"+ENDC+W+"]" + C 33 | ADD = W + "["+WARNING+"+"+ENDC+W+"]" + C 34 | RES = W + "["+OKGREEN+"x"+ENDC+W+"]" + C 35 | INFO = W + "["+OKBLUE+"INFO:"+ENDC+W+"]" + C 36 | OK = W + "[ "+ OKGREEN + "OK" + ENDC+W+ " ]" + C 37 | def disable(self): 38 | self.HEADER = '' 39 | self.OKBLUE = '' 40 | self.OKGREEN = '' 41 | self.WARNING = '' 42 | self.FAIL = '' 43 | self.ENDC = '' 44 | --------------------------------------------------------------------------------