├── .flake8 ├── .github └── workflows │ └── pythonpackage.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── Makefile ├── README.md ├── conf.py ├── dataset ├── .DS_Store ├── ABSA-15_Restaurants_Train_Final.json ├── ABSA-15_Restaurants_Train_Final.xml ├── ABSA-15_Restaurants_Train_Final_mid_training_data.csv ├── ABSA15_Restaurants_Test.json ├── ABSA15_Restaurants_Test.json_mid_training_data.csv ├── ABSA15_Restaurants_Test.xml ├── ABSA15_Restaurants_Test_mid_training_data.csv ├── Restaurants_Test_Data_phaseB.json ├── Restaurants_Test_Data_phaseB.xml ├── Restaurants_Test_Data_phaseB_mid_training_data.csv ├── Restaurants_Train_2014.json ├── Restaurants_Train_2014_mid_training_data.csv ├── __init__.py ├── annoted_data.json ├── annoted_data.json_mid_training_data.csv ├── annoted_data_mid_training_data.csv ├── customer_review_data │ ├── .DS_Store │ ├── Apex AD2600 Progressive-scan DVD player.txt │ ├── Apex AD2600 Progressive-scan DVD player.txt.json │ ├── Apex AD2600 Progressive-scan DVD player_mid_training_data.csv │ ├── Canon G3.txt │ ├── Canon G3.txt.json │ ├── Canon G3_mid_training_data.csv │ ├── Creative Labs Nomad Jukebox Zen Xtra 40GB.txt │ ├── Creative Labs Nomad Jukebox Zen Xtra 40GB.txt.json │ ├── Creative Labs Nomad Jukebox Zen Xtra 40GB_mid_training_data.csv │ ├── Nikon coolpix 4300.txt │ ├── Nikon coolpix 4300.txt.json │ ├── Nikon coolpix 4300_mid_training_data.csv │ ├── Nokia 6610.txt │ ├── Nokia 6610.txt.json │ ├── Nokia 6610_mid_training_data.csv │ └── Readme.txt ├── read_dataset.py └── sentiment_words_text_files │ ├── less_adj.csv │ ├── negative_words.txt │ ├── neutral_modifiers.txt │ └── positive_words.txt ├── feature_extraction ├── __init__.py ├── feature_vector_builder.py ├── pos_pattern_feature │ ├── __init__.py │ └── syntactic_pos_pattern.py └── word_level_feature │ └── __init__.py ├── grammar ├── __init__.py ├── chunker.py ├── language_processor.py ├── pattern_grammar.py ├── pos_tagger.py ├── sentiment.py └── 
source_target_extractor.py ├── index.rst ├── requirements.txt ├── review_highlight_paper.pdf ├── setup.py ├── setup.sh ├── sphinx_to_gh_pages.sh └── training ├── helpers.py ├── mid_stage_prepare_dataset.py ├── pipeline ├── __init__.py ├── acquire_dataset.py ├── data_processing.py └── train.py └── train_top_classifier.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | #exclude = .git,__pycache__,docs/source/conf.py,old,build,dist # defaults to: .svn,CVS,.bzr,.hg,.git,__pycache__,.tox 3 | exclude = .git,__pycache__,build,dist,.svn,CVS,.bzr,.hg,.git,__pycache__,.tox,env 4 | max-line-length = 120 5 | max-complexity = 10 6 | #show_source = True 7 | # Count the number of occurrences of each error/warning code and print a report. 8 | statistics = True 9 | # Print the total number of errors. 10 | count = True 11 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.6, 3.7, 3.8] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install flake8 pytest cython 30 | sudo apt-get install python3-dev 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 
| - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | pytest 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | .idea 5 | BaseEvalValid1/ 6 | .idea/workspace.xml 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | # C extensions 11 | *.so 12 | *.pkl 13 | *.xml 14 | *.csv 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: git://github.com/pre-commit/mirrors-yapf 2 | sha: v0.16.3 3 | hooks: 4 | - id: yapf 5 | stages: [commit] 6 | exclude: ^.*\/generated\/.*(_pb2|_pb2_grpc).py$ 7 | #exclude: ^.*generated\/[a-zA-Z0-9_]*(_pb2|_pb2_grpc).py$ 8 | #exclude: > 9 | #(?x)^( 10 | #nitro/core_rec_api/generated/core_rec_api_pb2_grpc.py| 11 | #nitro/core_rec_api/generated/core_rec_api_pb2.py 12 | #)$ 13 | 14 | 15 | - repo: git@github.com:asottile/reorder_python_imports 16 | sha: v0.3.4 17 | hooks: 18 | - id: reorder-python-imports 19 | stages: [commit] 20 | exclude: ^.*\/generated\/.*(_pb2|_pb2_grpc).py$ 21 | 22 | - repo: git://github.com/pre-commit/pre-commit-hooks 23 | sha: v0.8.0 24 | hooks: 25 | - id: trailing-whitespace 26 | - id: check-ast 27 | - id: check-case-conflict 28 | - id: check-merge-conflict 29 | - id: check-symlinks 
30 | - id: name-tests-test 31 | entry: name-tests-test --django # To allow test_foo.py filenames instead of foo_test.py 32 | #- id: requirements-txt-fixer 33 | - id: debug-statements 34 | # See https://github.com/pre-commit/pre-commit-hooks/issues/173 https://github.com/pre-commit/pre-commit/issues/191 35 | #- id: detect-aws-credentials 36 | #exclude: ^(pyzomato)$ 37 | #- id: detect-private-key # This hook is broken for submodules 38 | #exclude: ^(pyzomato)$ 39 | - id: double-quote-string-fixer 40 | exclude: ^.*\/generated\/.*(_pb2|_pb2_grpc).py$ 41 | - id: end-of-file-fixer 42 | exclude: ^.*\/generated\/.*(_pb2|_pb2_grpc).py$ 43 | - id: fix-encoding-pragma 44 | exclude: ^.*\/generated\/.*(_pb2|_pb2_grpc).py$ 45 | - id: flake8 46 | stages: [commit] 47 | exclude: ^.*\/generated\/.*(_pb2|_pb2_grpc).py$ 48 | 49 | - repo: local 50 | push_stage: [push] 51 | commit_stage: [commit] 52 | 53 | hooks: 54 | #- id: check-tox 55 | #name: Tox Tests 56 | #entry: python -m tox 57 | #stages: push_stage 58 | #language: system 59 | #files: \.py$ 60 | - id: check-nose2 61 | name: Nose2 Tests 62 | entry: sh -c "ls" 63 | #stages: commit_stage 64 | language: system 65 | files: \.py$ 66 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:4.3.14 2 | MAINTAINER Amit Kushwaha 3 | 4 | ADD . 
/usr/src/app 5 | 6 | RUN mkdir -p /usr/src/app 7 | WORKDIR /usr/src/app 8 | 9 | RUN conda update --yes pip 10 | 11 | RUN set -eux \ 12 | && apt-get update 13 | 14 | RUN apt-get install -y make gcc g++ libsnappy-dev 15 | 16 | COPY requirements.txt /tmp/requirements.txt 17 | RUN pip install --no-cache-dir -r /tmp/requirements.txt \ 18 | && rm -fv /tmp/requirements.txt \ 19 | && conda clean --all --yes 20 | 21 | COPY setup.sh /tmp/setup.sh 22 | 23 | RUN bash /tmp/setup.sh 3 \ 24 | && rm -fv /tmp/setup.sh 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = AspectBasedSentimentAnalysis 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Aspect Based Sentiment Analysis 2 | ========================== 3 | 4 | ### Paper 5 | 6 | 8 | 9 | ### Dataset 10 | ABSA-15_Restaurants_Train_Final.xml 11 | 12 | 13 | ### Approach 14 | Natural Language Processing based. Multilabel classifier on top of syntactic extraction rules. 
15 | 16 | 17 | 18 | ### Results 19 | ```bash 20 | 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 582/582 [02:18<00:00, 3.10it/s] 21 | For Data-set: dataset/ABSA15_Restaurants_Test.json 22 | precision recall f1-score support 23 | 24 | 0 0.00 0.00 0.00 54 25 | 1 0.83 0.50 0.63 542 26 | 27 | avg / total 0.76 0.46 0.57 596 28 | 29 | [root] [2017-09-27 17:59:03,966] INFO : Shape of array for dataset: X:(582, 13635) , Y:(582, 31) 30 | /Users/Amit/anaconda/lib/python3.5/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 31 | 'precision', 'predicted', average, warn_for) 32 | Classification report on testing_data 33 | precision recall f1-score support 34 | 35 | 0 0.97 0.93 0.95 101 36 | 1 1.00 1.00 1.00 2 37 | 2 1.00 0.54 0.70 54 38 | 3 1.00 1.00 1.00 2 39 | 4 1.00 1.00 1.00 3 40 | 5 1.00 1.00 1.00 2 41 | 6 1.00 0.74 0.85 23 42 | 7 1.00 1.00 1.00 1 43 | 8 0.00 0.00 0.00 1 44 | 9 1.00 0.83 0.91 6 45 | 10 1.00 0.33 0.50 3 46 | 11 1.00 1.00 1.00 1 47 | 12 1.00 1.00 1.00 14 48 | 13 0.94 0.79 0.86 19 49 | 14 1.00 1.00 1.00 1 50 | 15 0.00 0.00 0.00 1 51 | 16 1.00 1.00 1.00 2 52 | 17 1.00 1.00 1.00 5 53 | 54 | avg / total 0.97 0.80 0.87 241 55 | 56 | [root] [2017-09-27 17:59:19,624] INFO : Dataset: dataset/ABSA15_Restaurants_Test.json 57 | 582it [00:00, 628.15it/s] 58 | NO PREDICTION FOR RULE: 397 out of: 582 59 | Task: ONLY_ASPECT_PREDICTION False 60 | Accuracy: 80.96885813148789 61 | Total: 289 , Correct: 234 62 | :::::::::::::::::: TESTING :::::::::::::::::: 63 | dataset/ABSA15_Restaurants_Test.json 64 | precision recall f1-score support 65 | 66 | 0 0.00 0.00 0.00 55 67 | 1 0.81 0.43 0.56 542 68 | 69 | avg / total 0.74 0.39 0.51 597 70 | 71 | ``` 72 | 73 | ### Setup 74 | ```bash 75 | # From project root, execute this command: 76 | 77 | pip install -r requirements.txt 78 | ``` 
79 | ### Commands 80 | 81 | ```bash 82 | # From project root, execute this command: 83 | PYTHONPATH='.' python3 training/train_top_classifier.py 84 | ``` 85 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | import sphinx_rtd_theme 22 | 23 | project = 'Aspect Based Sentiment Analysis' 24 | copyright = '2019, Amit Kushwaha' 25 | author = 'Amit Kushwaha' 26 | 27 | # The short X.Y version 28 | version = '' 29 | # The full version, including alpha/beta/rc tags 30 | release = '' 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # If your documentation needs a minimal Sphinx version, state it here. 36 | # 37 | # needs_sphinx = '1.0' 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be 40 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 41 | # ones. 
42 | extensions = [ 43 | 'sphinx.ext.autodoc', 44 | 'sphinx.ext.doctest', 45 | 'sphinx.ext.intersphinx', 46 | 'sphinx.ext.todo', 47 | 'sphinx.ext.coverage', 48 | 'sphinx.ext.mathjax', 49 | 'sphinx.ext.ifconfig', 50 | 'sphinx.ext.viewcode', 51 | 'sphinx.ext.githubpages', 52 | 'sphinx.ext.napoleon', 53 | ] 54 | 55 | source_parsers = { 56 | '.md': 'recommonmark.parser.CommonMarkParser' 57 | } 58 | 59 | # Add any paths that contain templates here, relative to this directory. 60 | templates_path = ['_templates'] 61 | 62 | # The suffix(es) of source filenames. 63 | # You can specify multiple suffix as a list of string: 64 | # 65 | # source_suffix = ['.rst', '.md'] 66 | source_suffix = '.rst' 67 | 68 | # The master toctree document. 69 | master_doc = 'index' 70 | 71 | # The language for content autogenerated by Sphinx. Refer to documentation 72 | # for a list of supported languages. 73 | # 74 | # This is also used if you do content translation via gettext catalogs. 75 | # Usually you set "language" from the command line for these cases. 76 | language = None 77 | 78 | # List of patterns, relative to source directory, that match files and 79 | # directories to ignore when looking for source files. 80 | # This pattern also affects html_static_path and html_extra_path . 81 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | 87 | # -- Options for HTML output ------------------------------------------------- 88 | 89 | # The theme to use for HTML and HTML Help pages. See the documentation for 90 | # a list of builtin themes. 91 | # 92 | html_theme = 'sphinx_rtd_theme' 93 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 94 | 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 
99 | # 100 | # html_theme_options = {} 101 | 102 | # Add any paths that contain custom static files (such as style sheets) here, 103 | # relative to this directory. They are copied after the builtin static files, 104 | # so a file named "default.css" will overwrite the builtin "default.css". 105 | html_static_path = ['_static'] 106 | 107 | # Custom sidebar templates, must be a dictionary that maps document names 108 | # to template names. 109 | # 110 | # The default sidebars (for documents that don't match any pattern) are 111 | # defined by theme itself. Builtin themes are using these templates by 112 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 113 | # 'searchbox.html']``. 114 | # 115 | # html_sidebars = {} 116 | 117 | 118 | # -- Options for HTMLHelp output --------------------------------------------- 119 | 120 | # Output file base name for HTML help builder. 121 | htmlhelp_basename = 'AspectBasedSentimentAnalysisdoc' 122 | 123 | 124 | # -- Options for LaTeX output ------------------------------------------------ 125 | 126 | latex_elements = { 127 | # The paper size ('letterpaper' or 'a4paper'). 128 | # 129 | # 'papersize': 'letterpaper', 130 | 131 | # The font size ('10pt', '11pt' or '12pt'). 132 | # 133 | # 'pointsize': '10pt', 134 | 135 | # Additional stuff for the LaTeX preamble. 136 | # 137 | # 'preamble': '', 138 | 139 | # Latex figure (float) alignment 140 | # 141 | # 'figure_align': 'htbp', 142 | } 143 | 144 | # Grouping the document tree into LaTeX files. List of tuples 145 | # (source start file, target name, title, 146 | # author, documentclass [howto, manual, or own class]). 147 | latex_documents = [ 148 | (master_doc, 'AspectBasedSentimentAnalysis.tex', 'Aspect Based Sentiment Analysis Documentation', 149 | 'Amit Kushwaha', 'manual'), 150 | ] 151 | 152 | 153 | # -- Options for manual page output ------------------------------------------ 154 | 155 | # One entry per manual page. 
List of tuples 156 | # (source start file, name, description, authors, manual section). 157 | man_pages = [ 158 | (master_doc, 'aspectbasedsentimentanalysis', 'Aspect Based Sentiment Analysis Documentation', 159 | [author], 1) 160 | ] 161 | 162 | 163 | # -- Options for Texinfo output ---------------------------------------------- 164 | 165 | # Grouping the document tree into Texinfo files. List of tuples 166 | # (source start file, target name, title, author, 167 | # dir menu entry, description, category) 168 | texinfo_documents = [ 169 | (master_doc, 'AspectBasedSentimentAnalysis', 'Aspect Based Sentiment Analysis Documentation', 170 | author, 'AspectBasedSentimentAnalysis', 'One line description of project.', 171 | 'Miscellaneous'), 172 | ] 173 | 174 | 175 | # -- Extension configuration ------------------------------------------------- -------------------------------------------------------------------------------- /dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yardstick17/AspectBasedSentimentAnalysis/b1fcf830341a51f37a862b1144797d2a9c5db2c2/dataset/.DS_Store -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yardstick17/AspectBasedSentimentAnalysis/b1fcf830341a51f37a862b1144797d2a9c5db2c2/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/annoted_data.json_mid_training_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yardstick17/AspectBasedSentimentAnalysis/b1fcf830341a51f37a862b1144797d2a9c5db2c2/dataset/annoted_data.json_mid_training_data.csv -------------------------------------------------------------------------------- /dataset/customer_review_data/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yardstick17/AspectBasedSentimentAnalysis/b1fcf830341a51f37a862b1144797d2a9c5db2c2/dataset/customer_review_data/.DS_Store -------------------------------------------------------------------------------- /dataset/customer_review_data/Nikon coolpix 4300.txt: -------------------------------------------------------------------------------- 1 | [t]the best 4mp compact digital available 2 | camera[+2]##this camera is perfect for an enthusiastic amateur photographer . 3 | picture[+3], macro[+3]##the pictures are razor-sharp , even in macro . 4 | size[+2][u]##it is small enough to fit easily in a coat pocket or purse . 5 | weight[+1][u]##it is light enough to carry around all day without bother . 6 | feature[+2][u]##operating its many features is easy and often obvious - i 'm no annie lebovitz , but i was able to figure out most of its abilities just messing around with it at a camera store . 7 | manual[+2]##the manual does a fine job filling in any blanks that remain . 8 | auto focus[+2],scene mode[+2]##the auto-focus performs well , but i love having the 12 optional scene modes - they are dummy-proof , and correspond to many situations in which i would actually seek to use the camera . 9 | rechargable battery[+2]##comes with a 16 mb compact flash and one rechargable battery the charging unit , included , is fast and small . 10 | ##i bought a 256 mb cf and a second battery , so it 's good to go on a long vacation . 11 | camera[+2]##i enthusiastically recommend this camera . 12 | [t]perfect to grow into 13 | ##i got my camera three days back , and although i had some experience with digital cameras prior to purchasing this one , i still rate myself as a beginner . 14 | ##i bought this camera because it fit my budget and the pre-production and production model reviews were positive . 
15 | camera[+2][p], use[+1][u], feature[+2]##it 's easy to use , and yet very feature rich . 16 | auto mode[+1],scene mode[+2]##in the auto mode it functions basically as a point and click , the scene modes are very easy to use and produce good results . 17 | manual mode[+2]##the manual mode is feature rich and i can 't wait to get the hang of it . 18 | macro mode[+3],picture[+3]##the macro mode is exceptional , the pictures are very clear and you can take the pictures with the lens unbelievably close the subject . 19 | battery life[+3]##the battery life is very good , i got about 90 minutes with the lcd turned on all the time , the first time around , and i have been using it with the lcd off every now and then , and have yet needed to recharge it . 20 | ##the camera comes with a lexar 16mb starter card , which stores about 10 images in fine mode at the highest resolution , i intend to buy a bigger card soon . 21 | [t]near perfection 22 | ##i love photography . 23 | ##i had an older camera that was simply a point and shoot camera . 24 | ##i needed something with more power , so i bought a nikon coolpix 4300 . 25 | camera[+3], use[+1]##i fell in love with this camera , it combines ease of use , with an immense amount of options and power . 26 | ##you can use the scene modes , or fine tune the options , i. you can change the iso level , shutter speed , etc. 27 | camera[+2]##this camera is ideal for people who want more power , but do n't want to spend 1000s dollars on a camera . 28 | [t]not too excited 29 | ##i bought coolpix 4300 two months after i had bought canon powershot s400 . 30 | ##reason for two ? 31 | ##it was not easy sharing one with my teen age kid . 32 | ##the two cameras are very similar in functionality and pricing . 33 | ##i 've had no problem with canon whatsoever . 34 | picture quality[+1]##with nikon , although picture qualities are as good as any other 4 mp cameras , i 've had the following headaches ; 35 | transfer[-2]##1 . 
pictures wo n't transfer to pc directly from the camera using the included transfer cable . 36 | ##i did everything i could , and it took many days of frustration before concluding that the only way to transfer to pc is with the card reader . 37 | ##card reader is included . 38 | ##2 . the speed is noticeably slower than canon , especially so with flashes on . 39 | ##3 . with low battery , it twice wiped out the entire pictures in the memory chip . 40 | ##i used lexar 256 mb and i still use it which means nothing is wrong with lexar . 41 | ##be very careful when the battery is low and make sure to carry extra batteries . 42 | [t]only " cons " here 43 | ##the other reviewers have clearly pointed all the good things about this camera , which i do agree . 44 | ##but there are certain issues might be they are to me here - all of them are minor; not major ones though , 45 | auto mode[-2]##. this camera keeps on autofocussing in auto mode with a buzzing sound which can 't be stopped . 46 | ##would be really good if they have given an option to stop this autofocussing . 47 | ##. if you want to have the date ; time on the image , its only through their software " nikon view " which reads the images date ; time from the images meta-data . 48 | ##so if you use your card reader ; copy images - you got to onceagain open them through their software to put the date ; time . 49 | ##in that too , there is n't a direct way to add date ; time - you got to say ' print images ' to a different directory in which there is an option to specify the date ; time . 50 | image[-2]##. even the slightest of the shakes totally distorts your image 51 | indoor image[-2]##. images taken indoor were n't so clear . 52 | ##you got to have flash ' on ' to get it eventhough your room is well lit 53 | lens cap[-3]##. lens cap is a really annoying 54 | ##. go ; get a high speed like lexar 12x or higher cf card . 
55 | ##else with simpler viking cf cards , it takes couple of seconds to write a picture with approximately " fine **" picture quality " image size : 1600 . 56 | movie[-2]##. the movie clips taken will always have some ' noise ' in it - you can 't avoid that . 57 | camera[+2], picture[+2], close-up shooting[+3]##but overall this is a good camera with a ' really good ' picture clarity ; an exceptional close-up shooting capability . 58 | ##i would rate this is 4.5 stars picture quality ; image size defined above are specific to nikon coolpix . 59 | [t]bad service 60 | menu dial knob[-2]##within a year , there are problems with my menu dial knob . 61 | ##it became stucked which makes it almost impossible to switch between modes . 62 | servicing[-2]##i send my camera to nikon for servicing , took them a whole 6 weeks to diagnose the problem . 63 | ##worse of all , they claim that it 's some kind of internal damage and refuse to cover the cost via warranty ! 64 | ##they would n't repair my camera unless if i pay $ 100 for parts ? and labor ! 65 | camera[+2],customer service[-2]##it is a good camera in terms of the function and quality , but take your chance with it because nikon absolutely sucks when it comes to customer service . 66 | [t]system error 67 | system error[-2]##got a " system error " problem 30 days after purchase . 68 | camera[-2]##made the camera totally inoperable . 69 | lens cap[-2]##also , the lens cap design is flawed . 70 | ##you have to manually take the cap off in order to use it . 71 | audio[-2]##audio on video also lacking . 72 | picture[+3],delay[+1]##otherwise , it takes very good pictures ; shutter delay is n't so bad either . 73 | ##still , had to send it back to nikon for repair . 74 | [t]amazing camera 75 | ##i am an amateur photographer and here is a piece of advise to all the folks who are thinking about making a move the digital world . 76 | camera[+3]##nikon 4300 , i feel , is the best camera out there for the features and price . 
77 | ##here were my requirements in a digital camera : 78 | ##1 . i had initially thought of buying a 2 or 3 megapixel camera but these are good for 4x6 " or 5x7 " prints and i wanted some really great 8x10 " photos once in a while . 79 | ##2 . i did not want a very small camera as it seems to get lost in my hands and i was n't comfortable with that . 80 | ##i wanted a decent sized camera with a contour for my fingers to hold it steadily . 81 | ##3 . i wanted a camera that had a lot of built-in settings for different types of suroundings while giving me an option to use my photography skills although , i am an amateur with an interest in photography by turning on the manual settings . 82 | ##4 . and , of course , i wanted a respected brand and had to stay within my budget because i had bought an expensive camcorder before but had n't used it much . 83 | ##depending on all the above requirements , i had narrowed down my search to nikon 4300 and canon powershot s400 models . 84 | ##nikon got the final nod for its settings auto and manual along with movie modes , medium; compact size , price , brand name , good software that is included and previous reviews . 85 | ##i should say i have been very happy with my decision ever since . 86 | picture[+3]##the pictures are absolutely amazing - the camera captures the minutest of details . 87 | ##a few things that might make it even better : 88 | indoor picture[-1]##1 . pictures taken in an indoor setting are a little dull and rarely hazy if you tend to take photographs from a distance . 89 | delay[-2][u]##2 . it takes a while for the camera to actually capture the photograph from the time you click the button and i have seen people becoming a little impatient waiting for the flash to glow . 90 | picture[-2]##3 . the pictures come out hazy if your hands shake even for a moment during the entire process of taking a picture . 
91 | ##all the above , i think , are problems with all the digital cameras out there and so i have no major complaints . 92 | ##try to buy a tripod and that should solve some of those problems . 93 | ##whichever camera you buy , add upto about 200 dollars for an additional memory card i bought a 256 mb card , usb card reader , camera bag and the warranty . 94 | ##and finally , get your pictures developed at a kodak picture maker machine even though it costs you a little extra - no one can beat kodak processing in terms of clarity and brightness . 95 | ##hope this review has helped you in making up your mind . 96 | ##i have taken about 150 photographs so far and have absolutely loved the experience . 97 | ##i am looking forward to my vegas trip for some great photographs now . 98 | [t]super camera 99 | ##with our compact 35mm broken , and a vacation in europe at hand , i finally decided to enter the digital age . 100 | ##i first intended to buy the 3100 , but am really happy to invest the extra money for the 4300 . 101 | ##the lense is a lot better , and the 4mb produce fantastic pictures . 102 | auto mode[+2], manual mode[+2], scene mode[+2]##the possibilities with auto , manual , and the very helpful " scene " mode , which offers 11 optimized situational settings like portrait , landscape , beach / snow , sunset etc. , are endless . 103 | movie mode[+2]##the movie mode is also working great . 104 | software[+3]##the 4300 comes with excellent easy to install software i use it on xp and win98 without problem , battery charger , usb and video cable . 105 | ##a 64mb flash card camera comes with a 16mb holds 52 pictures on " normal " resolution . 106 | print[+2]##i experimented with online print ordering with the two major us brick and mortar retailers , and they both delivered great results . 107 | print[+3]##even at the " normal " setting , a 8x10 print ! 
shows not a single sign of visible rendering or pixels - you could never tell that the picture has been taken by a " digital " . 108 | battery life[+1]##the battery life is ok , but can be somewhat short when working a lot with the monitor on . 109 | ##i therefore bought a second battery as a backup , and due to the lightingfast charging with the included charger , which by the way works on 110v as well as 220v in europe , we ran never out of " juice " . 110 | ##i highly recommend the accessory kit , which provides you with everything else you need : 111 | ##a nice camera bag , which holds camera plus all the cables and the charger . 112 | ##furher it contains a card reader , a flash card folder for up to 8 cards , a lense cleaning rag , and an ac adapter , which is very helpful when working in an " outlet environment " . 113 | camera[+3]##over all a fantastic very compact camera , which made us experience photography on a totally new level ! 114 | [t]camera shake 115 | ##after reading all the glowing review on this camera , i went to my local camera shop to try one . 116 | ##i currently own an olympus d-550 which takes wonderful outdoor pictures . 117 | ##however , indoor pictures of subjects more than a few feet away can be blurry or grainy . 118 | ##anyway , in the store , i tried coolpix 4300 along with two other 4mp models minolta dimage s414 , and panasonic lumix . 119 | picture[-2]##focusing on a display rack about 20 feet away in a brightly lit room during day time , pictures produced by this camera were blurry and in a shade of orange . 120 | ##pictures produced by the minolta were orange but not blurry . 121 | ##pictures produced by the panasonic were excellent . 122 | ##the store clerk concluded that the blurriness may be caused by shaking the camera slightly when i press the button , which is understandable since this camera is lighter than the other two models . 123 | ##anyway , i ended up purchasing the lumix . 
124 | ##i 'm sure that this camera has all the wonderful features that others talk about . 125 | ##however , as an average person with unsteady hands , i doubt it 'll perform well in lower light settings . 126 | [t]great camera 127 | ##i purchased the nikon 4300 after several weeks of searching . 128 | ##the value , name , and resolution signed the lease . 129 | picture[+3]##after nearly 800 pictures i have found that this nikon takes incredible pictures . 130 | digital zoom[+2],optical zoom[+2]##the digital zoom takes as good of pictures , as the optical zoom does ! 131 | picture[-2]##the inside pics , can be a bit challenging , but when you can instantly delete pics you dont like , that are not focused perfectly , or need lighting adjustments , make this camera very simple and effective . 132 | feature[+3]##i love all of the features , and presets as well . 133 | sunset feature[+3]##sunset feature takes incredible pics in the morning , and the evening ! 134 | software[+3]##the software you get with this camera is perfect . 135 | touchup[+2], redeye[+2]##touchups , redeye , and so on are very easy to alter , and correct . 136 | ##i definately recommend a large memory card . 137 | ##i use a 128mb card , and usually get around 70 to 100 pics 1024-1280 res. , with good resolution . 138 | ##very large pics take 8mb each ! 139 | battery life[+2]##battery life is good , although i recommend two rechargebles , or a alkaline backup , for those emergency pics you have to have . 140 | camera[+3]##all and all , i am extremely pleased with this camera . 141 | ##making the step from film to digital has been a great move . 142 | ##i find myself emailing pics to everyone i know . 143 | camera[+3]##great camera , great investment ! 144 | [t]excellent camera 145 | ##this is my second digital camera , though i also use one at work and have some experience with several nikon and canon models . 
146 | use[+1][u],quality[+2],size[+1]##the nikon coolpix 4300 is wonderful -- easy to use , high quality , and compact in size . 147 | ##with an extra battery and a couple of good sized memory cards you are all set for a day of picture taking while on vacation . 148 | auto setting[+2]##the auto setting does a great job most of the time but there are manual settings for those who want them . 149 | ##if something were to happen to my nikon coolpix 4300 , i would order another just like it . 150 | [t]the ricksters review 151 | ##after much research i decided on the nikon coolpix 4300 . 152 | camera[+2][p]##it has many great features and very few bad ones . 153 | picture[+2],ease of use[+2]##it takes excellent pics and is very easy to use , if you read the manual . 154 | ##if you cant get great pics from this camera its because you havent read the manual . 155 | lcd[-1]##the only things i have found that i havent liked is that the lcd is hard to read in daylight but everyone elses is too . 156 | download[+2]##downloads are a snap and quick . 157 | control[+2]##controls easy yo use and easy to get to . 158 | camera[+3]##i 'm extremely glad i bought this camera . 159 | [t]super little camera 160 | camera[+3]##really happy with this little camera . 161 | size[+2][u]##it is n't big and offensive , though you would n't know it by looking at the pics it takes . 162 | ##got it for christmas , still learning a few of the settings , but i do n't take pictures every single day . 163 | picture[+2],print[+2]##settings for just about everything , beautiful pics and beautiful prints . 164 | zoomed image[+2]##i expected the 12x digital zoom to be grainy , but onfull size high to fine quality , the zoomed images are quite impressive . 165 | ##did buy a 256mb compact flash , it holds a lot of pics at any resolution . 166 | ##i only wish you could get high quality tiff images at all resolutions , but does n't seem any of the buget cameras do . 
167 | camera[+2]##still , at least from my perspective , a great camera for the money , and i 'm a happy camper ! 168 | [t]beginner looks experienced with coolpix 169 | camera[+2]##this is a wonderful camera . 170 | ##my wife and i read and read and read reviews on different cameras for months . 171 | ##in the end this is the one that we decided upon and since have never had buyer 's remorse . 172 | 4mp[+2],optical zoom[+2]##4 mp gives you room for the future gaining experience ; cost offsets over time from wanting more in a camera , 3x optical and the fact that it carries nikon 's quality reputation behind it make the whole package prove itself worthwile at the price . 173 | picture[+2]##one neat thing - i have taken some pictures in what i thought would be impossible lighting conditions pitch black rooms - no problem for the camera - rooms looked like they had ample lighting . 174 | [t]can i have a few more stars please 175 | camera[+3]##this little camera has created a marriage between myself and nikon. never knew i could love a digital camera so much . 176 | software[+3], online service[+2]##the software that comes with it is amazing and the online service that comes free is really very neat. clean clear and well focused on over 95 % of all photos taken by a beginner . 177 | ##100 % perfect if you put some thought and time into reading the manual before you take the photos . 178 | ##i took the camera and just gave it to someone . 179 | ##no instructions . 180 | photo[+2]##just point and shoot and the photos were great . 181 | ##just imagine if you place a bit of knowledge when you take your photos . 182 | memory card[+2]##the memory card fit nicely since i also have a pocket pc and it automatically installed a picture software on my pda wolla . 183 | ##my pda becames a digital album . 184 | ##i have a 128mb card in it and can shoot over 150 hi res photos . 185 | camera[+2]##if you have to buy a camera on a buget, this has got to be the one . 186 | ##go get it . 
187 | ##before they are sold out ! 188 | [t]great camera 189 | camera[+2]##great camera , i have been using this for several months and got excellent results , simple friendly usage , in many scenes indoor , outdoor , snow , close up macro etc. 190 | [t]great camera for a great price 191 | ##i bought my nikon coolpix4300 about a week ago . 192 | camera[+3], feature[+2]##this is my first digital camera and i can say that i am quite impressed with the features that it has . 193 | use[+2][u]##it is easy enough to use for a beginner like me but it has plenty of features for me to grow into . 194 | macro[+2]##i particularly like the way it aids me in taking my macro shots . 195 | picture quality[+2]##my friends were impressed with the quality of the pictures i took ! 196 | autofocus[-2]##one downside though is that autofocus does not work well when battery is low . 197 | ##i assume this is the case for most cameras since it does not have enough power to do autofocus . 198 | battery life[+1]##battery life is ok . 199 | ##i plan on buying a second battery and a larger cf for it . 200 | [t]not disappointed 201 | camera[+2]##great camera , and for the price you can 't beat it . 202 | ##i 've been using it for about 3 months now . 203 | control[+3]##i found the controls very intuitive and learned how to manuver through the menus quickly . 204 | quality[+3]##the quality is superb . 205 | size[+2]##the small size is perfect for my little hands , but may perhaps be uncomfortable or awkward for a bigger person . 206 | [t]field tested 207 | ##we use older versions 880 , 885 of this model in the office and i find them to produce excellent quality photos . 208 | nikon 4300[+3]##the 4300 is a very durable , compact package , and i find nikon to be a brand that i can trust . 209 | ##it comes with a 16mb flash card that you will need to upgrade ! 210 | ##we got a 256mb card , and have been hard-pressed to fill it over a weekend . 
211 | ##the only drawbacks that i have noticed : 212 | battery[-2]##it comes with a rechargable battery that does not seem to last all that long , especially if you use the flash a lot . 213 | ##so , it 's a good idea to purchase an extra battery to insure that you are not left hanging . 214 | auto-focus assist light[-1]##it does not have an auto-focus assist light which is nice for taking shots in low-light situations it allows the camera to focus in low light . 215 | ##i would say that this is not a major issue , but it can be slightly annoying if you care about these things . 216 | optical zoom[-1]##it has a 3x optical zoom , which is average for these cameras . 217 | ##for more , you sacrifice weight and size : more optical zoom requires more glass and focal length . 218 | ##it does have a digital zoom on top of the optical zoom , but you sacrifice quality . 219 | ##i set the camera to the " manual " mode for one main reason : 220 | ##in any other mode it autofocuses continuously and burns batteries . 221 | ##in manual , you can set it to " single af " where it will focus by depressing the shudder button prior to taking the pic ; this is not really a hinderance , you just have to be familiar with its menus and features . 222 | ##in reality , these are the only drawbacks this camera has , much less than many other models ! 223 | ##it is easy to " focus " in on the drawbacks , so do n't think i hate this camera . 224 | camera[+3]##it really is an awesome camera that is hard to beat for general-to moderate snapping . 225 | ##for work , we use them in the field often , and they hold up really well . 226 | macro[+2]##they take excellent macro shots as well . 227 | menu[+2]##the menus and features are easy to manipulate and access , and it takes silent movies . 228 | [t]it do n't get better 229 | camera[+2]##i compared this to a lot of other cameras , believe me , it 's value for money . 230 | picture[+3]##the picturers are amazing . 
231 | ##do n't waste time looking at anything else , just buy this one . 232 | ##it do n't get any better then the nikon 4300 . 233 | [t]excellent compact digital camera 234 | ##i am a nikon fan , and owned and used slr nikon fe , n50 and n90s for many many years . 235 | picture quality[+2],function[+2]##recently bought this coolpix 4300 along with a 256mb ultra flash card . as expected , i happily found that qualities of the picture are excellent with 3m-4m pixel / fine combos and many of the user-friendly functions are very similar to slr models , and with added convenience of a digital camera that you can see pictures immediately . 236 | txt file[+2]##i particularly like the " txt " file which records all the control information for all the pictures you take. 237 | size[+2][u]##the compact digital camera is especially useful if you have little children around and you can always take the camera with you and take many pictures of them . 238 | ##try to buy a high capacity ultra card , as it 's 2x as faster as the regular cf card . 239 | ##consider the time you save during picture taking especially not missing a good moment and during download , it 's well worth the little extra money . 240 | ##an extra rechargeable en-el 1 battery is also handy , so you will seldom run into " out of battery " situation . 241 | ##use the rechargeable because that saves you money and helps the environment in a long run . 242 | ##one thing puzzles me is that for " fine and 2200 + " quality pictures , one ultra 256mb cf card can only hold 130-140 pics , but the size of the pictures are only around 1.2 mb , the math does not add up . 243 | ##i would expecct the card to hold over 200 pictures for size of 1.2 mb/per . 244 | ##anybody can explain that ? 245 | nikon[+3]##a nikon will make you happier and leave you lots of good memories , as always . 
246 | [t]you will not be sorry 247 | camera[+3],picture[+2]##i highly recommend this camera to anyone looking for a good digital camera that takes great pictures yet does n't take weeks to figure out how to operate . 248 | picture quality[+2]##we take this camera with us everywhere and are constantly amazed at the quality of the pictures that we get and the number of different ways the camera allows for pictures to be taken . 249 | camera[+3]##i am a picture fanatic so i consider myself picky and if your the same way this camera will not let you down.now buy it ! 250 | [t]absolutely great point and shoot camera 251 | ##first of all , let 's start with my needs : 1 $ 500 budget 2 4mp resolution , 3 optical clarity and color , 4 fast - very little shutter lag and quick sep-up for next picture , 5 compact size , and 6 good camera grip to minimize camera shake . 252 | ##for three months , i 've researched the internet and magazines , queried various sales personnel , and took hands-on in-store test drives . 253 | ##and i finally settled on the nikon coolpix 4300 . 254 | camera[+3]##i have it for a week now , and this camera has exceeded my expectations . 255 | picture[+2],indoor shot[+1]##the daylight pictures are brilliant and the indoor shots are very good provided that your subject is no more than about 8 feet away . 256 | ##the shutter lag is on par with what i 've experienced with point and shoot 35mm and aps film cameras and the next frame advance is about a couple seconds using a high speed 12x 128mb compact flash card a salesperson recommended the high speed card since next frame advance speed was important to me - and it was only a few dollars more . 257 | image download[+2][u]##uploading the images to my windows-based pc is very fast and simple . 
258 | indoor shot[-1]##the only drawbacks i 've encountered are : 1 indoor shots from a distance > 8ft lack contrast and color , but i 've found this to be a problem with all cameras with built-in flashes that i 've used even on a nikon slr . 259 | ##right now , i 'm researching for an inexpensive slave flash unit that 's designed for digital cameras . 260 | battery life[-1]##2 the battery life seems to be on the short side but adequate for most situations . 261 | ##i got 45 outdoor and 15 indoor shots with the monitor on turning the camera off and on over a 4 hour period before the battery died . 262 | ##an extra for a back-up battery would be a good investment . 263 | software[-1]##3 the picture editing software it came with is adequate , but not very robust . 264 | ##i 'll still use my photosuite for editing . 265 | ##one more thing about indoor shots - many of the reviews knocked this camera for low light focusing . 266 | ##just for the heck of it , i took it into my dark kitchen with only indirect light coming in though the door , and the focusing was just fine . 267 | camera[+3]##in summary , i love this camera and would whole-heartedly recommend this to anyone wanting a high quality point and shoot camera in the medium price range . 268 | ##- since i wrote the review above in january 2003 , i still love this camera . 269 | ##i 've picked up a few accessories that i found to be useful . 270 | ##i bought a digi-slave dsf-1s flash unit . 271 | ##i do n't use a bracket , i just hold it above my head and aim the flash with my left hand . 272 | ##i also got a ur-e 4 step down ring adapter , lc-e 900 lens cap , and the nikon 28mm filter set . 273 | ##this provides added protection for the camera lens and allows the use of filters . 274 | ##i leave the adapter and uv filter on all the time . 275 | ##this also allows you to turn on the camera on without having to remove the lens cap . 
276 | ##the downside is that you might accidentally take pictures with the lens cap on , the camera wo n't fit in the small cases , and 28mm filters are expensive and hard to find . 277 | [t]great camera 278 | camera[+2]##great camera . 279 | use[+2]##i am impressed with how easy this camera is to use . 280 | picture[+2]##it takes great pictures . 281 | manual[+2]##the manual is easy to understand , and it is mostly idiot proof . 282 | size[+2][u]##it is small , unlike my canon slr , so it easily fits in my pocket . 283 | ##i have not yet figured out how to make it snap quick pictures for action shots . 284 | ##now where did i put that manual ? 285 | [t]best 4-mp camera of its size and price range 286 | ##do n't let my five star rating fool you . 287 | ##the nikon coolpix 4300 deserves six . 288 | camera[+3]##i got this camera about a month ago and i can 't put it down . 289 | ##i was considering buying the minolta f-100 and the sony dsc-p 9 , but at the camera shop , realized this one went above and beyond . 290 | picture quality[+2]##i selected the 4300 right away and have gotten high quality pictures every time . 291 | autofocus[+1],scene mode[+1],manual mode[+1]##autofocus feels exactly like my film camera while scene and manual mode add a little fun challenge to it all . 292 | ##i would also recommend buying an extra battery and at least a 128 mb compact flash type i memory card . 293 | ##buy this camera and savour it . 294 | [t]great camera 295 | ##i reviewed several digital cameras before settling on this one . 296 | ##this one was rated very high by several people check out this site and epinions.com . 297 | camera[+2][p]##i have not been disappointed with my purchase . 298 | picture quality[+2]##picture quality in outdoor settings is indistinguishable from 35mm . 299 | indoor shot[-2]##on average , indoor shots with a flash are not quite as good as 35mm . 
300 | ##however , the convenience of using digital far outweighs the slightly less accurate indoor pictures . 301 | camera[+3]##i am extremely satisfied with this camera . 302 | ##i purchased it from amazon on a since discontinued promotion . 303 | ##buy an extra battery - life is good but still somewhat short . 304 | ##do n't buy this camera if you are not going to purchase a 128 megabyte card . 305 | ##you need the storage to hold a decent amount of shots at 4 megapixel resolution . 306 | ##i have one thing to say to the numbskulls who complain that this camera 's controls are confusing and hard to remember : 307 | ##read the directions ! 308 | ##the regular mode in this camera works for 80 % of settings . 309 | scene mode[+2]##the " scene " mode works well for the remainder of shots that are not going to be in a " regular " setting . 310 | ##i wish the camera had a higher optical zoom so that i could take better wildlife photos . 311 | optical setting[+1]##however , its 3x optical setting is acceptable for 70 % of the shots that i take . 312 | ##buy this camera , you wo n't regret it . 313 | [t]digital nirvana 314 | camera[+2]##if you 're looking for a small , compact , super resolution digital camera , you can end your search with the nikon 4300 ! 315 | picture quality[+3]##after a lengthy and extensive journey for a feature-loaded , high performance , travel size camera , i initially gravitated to this little gem because of the superior picture quality 4 mega pixel . 316 | nikon[+3]##after using it for about a month , i can state unequivocably that this nikon definitely delivers a huge bang for the buck . 317 | use[+1], accessory[+2]##its easy to use and the sheer fun of being able to use an array of nikon accessories like the telephoto and wide-angle lenses is the proverbial icing on the cake . 318 | picture quality[+3],feature[+3]##yes , the picture quality and features which are too numerous to mention are unmatched for any camera in this price range . 
319 | design[+2],construction[+2],optic[+2]##the design and construction are excellent -- as is the legendary quality of the nikon optics . 320 | ##i own a 35mm slr camera and this is my second digital . 321 | ##if you 're seeking that elusive state of photographic euphoria that is free of buyer 's remorse and filled with cool , then travel to the world of the coolpix 4300 ! 322 | [t]i love this baby 323 | ##had it for a week . 324 | camera[+2]##there are so many functions in this little , yet powerful camera ! 325 | [t]it 's so cool 326 | camera[+3][p]##seriously ! i love it ! 327 | ##my room is so dark and when i tried taking pics of it with my old camera , they came out black , but with this , they look the same in the picture as in real life . 328 | ##better actually cause my carpet looks clean , lol . 329 | camera[+2]##but this camera is great ! 330 | picture quality[+3],movie[+1]##the picture quality is amazing and you can connect it to your tv and could make silent movies that way if you wanted to . 331 | ##i can 't connect it to my computer though , but that 's a problem with windows me , not the camera . 332 | ##if you have windows me you should upgrade to xp anyway , lol . 333 | ##but it works perfectly fine on my parent 's computer . 334 | lense cap[+1]##it does have a lense cap , but it wo n't let you take pics with it on which is real good . 335 | battery[+2]##the battery recharges ral fast and the recharger thingy that comes with is real nice . 336 | camera[+2]##ooh , and i dropped it and it still works fine : this little camera has so many features i have n't even gotten to all of them yet . 337 | [t]excellent little camera 338 | ##i did quite a bit of research before buying this camera , and this one had everything i was looking for . 339 | use[+1],feature[+2], camera[+2]##it 's easy for beginners to use , but has features that more serious photographers will love , so it 's an excellent camera to grow into . 
340 | weight[+2], picture[+2][u]##it 's light weight enough to take with you everywhere , but powerful enough to get outstanding pix . 341 | continuous shot mode[+3]##i love the continuous shot mode , which allows you to take up to 16 pix in rapid succession -- great for action shots . 342 | photo quality[+3],print[+2]##i 'm amazed at the photo quality -- prints are indistinguishable from 35mm prints . 343 | 8mb card[-2]##i only have one complaint , and that is the 8mb card included . 344 | 8mb[-2]##8mb for a camera like this is a joke . 345 | ##a card with more memory is a must . 346 | ##a minor quibble , since most cameras on the market still only include the 8mb cards . 347 | [t]perfect consumer digital camera 348 | ##if you do any research into digital cameras , you 'll quickly find that this camera is just about the best value out there . 349 | ##these are the reasons i think it 's great : 350 | feature[+2]##1 it has all the features an amatuer photo-enthusiast would want . 351 | use[+2]##2 it is easy enough for my grandmother to use . 352 | size[+2][u],control[+2]##3 it is very compact but the controls are so well designed that they 're still easy to use . 353 | ##4 it has optional lenses like wide angles and extra zooms . 354 | ##5 for " digital film " it uses compact flash cards , which are easy to find , cheap , reliable and supported by many other devices you may have at hoome . 355 | nikon support[+1]##6 the nikon coolpix line is a well-established line so you know support and parts are going to be available even when nikon releases a newer model . 356 | picture quality[+2]##7 picture quality is excellent . 357 | battery life[+2]##8 battery life is excellent . 358 | optical zoom[+2]##9 3x built-in optical zoom is a lot for this price range . 359 | price[+2]##10 great price for all the features . 360 | ##here 's what you may miss or be annoyed by with this camera : 361 | firewire[-1]##1 no direct firewire to the camera . 362 | ##there is usb , though . 
363 | viewfinder[-1]##2 the camera is so small that when you attach some lenses i have the 19mm wide-angle -- wc-e 68 , the optical viewfinder is partially obscured . 364 | ##this does n't affect the picture , of course . 365 | ##you just have to use the lcd viewfinder to see the whole picture . 366 | ##it 's no problem for me personally . 367 | ##3 battery has to be removed to recharge . 368 | ##4 an ac adapter for powering the camera while the battery 's out is not included , although one can be purchased separately . 369 | ##5 two different adapters -- ur-e 4 and ur-e 7 -- are required for some attachment lenses . 370 | ##most of these " annoyances " , though , are comparable to what you 'll find on other cameras in this price range . 371 | [t]small camera 372 | ##i am new to the whole digital camera thing ; well , new to the whole camera thing period , really . 373 | price[+2][u],learn[+2],image[+3]##this camera was affordable , very easy to learn , and produces spectacular images . 374 | auto mode[+2],scene mode[+2],manual mode[+2]##the auto-mode is good enough for most shots but the 4300 also boasts 12 versatile scene modes as well as a manual mode though i admit i have n't played with it too much on manual . 375 | size[+2]##it 's size also makes it ideal for travel . 376 | camera[+3]##overall the nikon 4300 is a very dependable , robust , and useful little camera . 377 | [t]nikon does it again 378 | camera[+3], print quality[+3]##awesome camera with huge print quality in a tiny package . 379 | closeup mode[+2], battery[+2]##the same 4mp chip from the 4500 camera , plus a 3x zoom with the ability to expand upon that with extenders , great closeup mode , long lasting rechargable battery , etc etc. 380 | camera[+3]##in my opinion it 's the best camera for the money if you 're looking for something that 's easy to use , small good for travel , and provides excellent , sharp images . 
381 | -------------------------------------------------------------------------------- /dataset/customer_review_data/Readme.txt: -------------------------------------------------------------------------------- 1 | ***************************************************************************** 2 | * Annotated by: Minqing Hu and Bing Liu, 2004. 3 | * Department of Computer Sicence 4 | * University of Illinois at Chicago 5 | * 6 | * Contact: Bing Liu, liub@cs.uic.edu 7 | * http://www.cs.uic.edu/~liub 8 | ***************************************************************************** 9 | 10 | Readme file 11 | 12 | This folder contains annotated customer reviews of 5 products. 13 | 14 | 1. digital camera: Canon G3 15 | 2. digital camera: Nikon coolpix 4300 16 | 3. celluar phone: Nokia 6610 17 | 4. mp3 player: Creative Labs Nomad Jukebox Zen Xtra 40GB 18 | 5. dvd player: Apex AD2600 Progressive-scan DVD player 19 | 20 | All the reviews were from amazon.com. They were used in the following 21 | two papers: 22 | 23 | Minqing Hu and Bing Liu. "Mining and summarizing customer reviews". 24 | Proceedings of the ACM SIGKDD International Conference on 25 | Knowledge Discovery & Data Mining (KDD-04), 2004. 26 | 27 | Minqing Hu and Bing Liu. "Mining Opinion Features in Customer 28 | Reviews." Proceedings of Nineteeth National Conference on 29 | Artificial Intellgience (AAAI-2004), 2004. 30 | 31 | Our project homepage: http://www.cs.uic.edu/~liub/FBS/FBS.html 32 | 33 | 34 | 35 | Symbols used in the annotated reviews: 36 | 37 | [t]: the title of the review: Each [t] tag starts a review. 38 | We did not use the title information in our papers. 39 | xxxx[+|-n]: xxxx is a product feature. 40 | [+n]: Positive opinion, n is the opinion strength: 3 strongest, 41 | and 1 weakest. Note that the strength is quite subjective. 42 | You may want ignore it, but only considering + and - 43 | [-n]: Negative opinion 44 | ## : start of each sentence. Each line is a sentence. 
# -*- coding: utf-8 -*-
"""Readers that convert the raw ABSA / customer-review datasets to JSON.

Each reader parses one input format and writes a JSON file next to the
input file containing a list of dicts with the keys ``sentence``,
``target`` and ``polarity``.  Every reader returns the path of the JSON
file it wrote so callers can load it with :func:`read_json_formatted`.
"""
import json
import re
from xml.dom import minidom


def read_json_formatted(dataset_filename):
    """Load and return the already-converted JSON dataset at *dataset_filename*."""
    with open(dataset_filename) as data_file:
        return json.load(data_file)


def _json_filename_for(filename):
    """Return the output JSON path derived from *filename*.

    Swaps the ``.xml`` suffix for ``.json``; if the input has no ``.xml``
    in it (e.g. the plain-text customer-review ``.txt`` files), ``.json``
    is appended instead, matching the ``*.txt.json`` files in the dataset.
    """
    filename_json = filename.replace('.xml', '.json')
    if filename_json == filename:
        filename_json = filename + '.json'
    return filename_json


def _dump_json(filename, list_of_meta_dict):
    """Write *list_of_meta_dict* next to *filename* and return the JSON path."""
    filename_json = _json_filename_for(filename)
    with open(filename_json, 'w') as outfile:
        json.dump(list_of_meta_dict, outfile)
    return filename_json


def _read_absa_xml(filename, review_tag, opinions_tag, opinion_tag, target_attr):
    """Shared extractor for the SemEval ABSA XML formats.

    :param filename: path to the ABSA XML file.
    :param review_tag: tag grouping the sentences (``'sentences'`` for the
        2014 format, ``'Reviews'`` for the 2015 format).
    :param opinions_tag: container tag of the opinion annotations.
    :param opinion_tag: tag of a single opinion annotation.
    :param target_attr: attribute holding the aspect term
        (``'term'`` in 2014, ``'target'`` in 2015).
    :return: path of the JSON file written.
    """
    dom_tree = minidom.parse(filename)
    list_of_meta_dict = []
    for review in dom_tree.getElementsByTagName(review_tag):
        for sentence in review.getElementsByTagName('sentence'):
            text_node = sentence.getElementsByTagName('text')[0]
            text = text_node.childNodes[0].nodeValue
            for opinion_dom in sentence.getElementsByTagName(opinions_tag):
                target_list = []
                polarity_list = []
                for opinion in opinion_dom.getElementsByTagName(opinion_tag):
                    target_list.append(opinion.getAttribute(target_attr).strip())
                    polarity_list.append(opinion.getAttribute('polarity').strip())
                list_of_meta_dict.append({
                    'sentence': text,
                    'target': target_list,
                    'polarity': polarity_list,
                })
    return _dump_json(filename, list_of_meta_dict)


def read_absa_2014_restaurant_xml(filename):
    """Convert a SemEval-2014 ABSA restaurant XML file to JSON.

    Fix: removed a leftover ``import ipdb; ipdb.set_trace()`` debugger
    breakpoint that halted every call and pulled in a third-party package.

    :param filename: path to the 2014 ABSA XML file.
    :return: path of the JSON file written.
    """
    return _read_absa_xml(filename, 'sentences', 'aspectTerms', 'aspectTerm', 'term')


def read_absa_2015_restaurant_xml(filename):
    """Convert a SemEval-2015 ABSA restaurant XML file to JSON.

    :param filename: path to the 2015 ABSA XML file.
    :return: path of the JSON file written.
    """
    return _read_absa_xml(filename, 'Reviews', 'Opinions', 'Opinion', 'target')


def read_customer_review_data(filename):
    """Convert a Hu & Liu annotated customer-review ``.txt`` file to JSON.

    Lines carrying ``[t]``/``[u]``/``[p]``/``[s]``/``[cc]``/``[cs]`` tags are
    skipped, as in the original implementation.  A kept line looks like
    ``aspect[+2],other[-1]##sentence text``; ``+``/``-`` in the annotation
    maps to the polarity labels ``'positive'``/``'negative'``.

    Fixes: blank lines (present in the dataset files) no longer raise
    ``IndexError``; the aspect-stripping regex now uses a raw string so the
    ``\\s`` escape is valid; a leftover debug ``print`` was removed.

    :param filename: path to the annotated review text file.
    :return: path of the JSON file written.
    """
    with open(filename) as f:
        content = [x.strip() for x in f.readlines()]
    skip_tags = ('[t]', '[p]', '[cs]', '[cc]', '[s]', '[u]')
    list_of_meta_dict = []
    for line in content:
        if any(tag in line for tag in skip_tags):
            continue
        splitted_line = line.split('##')
        if len(splitted_line) < 2:
            # Blank or malformed line without a '##' separator; previously
            # this raised IndexError on splitted_line[1].
            continue
        sentence = splitted_line[1]
        aspect_list = []
        polarity_list = []
        for aspect in splitted_line[0].split(','):
            if not aspect:
                continue
            # Keep only letters/whitespace from e.g. "battery life[+2]".
            stripped_aspect = re.sub(r'[^A-Za-z\s]+', '', aspect).strip()
            if '+' in aspect:
                aspect_list.append(stripped_aspect)
                polarity_list.append('positive')
            elif '-' in aspect:
                aspect_list.append(stripped_aspect)
                polarity_list.append('negative')
        list_of_meta_dict.append({
            'sentence': sentence,
            'target': aspect_list,
            'polarity': polarity_list,
        })
    return _dump_json(filename, list_of_meta_dict)
63,ordered 66 | 64,local 67 | 65,annoying 68 | 66,fucking 69 | 67,painfull 70 | 68,annoy 71 | 69,bad 72 | 70,fuck 73 | 71,crazy 74 | 72,appall 75 | 73,uncooperative 76 | 74,better 77 | 75,delicious 78 | 76,idiotically 79 | 77,miserable 80 | 78,ugliest 81 | 79,rejects 82 | 80,cheapest 83 | 81,spot 84 | 82,poor 85 | 83,helplessness 86 | 84,annoyingly 87 | 85,contaminates 88 | 86,inhospitable 89 | 87,unhappy 90 | 88,lewdly 91 | 89,sickeningly 92 | 90,perfect 93 | 91,infected 94 | 92,enough 95 | 93,outstanding 96 | 94,served 97 | 95,bitch 98 | 96,bastard 99 | 97,angriness 100 | 98,sucker 101 | 99,best 102 | 100,delayed 103 | 101,pollute 104 | 102,shamefully 105 | 103,abysmally 106 | 104,finer 107 | 105,use fresh 108 | 106,ridiculously 109 | 107,lech 110 | 108,wow 111 | 109,authentic 112 | 110,appalled 113 | 111,awsome 114 | 112,yummiest 115 | 113,bestest 116 | 114,liars 117 | 115,tried 118 | 116,junk 119 | 117,slowww 120 | 118,contamination 121 | 119,posturing 122 | 120,frustrated 123 | 121,tastiest 124 | 122,painfully 125 | 123,hell-bent 126 | 124,horrify 127 | 125,worsen 128 | 126,regretted 129 | 127,fine 130 | 128,sh*t 131 | 129,callous 132 | 130,repulsive 133 | 131,ill-treated 134 | 132,procrastinate 135 | 133,excellent 136 | 134,appallingly 137 | 135,expressed 138 | 136,noises 139 | 137,shameless 140 | 138,nagging 141 | 139,worthlessly 142 | 140,remorsefully 143 | 141,negativity 144 | 142,unlimited 145 | 143,relish 146 | 144,sloooow 147 | 145,bastards 148 | 146,wonderful 149 | 147,vomited 150 | 148,angrily 151 | 149,regrettable 152 | 150,brilliant 153 | 151,uncouth 154 | 152,abysmal 155 | 153,lecher 156 | 154,leak 157 | 155,sucky 158 | 156,indecent 159 | 157,slooow 160 | 158,shit 161 | 159,noisy 162 | 160,bitchy 163 | 161,smelt 164 | 162,yummy 165 | 163,repugnantly 166 | 164,miraculous 167 | 165,mess 168 | 166,smelled 169 | 167,decayed 170 | 168,messy 171 | 169,horrible 172 | 170,thumb-down 173 | 171,loathsome 174 | 172,shamelessly 175 | 173,abominably 176 | 
174,misbehave 177 | 175,regretful 178 | 176,frustrations 179 | 177,lousy 180 | 178,remorselessly 181 | 179,slower 182 | 180,loved 183 | 181,regret 184 | 182,shameful 185 | 183,leaks 186 | 184,lie 187 | 185,tortured 188 | 186,loser 189 | 187,defects 190 | 188,unattractive 191 | 189,get 192 | 190,negative 193 | 191,ludicrously 194 | 192,insultingly 195 | 193,illness 196 | 194,huge 197 | 195,complementary 198 | 196,defective 199 | 197,horrifies 200 | 198,costly 201 | 199,horrifying 202 | 200,arrogance 203 | 201,obnoxiously 204 | 202,bullshyt 205 | 203,unacceptable 206 | 204,annoyed 207 | 205,procrastination 208 | 206,repugnant 209 | 207,incredible 210 | 208,molest 211 | 209,badly 212 | 210,infested 213 | 211,selection 214 | 212,frustratingly 215 | 213,awfulness 216 | 214,contaminating 217 | 215,tortuous 218 | 216,vomits 219 | 217,inferior 220 | 218,crap 221 | 219,waste 222 | 220,shamefulness 223 | 221,rejected 224 | 222,great 225 | 223,complimentary 226 | 224,idiots 227 | 225,nice 228 | 226,illegitimate 229 | 227,uglier 230 | 228,insulted 231 | 229,appealing 232 | 230,foully 233 | 231,messed 234 | 232,pollution 235 | 233,worse 236 | 234,original 237 | 235,shortcoming 238 | 236,horrific 239 | 237,miserably 240 | 238,exemplary 241 | 239,arrogant 242 | 240,lier 243 | 241,finest 244 | 242,unique 245 | 243,creepy 246 | 244,serve 247 | 245,suck 248 | 246,loathly 249 | 247,serving 250 | 248,lame-duck 251 | 249,inferiority 252 | 250,affordable 253 | 251,repulsing 254 | 252,leakages 255 | 253,proper 256 | 254,recommended 257 | 255,inconsiderately 258 | 256,garbage 259 | 257,mistakes 260 | 258,infection 261 | 259,annoys 262 | 260,noise 263 | 261,lied 264 | 262,unbearablely 265 | 263,sloooooooooooooow 266 | 264,unappealing 267 | 265,mistakenly 268 | 266,delay 269 | 267,hazardous 270 | 268,offend 271 | 269,sloow 272 | 270,interesting 273 | 271,messes 274 | 272,bullshit 275 | 273,remorse 276 | 274,repulsed 277 | 275,cocky 278 | 276,awfully 279 | 277,listless 280 | 278,sloppy 281 | 
279,arrogantly 282 | 280,life-threatening 283 | 281,leakage 284 | 282,loves 285 | 283,nicest 286 | 284,famous 287 | 285,good 288 | 286,insults 289 | 287,lethargy 290 | 288,blameworthy 291 | 289,ultimate 292 | 290,regreted 293 | 291,nuisance 294 | 292,ridiculous 295 | 293,gives 296 | 294,classic 297 | 295,executed 298 | 296,favorite 299 | 297,illiterate 300 | 298,creeps 301 | 299,quality 302 | 300,warm 303 | 301,pain 304 | 302,polluters 305 | 303,sweaty 306 | 304,slowwww 307 | 305,lethargic 308 | 306,creep 309 | 307,frustrates 310 | 308,offender 311 | 309,f**k 312 | 310,remorseful 313 | 311,abnormal 314 | 312,slowest 315 | 313,ruthless 316 | 314,unhealthy 317 | 315,bullies 318 | 316,inhospitality 319 | 317,sickness 320 | 318,the 321 | 319,nag 322 | 320,loathsomely 323 | 321,lewd 324 | 322,reject 325 | 323,junky 326 | 324,infest 327 | 325,popular 328 | 326,indecency 329 | 327,ugliness 330 | 328,indecently 331 | 329,rejection 332 | 330,appreciate 333 | 331,offer 334 | 332,painful 335 | 333,helplessly 336 | 334,offenses 337 | 335,humiliate 338 | 336,love 339 | 337,adventurous 340 | 338,frustrate 341 | 339,mediocrity 342 | 340,super 343 | 341,refreshing 344 | 342,inconsiderate 345 | 343,slowed 346 | 344,repugnance 347 | 345,sick 348 | 346,like 349 | 347,torturously 350 | 348,slow 351 | 349,insulting 352 | 350,rejecting 353 | 351,chilled 354 | 352,superb 355 | 353,helpless 356 | 354,repulsiveness 357 | 355,vomit 358 | 356,favorita 359 | 357,simple 360 | 358,uncomfortable 361 | 359,worsening 362 | 360,remorselessness 363 | 361,bull**** 364 | 362,poorly 365 | 363,obscenely 366 | 364,sublime 367 | 365,torturing 368 | 366,fresh 369 | 367,offensively 370 | 368,smells 371 | 369,carp 372 | 370,ludicrous 373 | 371,available 374 | 372,sucked 375 | 373,lackluster 376 | 374,offence 377 | 375,frustrating 378 | 376,leaky 379 | 377,worst 380 | 378,harasses 381 | 379,slime 382 | 380,tasty 383 | 381,miserableness 384 | 382,mistake 385 | 383,vomiting 386 | 384,filthy 387 | 385,special 
388 | 386,useless 389 | 387,irregularity 390 | 388,bull---- 391 | 389,delightful 392 | 390,offensive 393 | 391,loses 394 | 392,defect 395 | 393,loathe 396 | 394,smell 397 | 395,molestation 398 | 396,ill-treatment 399 | 397,offered 400 | 398,obscene 401 | 399,noisier 402 | 400,give 403 | 401,brillaint 404 | 402,recommend 405 | 403,helpful 406 | 404,fantastic 407 | 405,messing 408 | 406,idiot 409 | 407,beautiful 410 | 408,profanity 411 | 409,mediocre 412 | 410,appalling 413 | 411,miss 414 | 412,offers 415 | 413,lazy 416 | 414,ridicule 417 | 415,issues 418 | 416,problems 419 | 417,option 420 | 418,torture 421 | 419,filth 422 | 420,adverse 423 | 421,giving 424 | 422,slowly 425 | 423,worthless 426 | 424,enjoy 427 | 425,horrid 428 | 426,offensiveness 429 | 427,regular 430 | 428,poorest 431 | 429,tortures 432 | 430,heartbreakingly 433 | 431,sickly 434 | 432,try 435 | 433,smelling 436 | 434,humiliation 437 | 435,stink 438 | 436,favourite 439 | 437,crappy 440 | 438,spl 441 | 439,worthlessness 442 | 440,foul 443 | 441,nasty 444 | 442,unacceptablely 445 | 443,anger 446 | 444,foulness 447 | 445,modern 448 | 446,sicken 449 | 447,pleasant 450 | 448,issue 451 | 449,fabulous 452 | 450,humiliating 453 | 451,regretfully 454 | 452,smelly 455 | 453,creeping 456 | 454,shortcomings 457 | 455,horrifys 458 | 456,profane 459 | 457,repulse 460 | 458,sloww 461 | 459,negatives 462 | 460,homemade 463 | 461,decay 464 | 462,amazing 465 | 463,problematic 466 | 464,exceptional 467 | 465,hells 468 | 466,awful 469 | 467,obscenity 470 | 468,breathtaking 471 | 469,extreme 472 | 470,shamelessness 473 | 471,unacceptably 474 | 472,procrastinates 475 | 473,lewdness 476 | 474,innovative 477 | 475,contaminate 478 | 476,ruthlessness 479 | 477,harass 480 | 478,mistaken 481 | 479,stinks 482 | 480,sickening 483 | 481,contaminated 484 | 482,frustration 485 | 483,heartbreaker 486 | 484,obnoxious 487 | 485,annoyance 488 | 486,luscious 489 | 487,assorted 490 | 488,sloppily 491 | 489,prohibit 492 | 490,horrendously 
493 | 491,serves 494 | 492,shame 495 | 493,polluter 496 | 494,torturous 497 | 495,aweful 498 | 496,leaking 499 | 497,ruthlessly 500 | 498,thumbs-down 501 | 499,irregular 502 | 500,hell 503 | 501,enormous 504 | -------------------------------------------------------------------------------- /dataset/sentiment_words_text_files/positive_words.txt: -------------------------------------------------------------------------------- 1 | a+ 2 | abound 3 | abounds 4 | abundance 5 | abundant 6 | accessable 7 | accessible 8 | acclaim 9 | acclaimed 10 | acclamation 11 | accolade 12 | accolades 13 | accommodative 14 | accomodative 15 | accomplish 16 | accomplished 17 | accomplishment 18 | accomplishments 19 | accurate 20 | accurately 21 | achievable 22 | achievement 23 | achievements 24 | achievible 25 | acumen 26 | adaptable 27 | adaptive 28 | adequate 29 | adjustable 30 | admirable 31 | admirably 32 | admiration 33 | admire 34 | admirer 35 | admiring 36 | admiringly 37 | adorable 38 | adore 39 | adored 40 | adorer 41 | adoring 42 | adoringly 43 | adroit 44 | adroitly 45 | adulate 46 | adulation 47 | adulatory 48 | advanced 49 | advantage 50 | advantageous 51 | advantageously 52 | advantages 53 | adventuresome 54 | adventurous 55 | advocate 56 | advocated 57 | advocates 58 | affability 59 | affable 60 | affably 61 | affectation 62 | affection 63 | affectionate 64 | affinity 65 | affirm 66 | affirmation 67 | affirmative 68 | affluence 69 | affluent 70 | afford 71 | affordable 72 | affordably 73 | afordable 74 | agile 75 | agilely 76 | agility 77 | agreeable 78 | agreeableness 79 | agreeably 80 | all-around 81 | alluring 82 | alluringly 83 | altruistic 84 | altruistically 85 | amaze 86 | amazed 87 | amazement 88 | amazes 89 | amazing 90 | amazingly 91 | ambitious 92 | ambitiously 93 | ameliorate 94 | amenable 95 | amenity 96 | amiability 97 | amiabily 98 | amiable 99 | amicability 100 | amicable 101 | amicably 102 | amity 103 | ample 104 | amply 105 | amuse 106 | amusing 107 | 
amusingly 108 | angel 109 | angelic 110 | apotheosis 111 | appeal 112 | appealing 113 | applaud 114 | appreciable 115 | appreciate 116 | appreciated 117 | appreciates 118 | appreciative 119 | appreciatively 120 | appropriate 121 | approval 122 | approve 123 | ardent 124 | ardently 125 | ardor 126 | articulate 127 | aspiration 128 | aspirations 129 | aspire 130 | assurance 131 | assurances 132 | assure 133 | assuredly 134 | assuring 135 | astonish 136 | astonished 137 | astonishing 138 | astonishingly 139 | astonishment 140 | astound 141 | astounded 142 | astounding 143 | astoundingly 144 | astutely 145 | attentive 146 | attraction 147 | attractive 148 | attractively 149 | attune 150 | audible 151 | audibly 152 | auspicious 153 | authentic 154 | authoritative 155 | autonomous 156 | available 157 | aver 158 | avid 159 | avidly 160 | award 161 | awarded 162 | awards 163 | awe 164 | awed 165 | awesome 166 | awesomely 167 | awesomeness 168 | awestruck 169 | awsome 170 | backbone 171 | balanced 172 | bargain 173 | beauteous 174 | beautiful 175 | beautifullly 176 | beautifully 177 | beautify 178 | beauty 179 | beckon 180 | beckoned 181 | beckoning 182 | beckons 183 | believable 184 | believeable 185 | beloved 186 | benefactor 187 | beneficent 188 | beneficial 189 | beneficially 190 | beneficiary 191 | benefit 192 | benefits 193 | benevolence 194 | benevolent 195 | benifits 196 | best 197 | best-known 198 | best-performing 199 | best-selling 200 | better 201 | better-known 202 | better-than-expected 203 | beutifully 204 | blameless 205 | bless 206 | blessing 207 | bliss 208 | blissful 209 | blissfully 210 | blithe 211 | blockbuster 212 | bloom 213 | blossom 214 | bolster 215 | bonny 216 | bonus 217 | bonuses 218 | boom 219 | booming 220 | boost 221 | boundless 222 | bountiful 223 | brainiest 224 | brainy 225 | brand-new 226 | brave 227 | bravery 228 | bravo 229 | breakthrough 230 | breakthroughs 231 | breathlessness 232 | breathtaking 233 | breathtakingly 234 | breeze 235 
| bright 236 | brighten 237 | brighter 238 | brightest 239 | brilliance 240 | brilliances 241 | brilliant 242 | brilliantly 243 | brisk 244 | brotherly 245 | bullish 246 | buoyant 247 | cajole 248 | calm 249 | calming 250 | calmness 251 | capability 252 | capable 253 | capably 254 | captivate 255 | captivating 256 | carefree 257 | cashback 258 | cashbacks 259 | catchy 260 | celebrate 261 | celebrated 262 | celebration 263 | celebratory 264 | champ 265 | champion 266 | charisma 267 | charismatic 268 | charitable 269 | charm 270 | charming 271 | charmingly 272 | chaste 273 | cheap 274 | cheaper 275 | cheapest 276 | cheer 277 | cheerful 278 | cheery 279 | cherish 280 | cherished 281 | cherub 282 | chic 283 | chill 284 | chilled 285 | chivalrous 286 | chivalry 287 | civility 288 | civilize 289 | clarity 290 | classic 291 | classy 292 | clean 293 | cleaner 294 | cleanest 295 | cleanliness 296 | cleanly 297 | clear 298 | clear-cut 299 | cleared 300 | clearer 301 | clearly 302 | clears 303 | clever 304 | cleverly 305 | cohere 306 | coherence 307 | coherent 308 | cohesive 309 | colorful 310 | comely 311 | comfort 312 | comfortable 313 | comfortably 314 | comforting 315 | comfy 316 | commend 317 | commendable 318 | commendably 319 | commitment 320 | commodious 321 | compact 322 | compactly 323 | compassion 324 | compassionate 325 | compatible 326 | competitive 327 | complement 328 | complementary 329 | complemented 330 | complements 331 | compliant 332 | compliment 333 | complimentary 334 | comprehensive 335 | conciliate 336 | conciliatory 337 | concise 338 | confidence 339 | confident 340 | congenial 341 | congratulate 342 | congratulation 343 | congratulations 344 | congratulatory 345 | conscientious 346 | considerate 347 | consistent 348 | consistently 349 | constructive 350 | consummate 351 | contentment 352 | continuity 353 | contrasty 354 | contribution 355 | convenience 356 | convenient 357 | conveniently 358 | convience 359 | convienient 360 | convient 361 | 
convincing 362 | convincingly 363 | cool 364 | coolest 365 | cooperative 366 | cooperatively 367 | cornerstone 368 | correct 369 | correctly 370 | cost-effective 371 | cost-saving 372 | counter-attack 373 | counter-attacks 374 | courage 375 | courageous 376 | courageously 377 | courageousness 378 | courteous 379 | courtly 380 | covenant 381 | cozy 382 | creative 383 | credence 384 | credible 385 | crisp 386 | crisper 387 | cure 388 | cure-all 389 | cushy 390 | cute 391 | cuteness 392 | danke 393 | danken 394 | daring 395 | daringly 396 | darling 397 | dashing 398 | dauntless 399 | dawn 400 | dazzle 401 | dazzled 402 | dazzling 403 | dead-cheap 404 | dead-on 405 | decency 406 | decent 407 | decisive 408 | decisiveness 409 | dedicated 410 | defeat 411 | defeated 412 | defeating 413 | defeats 414 | defender 415 | deference 416 | deft 417 | deginified 418 | delectable 419 | delicacy 420 | delicate 421 | delicious 422 | delight 423 | delighted 424 | delightful 425 | delightfully 426 | delightfulness 427 | dependable 428 | dependably 429 | deservedly 430 | deserving 431 | desirable 432 | desiring 433 | desirous 434 | destiny 435 | detachable 436 | devout 437 | devine 438 | dexterous 439 | dexterously 440 | dextrous 441 | dignified 442 | dignify 443 | dignity 444 | diligence 445 | diligent 446 | diligently 447 | diplomatic 448 | dirt-cheap 449 | distinction 450 | distinctive 451 | distinguished 452 | diversified 453 | divine 454 | divinely 455 | dominate 456 | dominated 457 | dominates 458 | dote 459 | dotingly 460 | doubtless 461 | dreamland 462 | dumbfounded 463 | dumbfounding 464 | dummy-proof 465 | durable 466 | dynamic 467 | eager 468 | eagerly 469 | eagerness 470 | earnest 471 | earnestly 472 | earnestness 473 | ease 474 | eased 475 | eases 476 | easier 477 | easiest 478 | easiness 479 | easing 480 | easy 481 | easy-to-use 482 | easygoing 483 | ebullience 484 | ebullient 485 | ebulliently 486 | ecenomical 487 | economical 488 | ecstasies 489 | ecstasy 490 | ecstatic 
491 | ecstatically 492 | edify 493 | educated 494 | effective 495 | effectively 496 | effectiveness 497 | effectual 498 | efficacious 499 | efficient 500 | efficiently 501 | effortless 502 | effortlessly 503 | effusion 504 | effusive 505 | effusively 506 | effusiveness 507 | elan 508 | elate 509 | elated 510 | elatedly 511 | elation 512 | electrify 513 | elegance 514 | elegant 515 | elegantly 516 | elevate 517 | elite 518 | eloquence 519 | eloquent 520 | eloquently 521 | embolden 522 | eminence 523 | eminent 524 | empathize 525 | empathy 526 | empower 527 | empowerment 528 | enchant 529 | enchanted 530 | enchanting 531 | enchantingly 532 | encourage 533 | encouragement 534 | encouraging 535 | encouragingly 536 | endear 537 | endearing 538 | endorse 539 | endorsed 540 | endorsement 541 | endorses 542 | endorsing 543 | energetic 544 | energize 545 | energy-efficient 546 | energy-saving 547 | engaging 548 | engrossing 549 | enhance 550 | enhanced 551 | enhancement 552 | enhances 553 | enjoy 554 | enjoyable 555 | enjoyably 556 | enjoyed 557 | enjoying 558 | enjoyment 559 | enjoys 560 | enlighten 561 | enlightenment 562 | enliven 563 | ennoble 564 | enough 565 | enrapt 566 | enrapture 567 | enraptured 568 | enrich 569 | enrichment 570 | enterprising 571 | entertain 572 | entertaining 573 | entertains 574 | enthral 575 | enthrall 576 | enthralled 577 | enthuse 578 | enthusiasm 579 | enthusiast 580 | enthusiastic 581 | enthusiastically 582 | entice 583 | enticed 584 | enticing 585 | enticingly 586 | entranced 587 | entrancing 588 | entrust 589 | enviable 590 | enviably 591 | envious 592 | enviously 593 | enviousness 594 | envy 595 | equitable 596 | ergonomical 597 | err-free 598 | erudite 599 | ethical 600 | eulogize 601 | euphoria 602 | euphoric 603 | euphorically 604 | evaluative 605 | evenly 606 | eventful 607 | everlasting 608 | evocative 609 | exalt 610 | exaltation 611 | exalted 612 | exaltedly 613 | exalting 614 | exaltingly 615 | examplar 616 | examplary 617 | 
excallent 618 | exceed 619 | exceeded 620 | exceeding 621 | exceedingly 622 | exceeds 623 | excel 624 | exceled 625 | excelent 626 | excellant 627 | excelled 628 | excellence 629 | excellency 630 | excellent 631 | excellently 632 | excels 633 | exceptional 634 | exceptionally 635 | excite 636 | excited 637 | excitedly 638 | excitedness 639 | excitement 640 | excites 641 | exciting 642 | excitingly 643 | exellent 644 | exemplar 645 | exemplary 646 | exhilarate 647 | exhilarating 648 | exhilaratingly 649 | exhilaration 650 | exonerate 651 | expansive 652 | expeditiously 653 | expertly 654 | exquisite 655 | exquisitely 656 | extol 657 | extoll 658 | extraordinarily 659 | extraordinary 660 | exuberance 661 | exuberant 662 | exuberantly 663 | exult 664 | exultant 665 | exultation 666 | exultingly 667 | eye-catch 668 | eye-catching 669 | eyecatch 670 | eyecatching 671 | fabulous 672 | fabulously 673 | facilitate 674 | fair 675 | fairly 676 | fairness 677 | faith 678 | faithful 679 | faithfully 680 | faithfulness 681 | fame 682 | famed 683 | famous 684 | famously 685 | fancier 686 | fancinating 687 | fancy 688 | fanfare 689 | fans 690 | fantastic 691 | fantastically 692 | fascinate 693 | fascinating 694 | fascinatingly 695 | fascination 696 | fashionable 697 | fashionably 698 | fast 699 | fast-growing 700 | fast-paced 701 | faster 702 | fastest 703 | fastest-growing 704 | faultless 705 | fav 706 | fave 707 | favor 708 | favorable 709 | favored 710 | favorite 711 | favorited 712 | favour 713 | fearless 714 | fearlessly 715 | feasible 716 | feasibly 717 | feat 718 | feature-rich 719 | fecilitous 720 | feisty 721 | felicitate 722 | felicitous 723 | felicity 724 | fertile 725 | fervent 726 | fervently 727 | fervid 728 | fervidly 729 | fervor 730 | festive 731 | fidelity 732 | fiery 733 | fine 734 | fine-looking 735 | finely 736 | finer 737 | finest 738 | firmer 739 | first-class 740 | first-in-class 741 | first-rate 742 | flashy 743 | flatter 744 | flattering 745 | 
flatteringly 746 | flawless 747 | flawlessly 748 | flexibility 749 | flexible 750 | flourish 751 | flourishing 752 | fluent 753 | flutter 754 | fond 755 | fondly 756 | fondness 757 | foolproof 758 | foremost 759 | foresight 760 | formidable 761 | fortitude 762 | fortuitous 763 | fortuitously 764 | fortunate 765 | fortunately 766 | fortune 767 | fragrant 768 | free 769 | freed 770 | freedom 771 | freedoms 772 | fresh 773 | fresher 774 | freshest 775 | friendliness 776 | friendly 777 | frolic 778 | frugal 779 | fruitful 780 | ftw 781 | fulfillment 782 | fun 783 | futurestic 784 | futuristic 785 | gaiety 786 | gaily 787 | gain 788 | gained 789 | gainful 790 | gainfully 791 | gaining 792 | gains 793 | gallant 794 | gallantly 795 | galore 796 | geekier 797 | geeky 798 | gem 799 | gems 800 | generosity 801 | generous 802 | generously 803 | genial 804 | genius 805 | gentle 806 | gentlest 807 | genuine 808 | gifted 809 | glad 810 | gladden 811 | gladly 812 | gladness 813 | glamorous 814 | glee 815 | gleeful 816 | gleefully 817 | glimmer 818 | glimmering 819 | glisten 820 | glistening 821 | glitter 822 | glitz 823 | glorify 824 | glorious 825 | gloriously 826 | glory 827 | glow 828 | glowing 829 | glowingly 830 | god-given 831 | god-send 832 | godlike 833 | godsend 834 | gold 835 | golden 836 | good 837 | goodly 838 | goodness 839 | goodwill 840 | goood 841 | gooood 842 | gorgeous 843 | gorgeously 844 | grace 845 | graceful 846 | gracefully 847 | gracious 848 | graciously 849 | graciousness 850 | grand 851 | grandeur 852 | grateful 853 | gratefully 854 | gratification 855 | gratified 856 | gratifies 857 | gratify 858 | gratifying 859 | gratifyingly 860 | gratitude 861 | great 862 | greatest 863 | greatness 864 | grin 865 | groundbreaking 866 | guarantee 867 | guidance 868 | guiltless 869 | gumption 870 | gush 871 | gusto 872 | gutsy 873 | hail 874 | halcyon 875 | hale 876 | hallmark 877 | hallmarks 878 | hallowed 879 | handier 880 | handily 881 | hands-down 882 | handsome 
883 | handsomely 884 | handy 885 | happier 886 | happily 887 | happiness 888 | happy 889 | hard-working 890 | hardier 891 | hardy 892 | harmless 893 | harmonious 894 | harmoniously 895 | harmonize 896 | harmony 897 | headway 898 | heal 899 | healthful 900 | healthy 901 | hearten 902 | heartening 903 | heartfelt 904 | heartily 905 | heartwarming 906 | heaven 907 | heavenly 908 | helped 909 | helpful 910 | helping 911 | hero 912 | heroic 913 | heroically 914 | heroine 915 | heroize 916 | heros 917 | high-quality 918 | high-spirited 919 | hilarious 920 | holy 921 | homage 922 | honest 923 | honesty 924 | honor 925 | honorable 926 | honored 927 | honoring 928 | hooray 929 | hopeful 930 | hospitable 931 | hot 932 | hotcake 933 | hotcakes 934 | hottest 935 | hug 936 | humane 937 | humble 938 | humility 939 | humor 940 | humorous 941 | humorously 942 | humour 943 | humourous 944 | ideal 945 | idealize 946 | ideally 947 | idol 948 | idolize 949 | idolized 950 | idyllic 951 | illuminate 952 | illuminati 953 | illuminating 954 | illumine 955 | illustrious 956 | ilu 957 | imaculate 958 | imaginative 959 | immaculate 960 | immaculately 961 | immense 962 | impartial 963 | impartiality 964 | impartially 965 | impassioned 966 | impeccable 967 | impeccably 968 | important 969 | impress 970 | impressed 971 | impresses 972 | impressive 973 | impressively 974 | impressiveness 975 | improve 976 | improved 977 | improvement 978 | improvements 979 | improves 980 | improving 981 | incredible 982 | incredibly 983 | indebted 984 | individualized 985 | indulgence 986 | indulgent 987 | industrious 988 | inestimable 989 | inestimably 990 | inexpensive 991 | infallibility 992 | infallible 993 | infallibly 994 | influential 995 | ingenious 996 | ingeniously 997 | ingenuity 998 | ingenuous 999 | ingenuously 1000 | innocuous 1001 | innovation 1002 | innovative 1003 | inpressed 1004 | insightful 1005 | insightfully 1006 | inspiration 1007 | inspirational 1008 | inspire 1009 | inspiring 1010 | 
instantly 1011 | instructive 1012 | instrumental 1013 | integral 1014 | integrated 1015 | intelligence 1016 | intelligent 1017 | intelligible 1018 | interesting 1019 | interests 1020 | intimacy 1021 | intimate 1022 | intricate 1023 | intrigue 1024 | intriguing 1025 | intriguingly 1026 | intuitive 1027 | invaluable 1028 | invaluablely 1029 | inventive 1030 | invigorate 1031 | invigorating 1032 | invincibility 1033 | invincible 1034 | inviolable 1035 | inviolate 1036 | invulnerable 1037 | irreplaceable 1038 | irreproachable 1039 | irresistible 1040 | irresistibly 1041 | issue-free 1042 | jaw-droping 1043 | jaw-dropping 1044 | jollify 1045 | jolly 1046 | jovial 1047 | joy 1048 | joyful 1049 | joyfully 1050 | joyous 1051 | joyously 1052 | jubilant 1053 | jubilantly 1054 | jubilate 1055 | jubilation 1056 | jubiliant 1057 | judicious 1058 | justly 1059 | keen 1060 | keenly 1061 | keenness 1062 | kid-friendly 1063 | kindliness 1064 | kindly 1065 | kindness 1066 | knowledgeable 1067 | kudos 1068 | large-capacity 1069 | laud 1070 | laudable 1071 | laudably 1072 | lavish 1073 | lavishly 1074 | law-abiding 1075 | lawful 1076 | lawfully 1077 | lead 1078 | leading 1079 | leads 1080 | lean 1081 | led 1082 | legendary 1083 | leverage 1084 | levity 1085 | liberate 1086 | liberation 1087 | liberty 1088 | lifesaver 1089 | light-hearted 1090 | lighter 1091 | likable 1092 | like 1093 | liked 1094 | likes 1095 | liking 1096 | lionhearted 1097 | lively 1098 | logical 1099 | long-lasting 1100 | lovable 1101 | lovably 1102 | love 1103 | loved 1104 | loveliness 1105 | lovely 1106 | lover 1107 | loves 1108 | loving 1109 | low-cost 1110 | low-price 1111 | low-priced 1112 | low-risk 1113 | lower-priced 1114 | loyal 1115 | loyalty 1116 | lucid 1117 | lucidly 1118 | luck 1119 | luckier 1120 | luckiest 1121 | luckiness 1122 | lucky 1123 | lucrative 1124 | luminous 1125 | lush 1126 | luster 1127 | lustrous 1128 | luxuriant 1129 | luxuriate 1130 | luxurious 1131 | luxuriously 1132 | luxury 1133 | 
lyrical 1134 | magic 1135 | magical 1136 | magnanimous 1137 | magnanimously 1138 | magnificence 1139 | magnificent 1140 | magnificently 1141 | majestic 1142 | majesty 1143 | manageable 1144 | maneuverable 1145 | marvel 1146 | marveled 1147 | marvelled 1148 | marvellous 1149 | marvelous 1150 | marvelously 1151 | marvelousness 1152 | marvels 1153 | master 1154 | masterful 1155 | masterfully 1156 | masterpiece 1157 | masterpieces 1158 | masters 1159 | mastery 1160 | matchless 1161 | mature 1162 | maturely 1163 | maturity 1164 | meaningful 1165 | memorable 1166 | merciful 1167 | mercifully 1168 | mercy 1169 | merit 1170 | meritorious 1171 | merrily 1172 | merriment 1173 | merriness 1174 | merry 1175 | mesmerize 1176 | mesmerized 1177 | mesmerizes 1178 | mesmerizing 1179 | mesmerizingly 1180 | meticulous 1181 | meticulously 1182 | mightily 1183 | mighty 1184 | mind-blowing 1185 | miracle 1186 | miracles 1187 | miraculous 1188 | miraculously 1189 | miraculousness 1190 | modern 1191 | modest 1192 | modesty 1193 | momentous 1194 | monumental 1195 | monumentally 1196 | morality 1197 | motivated 1198 | multi-purpose 1199 | navigable 1200 | neat 1201 | neatest 1202 | neatly 1203 | nice 1204 | nicely 1205 | nicer 1206 | nicest 1207 | nifty 1208 | nimble 1209 | noble 1210 | nobly 1211 | noiseless 1212 | non-violence 1213 | non-violent 1214 | notably 1215 | noteworthy 1216 | nourish 1217 | nourishing 1218 | nourishment 1219 | novelty 1220 | nurturing 1221 | oasis 1222 | obsession 1223 | obsessions 1224 | obtainable 1225 | openly 1226 | openness 1227 | optimal 1228 | optimism 1229 | optimistic 1230 | opulent 1231 | orderly 1232 | originality 1233 | outdo 1234 | outdone 1235 | outperform 1236 | outperformed 1237 | outperforming 1238 | outperforms 1239 | outshine 1240 | outshone 1241 | outsmart 1242 | outstanding 1243 | outstandingly 1244 | outstrip 1245 | outwit 1246 | ovation 1247 | overjoyed 1248 | overtake 1249 | overtaken 1250 | overtakes 1251 | overtaking 1252 | overtook 1253 
| overture 1254 | pain-free 1255 | painless 1256 | painlessly 1257 | palatial 1258 | pamper 1259 | pampered 1260 | pamperedly 1261 | pamperedness 1262 | pampers 1263 | panoramic 1264 | paradise 1265 | paramount 1266 | pardon 1267 | passion 1268 | passionate 1269 | passionately 1270 | patience 1271 | patient 1272 | patiently 1273 | patriot 1274 | patriotic 1275 | peace 1276 | peaceable 1277 | peaceful 1278 | peacefully 1279 | peacekeepers 1280 | peach 1281 | peerless 1282 | pep 1283 | pepped 1284 | pepping 1285 | peppy 1286 | peps 1287 | perfect 1288 | perfection 1289 | perfectly 1290 | permissible 1291 | perseverance 1292 | persevere 1293 | personages 1294 | personalized 1295 | phenomenal 1296 | phenomenally 1297 | picturesque 1298 | piety 1299 | pinnacle 1300 | playful 1301 | playfully 1302 | pleasant 1303 | pleasantly 1304 | pleased 1305 | pleases 1306 | pleasing 1307 | pleasingly 1308 | pleasurable 1309 | pleasurably 1310 | pleasure 1311 | plentiful 1312 | pluses 1313 | plush 1314 | plusses 1315 | poetic 1316 | poeticize 1317 | poignant 1318 | poise 1319 | poised 1320 | polished 1321 | polite 1322 | politeness 1323 | popular 1324 | portable 1325 | posh 1326 | positive 1327 | positively 1328 | positives 1329 | powerful 1330 | powerfully 1331 | praise 1332 | praiseworthy 1333 | praising 1334 | pre-eminent 1335 | precious 1336 | precise 1337 | precisely 1338 | preeminent 1339 | prefer 1340 | preferable 1341 | preferably 1342 | prefered 1343 | preferes 1344 | preferring 1345 | prefers 1346 | premier 1347 | prestige 1348 | prestigious 1349 | prettily 1350 | pretty 1351 | priceless 1352 | pride 1353 | principled 1354 | privilege 1355 | privileged 1356 | prize 1357 | proactive 1358 | problem-free 1359 | problem-solver 1360 | prodigious 1361 | prodigiously 1362 | prodigy 1363 | productive 1364 | productively 1365 | proficient 1366 | proficiently 1367 | profound 1368 | profoundly 1369 | profuse 1370 | profusion 1371 | progress 1372 | progressive 1373 | prolific 1374 | 
prominence 1375 | prominent 1376 | promise 1377 | promised 1378 | promises 1379 | promising 1380 | promoter 1381 | prompt 1382 | promptly 1383 | proper 1384 | properly 1385 | propitious 1386 | propitiously 1387 | pros 1388 | prosper 1389 | prosperity 1390 | prosperous 1391 | prospros 1392 | protect 1393 | protection 1394 | protective 1395 | proud 1396 | proven 1397 | proves 1398 | providence 1399 | proving 1400 | prowess 1401 | prudence 1402 | prudent 1403 | prudently 1404 | punctual 1405 | pure 1406 | purify 1407 | purposeful 1408 | quaint 1409 | qualified 1410 | qualify 1411 | quicker 1412 | quiet 1413 | quieter 1414 | radiance 1415 | radiant 1416 | rapid 1417 | rapport 1418 | rapt 1419 | rapture 1420 | raptureous 1421 | raptureously 1422 | rapturous 1423 | rapturously 1424 | rational 1425 | razor-sharp 1426 | reachable 1427 | readable 1428 | readily 1429 | ready 1430 | reaffirm 1431 | reaffirmation 1432 | realistic 1433 | realizable 1434 | reasonable 1435 | reasonably 1436 | reasoned 1437 | reassurance 1438 | reassure 1439 | receptive 1440 | reclaim 1441 | recomend 1442 | recommend 1443 | recommendation 1444 | recommendations 1445 | recommended 1446 | reconcile 1447 | reconciliation 1448 | record-setting 1449 | recover 1450 | recovery 1451 | rectification 1452 | rectify 1453 | rectifying 1454 | redeem 1455 | redeeming 1456 | redemption 1457 | refine 1458 | refined 1459 | refinement 1460 | reform 1461 | reformed 1462 | reforming 1463 | reforms 1464 | refresh 1465 | refreshed 1466 | refreshing 1467 | refund 1468 | refunded 1469 | regal 1470 | regally 1471 | regard 1472 | rejoice 1473 | rejoicing 1474 | rejoicingly 1475 | rejuvenate 1476 | rejuvenated 1477 | rejuvenating 1478 | relaxed 1479 | relent 1480 | reliable 1481 | reliably 1482 | relief 1483 | relish 1484 | remarkable 1485 | remarkably 1486 | remedy 1487 | remission 1488 | remunerate 1489 | renaissance 1490 | renewed 1491 | renown 1492 | renowned 1493 | replaceable 1494 | reputable 1495 | reputation 1496 | 
resilient 1497 | resolute 1498 | resound 1499 | resounding 1500 | resourceful 1501 | resourcefulness 1502 | respect 1503 | respectable 1504 | respectful 1505 | respectfully 1506 | respite 1507 | resplendent 1508 | responsibly 1509 | responsive 1510 | restful 1511 | restored 1512 | restructure 1513 | restructured 1514 | restructuring 1515 | retractable 1516 | revel 1517 | revelation 1518 | revere 1519 | reverence 1520 | reverent 1521 | reverently 1522 | revitalize 1523 | revival 1524 | revive 1525 | revives 1526 | revolutionary 1527 | revolutionize 1528 | revolutionized 1529 | revolutionizes 1530 | reward 1531 | rewarding 1532 | rewardingly 1533 | rich 1534 | richer 1535 | richly 1536 | richness 1537 | right 1538 | righten 1539 | righteous 1540 | righteously 1541 | righteousness 1542 | rightful 1543 | rightfully 1544 | rightly 1545 | rightness 1546 | risk-free 1547 | robust 1548 | rock-star 1549 | rock-stars 1550 | rockstar 1551 | rockstars 1552 | romantic 1553 | romantically 1554 | romanticize 1555 | roomier 1556 | roomy 1557 | rosy 1558 | safe 1559 | safely 1560 | sagacity 1561 | sagely 1562 | saint 1563 | saintliness 1564 | saintly 1565 | salutary 1566 | salute 1567 | sane 1568 | satisfactorily 1569 | satisfactory 1570 | satisfied 1571 | satisfies 1572 | satisfy 1573 | satisfying 1574 | satisified 1575 | saver 1576 | savings 1577 | savior 1578 | savvy 1579 | scenic 1580 | seamless 1581 | seasoned 1582 | secure 1583 | securely 1584 | selective 1585 | self-determination 1586 | self-respect 1587 | self-satisfaction 1588 | self-sufficiency 1589 | self-sufficient 1590 | sensation 1591 | sensational 1592 | sensationally 1593 | sensations 1594 | sensible 1595 | sensibly 1596 | sensitive 1597 | serene 1598 | serenity 1599 | sexy 1600 | sharp 1601 | sharper 1602 | sharpest 1603 | shimmering 1604 | shimmeringly 1605 | shine 1606 | shiny 1607 | significant 1608 | silent 1609 | simpler 1610 | simplest 1611 | simplified 1612 | simplifies 1613 | simplify 1614 | simplifying 
1615 | sincere 1616 | sincerely 1617 | sincerity 1618 | skill 1619 | skilled 1620 | skillful 1621 | skillfully 1622 | slammin 1623 | sleek 1624 | slick 1625 | smart 1626 | smarter 1627 | smartest 1628 | smartly 1629 | smile 1630 | smiles 1631 | smiling 1632 | smilingly 1633 | smitten 1634 | smooth 1635 | smoother 1636 | smoothes 1637 | smoothest 1638 | smoothly 1639 | snappy 1640 | snazzy 1641 | sociable 1642 | soft 1643 | softer 1644 | solace 1645 | solicitous 1646 | solicitously 1647 | solid 1648 | solidarity 1649 | soothe 1650 | soothingly 1651 | sophisticated 1652 | soulful 1653 | soundly 1654 | soundness 1655 | spacious 1656 | sparkle 1657 | sparkling 1658 | spectacular 1659 | spectacularly 1660 | speedily 1661 | speedy 1662 | spellbind 1663 | spellbinding 1664 | spellbindingly 1665 | spellbound 1666 | spirited 1667 | spiritual 1668 | splendid 1669 | splendidly 1670 | splendor 1671 | spontaneous 1672 | sporty 1673 | spotless 1674 | sprightly 1675 | stability 1676 | stabilize 1677 | stable 1678 | stainless 1679 | standout 1680 | state-of-the-art 1681 | stately 1682 | statuesque 1683 | staunch 1684 | staunchly 1685 | staunchness 1686 | steadfast 1687 | steadfastly 1688 | steadfastness 1689 | steadiest 1690 | steadiness 1691 | steady 1692 | stellar 1693 | stellarly 1694 | stimulate 1695 | stimulates 1696 | stimulating 1697 | stimulative 1698 | stirringly 1699 | straighten 1700 | straightforward 1701 | streamlined 1702 | striking 1703 | strikingly 1704 | striving 1705 | strong 1706 | stronger 1707 | strongest 1708 | stunned 1709 | stunning 1710 | stunningly 1711 | stupendous 1712 | stupendously 1713 | sturdier 1714 | sturdy 1715 | stylish 1716 | stylishly 1717 | stylized 1718 | suave 1719 | suavely 1720 | sublime 1721 | subsidize 1722 | subsidized 1723 | subsidizes 1724 | subsidizing 1725 | substantive 1726 | succeed 1727 | succeeded 1728 | succeeding 1729 | succeeds 1730 | succes 1731 | success 1732 | successes 1733 | successful 1734 | successfully 1735 | suffice 
1736 | sufficed 1737 | suffices 1738 | sufficient 1739 | sufficiently 1740 | suitable 1741 | sumptuous 1742 | sumptuously 1743 | sumptuousness 1744 | super 1745 | superb 1746 | superbly 1747 | superior 1748 | superiority 1749 | supple 1750 | support 1751 | supported 1752 | supporter 1753 | supporting 1754 | supportive 1755 | supports 1756 | supremacy 1757 | supreme 1758 | supremely 1759 | supurb 1760 | supurbly 1761 | surmount 1762 | surpass 1763 | surreal 1764 | survival 1765 | survivor 1766 | sustainability 1767 | sustainable 1768 | swank 1769 | swankier 1770 | swankiest 1771 | swanky 1772 | sweeping 1773 | sweet 1774 | sweeten 1775 | sweetheart 1776 | sweetly 1777 | sweetness 1778 | swift 1779 | swiftness 1780 | talent 1781 | talented 1782 | talents 1783 | tantalize 1784 | tantalizing 1785 | tantalizingly 1786 | tasty 1787 | tempt 1788 | tempting 1789 | temptingly 1790 | tenacious 1791 | tenaciously 1792 | tenacity 1793 | tender 1794 | tenderly 1795 | terrific 1796 | terrifically 1797 | thank 1798 | thankful 1799 | thinner 1800 | thoughtful 1801 | thoughtfully 1802 | thoughtfulness 1803 | thrift 1804 | thrifty 1805 | thrill 1806 | thrilled 1807 | thrilling 1808 | thrillingly 1809 | thrills 1810 | thrive 1811 | thriving 1812 | thumb-up 1813 | thumbs-up 1814 | tickle 1815 | tidy 1816 | time-honored 1817 | timely 1818 | tingle 1819 | titillate 1820 | titillating 1821 | titillatingly 1822 | togetherness 1823 | tolerable 1824 | toll-free 1825 | top 1826 | top-notch 1827 | top-quality 1828 | topnotch 1829 | tops 1830 | tough 1831 | tougher 1832 | toughest 1833 | traction 1834 | tranquil 1835 | tranquility 1836 | transparent 1837 | treasure 1838 | tremendously 1839 | trendy 1840 | triumph 1841 | triumphal 1842 | triumphant 1843 | triumphantly 1844 | trivially 1845 | trophy 1846 | trouble-free 1847 | trump 1848 | trumpet 1849 | trust 1850 | trusted 1851 | trusting 1852 | trustingly 1853 | trustworthiness 1854 | trustworthy 1855 | trusty 1856 | truthful 1857 | truthfully 
1858 | truthfulness 1859 | twinkly 1860 | ultra-crisp 1861 | unabashed 1862 | unabashedly 1863 | unaffected 1864 | unassailable 1865 | unbeatable 1866 | unbiased 1867 | unbound 1868 | uncomplicated 1869 | unconditional 1870 | undamaged 1871 | undaunted 1872 | understandable 1873 | undisputable 1874 | undisputably 1875 | undisputed 1876 | unencumbered 1877 | unequivocal 1878 | unequivocally 1879 | unfazed 1880 | unfettered 1881 | unforgettable 1882 | unity 1883 | unlimited 1884 | unmatched 1885 | unparalleled 1886 | unquestionable 1887 | unquestionably 1888 | unreal 1889 | unrestricted 1890 | unrivaled 1891 | unselfish 1892 | unwavering 1893 | upbeat 1894 | upgradable 1895 | upgradeable 1896 | upgraded 1897 | upheld 1898 | uphold 1899 | uplift 1900 | uplifting 1901 | upliftingly 1902 | upliftment 1903 | upscale 1904 | usable 1905 | useable 1906 | useful 1907 | user-friendly 1908 | user-replaceable 1909 | valiant 1910 | valiantly 1911 | valor 1912 | valuable 1913 | variety 1914 | venerate 1915 | verifiable 1916 | veritable 1917 | versatile 1918 | versatility 1919 | vibrant 1920 | vibrantly 1921 | victorious 1922 | victory 1923 | viewable 1924 | vigilance 1925 | vigilant 1926 | virtue 1927 | virtuous 1928 | virtuously 1929 | visionary 1930 | vivacious 1931 | vivid 1932 | vouch 1933 | vouchsafe 1934 | warm 1935 | warmer 1936 | warmhearted 1937 | warmly 1938 | warmth 1939 | wealthy 1940 | welcome 1941 | well 1942 | well-backlit 1943 | well-balanced 1944 | well-behaved 1945 | well-being 1946 | well-bred 1947 | well-connected 1948 | well-educated 1949 | well-established 1950 | well-informed 1951 | well-intentioned 1952 | well-known 1953 | well-made 1954 | well-managed 1955 | well-mannered 1956 | well-positioned 1957 | well-received 1958 | well-regarded 1959 | well-rounded 1960 | well-run 1961 | well-wishers 1962 | wellbeing 1963 | whoa 1964 | wholeheartedly 1965 | wholesome 1966 | whooa 1967 | whoooa 1968 | wieldy 1969 | willing 1970 | willingly 1971 | willingness 1972 | 
def get_syntactic_grammar_feature_vector(sentence_text):
    """Build a binary syntactic-pattern feature vector for a sentence.

    Each position corresponds to one of the corpus-wide top syntactic POS
    patterns and is 1 when that pattern also occurs in ``sentence_text``.

    :param sentence_text: raw sentence string
    :return: list of 0/1 ints aligned with the top-pattern list
    """
    pos_tagger_pattern = SyntacticPosPatternFeature.extract_syntactic_rules_from_sentence(sentence_text)
    top_syntactic_grammar_list = SyntacticPosPatternFeature.get_top_syntactic_grammar_pos_pattern()
    # Fix: the enumerate() index was never used; iterate the patterns directly.
    return [1 if pattern in pos_tagger_pattern else 0 for pattern in top_syntactic_grammar_list]
class SyntacticPosPatternFeature:
    """Extracts syntactic POS-pattern n-gram features from sentences and
    from the annotated training corpus."""

    # Single source of truth for the annotated corpus path.
    DATASET_FILE = 'dataset/annoted_data.json'

    def __init__(self):
        pass

    @staticmethod
    def extract_top_syntactic_pos_pattern_from_corpus():
        """Count every syntactic POS n-gram over the annotated corpus.

        :return: alphabetically sorted list of the observed pattern strings
        """
        # Fix: use the class constant instead of a second hard-coded copy of the path.
        dataset = get_dataset(SyntacticPosPatternFeature.DATASET_FILE)
        pattern_counter = Counter()
        for data in dataset:
            pattern_counter += SyntacticPosPatternFeature.extract_syntactic_rules_from_sentence(data['sentence'])
        # K is large enough that most_common(K) effectively keeps every pattern.
        return sorted(dict(pattern_counter.most_common(K)).keys())

    @staticmethod
    def extract_syntactic_rules_from_sentence(sentence):
        """Apply every compiled syntactic grammar to ``sentence``.

        :param sentence: raw sentence string
        :return: Counter of POS n-gram strings over all matched chunks
        """
        trio_counter = Counter()
        syntactic_compiled_grammar = PatternGrammar().compile_all_syntactic_grammar()
        # Fix: sort on the integer rule index only; the old key=lambda x: x
        # compared the whole (index, RegexpParser) tuple, which would attempt
        # to order RegexpParser objects if indexes ever collided.
        for _, compiled_grammar in sorted(syntactic_compiled_grammar.items(),
                                          key=lambda item: item[0], reverse=True):
            combos = SyntacticPosPatternFeature.extract_syntactic_grammar(sentence, grammar=compiled_grammar)
            trio_counter += Counter(combos)
        return trio_counter

    @staticmethod
    def extract_syntactic_grammar(sentence, grammar):
        """Chunk ``sentence`` with ``grammar`` and emit POS-tag n-grams.

        :param sentence: raw sentence string
        :param grammar: compiled nltk.RegexpParser
        :return: list of space-joined POS n-grams (sizes from NGRAM_RANGE)
        """
        chunk_dict = Chunker(grammar).chunk_sentence(sentence)
        ngram_strings = []
        # Only the chunk lists matter here; the rule labels are not used.
        for pos_tagged_sentences in chunk_dict.values():
            pos_tags = [token[1] for tagged in pos_tagged_sentences for token in tagged]
            if len(pos_tags) >= 2:
                for size in NGRAM_RANGE:
                    ngram_strings.extend(' '.join(gram).strip() for gram in ngrams(pos_tags, size))
        return ngram_strings

    @staticmethod
    def get_top_syntactic_grammar_pos_pattern():
        """Return the corpus-wide top-pattern list, computed once and cached
        in the module-level global."""
        global top_syntactic_grammar_list
        if top_syntactic_grammar_list is None:
            top_syntactic_grammar_list = SyntacticPosPatternFeature.extract_top_syntactic_pos_pattern_from_corpus()
        return top_syntactic_grammar_list
class Chunker:
    """Parses POS-tagged sentences with a compiled chunk grammar and groups
    the matched chunks by their grammar-rule label."""

    def __init__(self, grammar: nltk.RegexpParser):
        self.grammar = grammar

    def chunk_sentence(self, sentence: str):
        """POS-tag ``sentence`` and chunk it with this instance's grammar.

        :param sentence: raw sentence string
        :return: dict mapping rule label -> list of (word, tag) chunk lists
        """
        tagged = PosTagger(sentence).pos_tag()
        return self.chunk_pos_tagged_sentence(tagged)

    def chunk_pos_tagged_sentence(self, pos_tagged_sentence):
        """Parse an already POS-tagged token list into labelled chunks.

        :param pos_tagged_sentence: list of (word, tag) tuples
        :return: dict mapping rule label -> list of (word, tag) chunk lists
        """
        parse_tree = self.grammar.parse(pos_tagged_sentence)
        return self.extract_rule_and_chunk(parse_tree)

    def extract_rule_and_chunk(self, chunked_tree: nltk.Tree) -> dict:
        """Collect the flattened (word, tag) leaves of every labelled subtree.

        :param chunked_tree: parse tree produced by the grammar
        :return: defaultdict(list) keyed by subtree label
        """

        def collect_leaves(tree, leaves=None, remaining_depth=100):
            # Flatten a subtree into its (word, tag) leaf tuples; the depth
            # cap stops pathological recursion and returns a partial result.
            if leaves is None:
                leaves = []
            if remaining_depth <= 0:
                return leaves
            for child in tree:
                if isinstance(child, nltk.Tree):
                    collect_leaves(child, leaves, remaining_depth - 1)
                else:
                    leaves.append(child)
            return leaves

        chunk_dict = nltk.defaultdict(list)
        for subtree in chunked_tree:
            if not isinstance(subtree, nltk.Tree):
                continue
            chunk_dict[subtree.label()].append(collect_leaves(subtree))
            # Each directly nested labelled chunk is also recorded on its own,
            # so one match can contribute under two labels.
            for nested in subtree:
                if isinstance(nested, nltk.Tree):
                    chunk_dict[nested.label()].append(collect_leaves(nested))
        return chunk_dict

    @staticmethod
    def get_chunk(pos_tagged_sentence, src_target_grammar_key: str) -> list:
        """Chunk with the source/target grammar registered under the key.

        :param pos_tagged_sentence: list of (word, tag) tuples
        :param src_target_grammar_key: clause name in PatternGrammar
        :return: list of chunk lists (possibly empty)
        """
        grammar = PatternGrammar().get_source_target_compiled_grammar(clause=src_target_grammar_key)
        return Chunker.apply_grammar_on_pos_tagged_chunk(grammar, pos_tagged_sentence)

    @staticmethod
    def apply_grammar_on_pos_tagged_chunk(compile_grammar, pos_tagged_sentence):
        """Return all chunk lists produced by ``compile_grammar``, or [] when
        nothing matched."""
        chunk_dict = Chunker(compile_grammar).chunk_pos_tagged_sentence(pos_tagged_sentence)
        return list(chunk_dict.values()) if chunk_dict else []
class LanguageProcessor:
    """Turns chunk-grammar matches into (source aspect, target sentiment
    phrase) pairs and assigns a coarse polarity from negation cues."""

    POSITIVE_POLARITY = 'positive'
    NEGATIVE_POLARITY = 'negative'

    def __init__(self):
        import logging
        self._logger = logging.getLogger()

    @staticmethod
    def merge_two_dict(dict_x, dict_y):
        """Merge two dicts of lists without mutating either input.

        :param dict_x: {'a': [3, 4], 'b': [6]}
        :param dict_y: {'c': [3], 'a': [1, 2]}
        :return: {'c': [3], 'a': [3, 4, 1, 2], 'b': [6]}
        """
        # Fix: dict.copy() is shallow, so the old code's .extend() mutated the
        # list objects still referenced by dict_x.  Copy every list instead.
        dict_z = {key: list(value) for key, value in dict_x.items()}
        for key, value in dict_y.items():
            if dict_z.get(key):
                dict_z[key].extend(value)
            else:
                dict_z[key] = list(value)
        return dict_z

    @staticmethod
    def get_source_target_set(source_chunk, target_chunk_with_polarity: 'Target'):
        """Expand source/target chunks into sets of phrase strings.

        :param source_chunk: list of source chunks from the grammar extractor
        :param target_chunk_with_polarity: Target namedtuple (word, polarity)
        :return: (source phrase set, target phrase set)
        """
        target_chunk = target_chunk_with_polarity.word
        source_set, target_set = set(), set()

        # A chunk captured as a source must not double as a target.
        target_chunk = [tgt for tgt in target_chunk if tgt not in source_chunk]
        for src in source_chunk:
            src_pos_tagged_part = src[0]
            np_phrase_pos_tagged_list = Chunker.get_chunk(src_pos_tagged_part, 'NN_all')
            for np_phrase_pos_tagged_part in np_phrase_pos_tagged_list:
                for single_np_phrase in np_phrase_pos_tagged_part:
                    source_word = ' '.join([i[0] for i in single_np_phrase]).strip()
                    source_set.add(source_word)

        for tgt in target_chunk:
            tgt_pos_tagged_part = tgt[0]
            sentiment_phrase_pos_tagged_list = Chunker.get_chunk(tgt_pos_tagged_part, 'JJ_NN_RB_VB')
            for sentiment_phrase_pos_tagged_part in sentiment_phrase_pos_tagged_list:
                for single_sentiment_phrase in sentiment_phrase_pos_tagged_part:
                    target_word = ' '.join([i[0] for i in single_sentiment_phrase]).strip()
                    target_set.add(target_word)
        return source_set, target_set

    @staticmethod
    def extract_src_target_chunk(key, pos_tagged_chunk: 'Target'):
        """Dispatch a syntactic rule name to its source/target sub-grammars.

        :param key: name of the syntactic rule that matched
        :param pos_tagged_chunk: the matched POS-tagged chunk
        :return: (source chunks, Target(target chunks, polarity))
        """
        source, target = [], []
        if key in ['JJ_DESCRIBING_NN_V4']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NP_before_VB')
            target = Chunker.get_chunk(pos_tagged_chunk, 'JJ_AFTER_VB')
            if not target:
                target = Chunker.get_chunk(pos_tagged_chunk, 'NN_JJ_desc')

            # Fall back to the generic verb-phrase rule when either side is empty.
            if not source or not target:
                source, target_tuple_with_polarity = LanguageProcessor.extract_src_target_chunk(
                    'VBG_DESCRIBING_NN_V3', pos_tagged_chunk)
                return source, target_tuple_with_polarity
        elif key in ['VBG_RB_DESRIBING_NN', 'VBN_DESCRING_THE_FOLLOWING_NOUN', 'VBG_DESRIBING_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NP_After_VB_i')
            target = Chunker.get_chunk(pos_tagged_chunk, 'VB_JJ_RB_desc')
        elif key in ['JJ_VBG_RB_DESRIBING_NN', 'JJ_VBG_RB_DESRIBING_NN_2']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NP_before_VB')
            target = Chunker.get_chunk(pos_tagged_chunk, 'NN_JJ_desc')
            target.extend(Chunker.get_chunk(pos_tagged_chunk, 'RB_AFTER_VB'))
            target.extend(Chunker.get_chunk(pos_tagged_chunk, 'VB_JJ_RB_desc'))
            if not source:
                source = Chunker.get_chunk(pos_tagged_chunk, 'NN_all')

            if not source or not target:
                source, target_tuple_with_polarity = LanguageProcessor.extract_src_target_chunk(
                    'VBG_DESCRIBING_NN_V3', pos_tagged_chunk)
                return source, target_tuple_with_polarity

        elif key in ['VBG_DESCRIBING_NN_V3', 'VBG_NN_DESCRIBING_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NP_After_VB')
            target = Chunker.get_chunk(pos_tagged_chunk, 'VB_all')
        elif key in ['VBG_DESRIBING_NN_V2']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NP_After_VB')
            target = Chunker.get_chunk(pos_tagged_chunk, 'VB_JJ_RB_desc')
        elif key in ['VBG_DESCRIBIN_NN_V4', 'VB_DESCRBING_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NP_After_VB')
            target = Chunker.get_chunk(pos_tagged_chunk, 'VB_JJ_RB_desc')
        elif key in ['RB_BEFORE_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NN_all')
            target = Chunker.get_chunk(pos_tagged_chunk, 'RB_all')
        elif key in ['JJ_BEFORE_NN', 'NN_JJ']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NN_all')
            target = Chunker.get_chunk(pos_tagged_chunk, 'JJ_all')
        elif key in ['JJ_IN_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NE_grammar')
            target = Chunker.get_chunk(pos_tagged_chunk, 'JJ_any_IN')
        elif key in ['JJ_TO_NN_VB']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'TO_NN')
            target = Chunker.get_chunk(pos_tagged_chunk, 'JJ_multi')
        elif key in ['NN_MD_VB']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NE_grammar')
            target = Chunker.get_chunk(pos_tagged_chunk, 'VB_desc')
        elif key in ['VBN_IN_PRP_NN', 'VB_PRP_NNS']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NP_After_VB')
            target = Chunker.get_chunk(pos_tagged_chunk, 'VB_desc')
        elif key in ['PR_VB_JJ_JJ']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'JJ_multi')
            target = Chunker.get_chunk(pos_tagged_chunk, 'VB_desc')
        elif key in ['JJ_BEFORE_NN_V3', 'I_JJ_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NN_only')
            target = Chunker.get_chunk(pos_tagged_chunk, 'JJ_multi')
        elif key in ['NN_IN_DT_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NN_IN')
            target = Chunker.get_chunk(pos_tagged_chunk, 'DT_NN')
        elif key in ['NN_VB_DT_JJ_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'JJ_NN_end')
            target = Chunker.get_chunk(pos_tagged_chunk, 'NN_beg')
        elif key in ['NN_Phrase']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NN_FW_only')
            target = Chunker.get_chunk(pos_tagged_chunk, 'NN_beg')
        elif key in ['NN_desc_NN']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'NN_beg')
            target = Chunker.get_chunk(pos_tagged_chunk, 'NP_After_VB_must')
        elif key in ['NN_DT_NN', 'NN_desc_NN_reverse', 'NN_IN_DT_NN_reverse']:
            source = Chunker.get_chunk(pos_tagged_chunk, 'DT_NN')
            target = Chunker.get_chunk(pos_tagged_chunk, 'NN_beg')
        polarity = LanguageProcessor.get_polarity(target)
        target_tuple_with_polarity = Target(target, polarity)
        return source, target_tuple_with_polarity

    @staticmethod
    def get_polarity(pos_tagged_chunk):
        """Return 'negative' when any negation word occurs in the chunk words,
        otherwise 'positive'."""
        set_target = set()
        for list_pos_chunk in pos_tagged_chunk:
            list_pos_chunk = list_pos_chunk[0]
            set_target.update(pos_tuple[0] for pos_tuple in list_pos_chunk)

        negation_word = NEGATE_SET & set_target
        return LanguageProcessor.POSITIVE_POLARITY if not negation_word else LanguageProcessor.NEGATIVE_POLARITY

    @staticmethod
    def get_target_pos_neg_scores_mean(target_pos_neg_scores):
        """Average a list of per-target sentiment score dicts.

        :param target_pos_neg_scores: [{'NegScore': 0.21875, 'PosScore': 0.375},
                                       {'NegScore': 0.0625, 'PosScore': 0.0},
                                       {'NegScore': 0.21875, 'PosScore': 0.375}]
        :return: {'NegScore': 0.16666, 'PosScore': 0.25}
        """
        pos_scores_mean = np.mean(list(map(lambda x: x[POSITIVE_SENTIMENT_SCORE], target_pos_neg_scores)))
        neg_scores_mean = np.mean(list(map(lambda x: x[NEGATIVE_SENTIMENT_SCORE], target_pos_neg_scores)))
        return {POSITIVE_SENTIMENT_SCORE: pos_scores_mean,
                NEGATIVE_SENTIMENT_SCORE: neg_scores_mean}

    @staticmethod
    def reject_general_english_word(subject_to_target_mapping):
        """Drop targets with an empty word payload from the mapping.

        :param subject_to_target_mapping: dict source -> list of Target tuples
        :return: filtered defaultdict(list) with the same structure
        """
        source_target_mapping_new = defaultdict(list)
        for source, list_of_targets_with_polarity in subject_to_target_mapping.items():
            for target_with_polarity in list_of_targets_with_polarity:
                word = target_with_polarity.word
                if word:
                    source_target_mapping_new[source].append(Target(word=word, polarity=target_with_polarity.polarity))

        return source_target_mapping_new
'never'} 13 | 14 | # [0, 8, 13, 14, 16, 17, 18, 19, 21, 23, 25, 26, 30] 15 | class PatternGrammar: 16 | @property 17 | def syntactic_grammars(self): 18 | grammar = { 19 | 20 | 1: """ 21 | VBG_RB_DESRIBING_NN: { ?**(??)++ }""", 22 | 2: """ 23 | # the place was amazing | 24 | VBG_DESRIBING_NN: {+*}""", 25 | 3: """ 26 | # I loved the ambiance and the food. | enjoyed the taste. 27 | VBG_DESCRIBING_NN_V3 : {+
??+(?
?*+)*} # noqa nopep8""", 28 | 4: """ 29 | # Amazingly satisfying food 30 | VBG_DESRIBING_NN_V2: {*+}""", 31 | 5: """ 32 | VBG_DESRIBING_NN_V5 :{*}""", 33 | 6: """ 34 | # perfect place to have varied options in burgers 35 | VBG_NN_DESCRIBING_NN: { }""", 36 | 7: """ 37 | # improved on their service 38 | VBN_IN_PRP_NN: { }""", 39 | 8: """ 40 | VBG_NN_DESCRIBING_NN: { }""", 41 | 9: """ 42 | VBN_DESCRING_THE_FOLLOWING_NOUN : { *+?(?
?+)+}""", 43 | 10: """ 44 | #i love east village pizza 45 | VB_DESCRBING_NN : { (?**)+}""", 46 | 47 | 11: """ 48 | # the place was ok and good 49 | # The Phirni here is a rich flavoured dessert | Place not worth visiting | the place was ok and good | the fish , chicken and biryani were so tastefull # noqa nopep8 50 | JJ_DESCRIBING_NN_V4 :{(*
?*+)+()**+?
?(**+*)+} # noqa nopep8""", 51 | 52 | 12: """ 53 | # this place is always crowded, noisy and full #JJ_VBG_RB_DESRIBING_NN 54 | # great food , service and ambience. | great food and service. | the served the lovely food. | they have a speedy delivery 55 | # food is amazing 56 | NN_IS_VBG : { }""", 57 | 13: """ 58 | # they have a speedy delivery 59 | PRP_VB_NN : {
?+ }""", 60 | 14: """ 61 | # they have a speedy delivery 62 | PRP_VB_NN : {
?+ }""", 63 | 15: """ 64 | # nice for trying some authentic chinese 65 | NN_VB_DT_JJ_NN: {
} 66 | """, 67 | 16: """ 68 | # vegans like me can also enjoy 69 | NN_MD_VB : {*??} """, 70 | 17: """ 71 | # They have awesome Indian and Chinese! 72 | PR_VB_JJ_JJ : { +* } """, 73 | 18: """ 74 | # impossible to order 75 | JJ_TO_NN_VB : { (+)+ } """, 76 | 19: """ 77 | RB_BEFORE_NN: {+*+} """, 78 | 20: """ 79 | # not a fan of biryani and rolls 80 | NN_IN_DT_NN_reverse : { +
(?
?*+)+ } """, 81 | 21: """ 82 | # place isnt worth the hype 83 | NN_IN_DT_NN: { +
} """, 84 | 22: """ # I was disappointed with the chicken tikka 85 | I_JJ_NN : {
?+} """, 86 | 23: """ 87 | # impeccable service 88 | NN_JJ : { +}""", 89 | 24: """JJ_BEFORE_NN : {*+}""", 90 | 25: """ 91 | # The asparagus, truffle oil, parmesan bruschetta is a winner. 92 | NN_desc_NN : { (*
?*+)+?
?+} """, 93 | 26: """ 94 | # Avoid this place 95 | NN_DT_NN : {
+} 96 | """, 97 | 27: """ 98 | # my favourite is the chicken biryani 99 | NN_desc_NN_reverse : {
+} """, 100 | 28: """ 101 | # grasps all NN if none rule captures the sentence 102 | NN_Phrase : { ??+ }""", 103 | 29: """ 104 | JJ_VBG_RB_DESRIBING_NN_2: { ( ?
? *+ )+ <..|...>* + } 105 | """, 106 | 30: """ 107 | JJ_VBG_RB_DESRIBING_NN: { (?*?)+*???*(??+)+} 108 | """, 109 | 31: """ 110 | AAAA_IGNORE_ALL_RULES: { } 111 | """ 112 | 113 | } 114 | return grammar 115 | 116 | @property 117 | def source_target_extraction_grammars(self): 118 | SRC_TARGET_GRAMMAR = { 119 | 'JJ_grammar': """JJ: {*}""", 120 | 'NN_JJ_desc': """"NN_JJ : {+*}""", 121 | 'NN_desc': """"RB_VB : {*+}""", 122 | # NN_MD_VB, 123 | 'VB_desc': """VB: { ? }""", 124 | 'JJ_multi': """JJ: { +(<,|CC>?)* }""", 125 | # JJ_IN_NN, 126 | 'JJ_IN_NN': """JJ: { + }""", 127 | 'JJ_any_IN': """JJ: { +<.*>* }""", 128 | 'JJ_NN_end': """NN: { (?*+)$ }""", 129 | 'NN_beg': """ NP: { (^)(*
?*?+)+ } """, 130 | 'NN_only': """NP: {+}""", 131 | 'NN_IN': """ NN: {?+} """, 132 | 'DT_NN': """ NN: {
(*
?*?+)+} """, 133 | 'TO_NN': """NP: { (+)+ }""", 134 | # JJ_IN_NN | NN_MD_VB, 135 | 'NE_grammar': """NP: {*+}, 136 | NP: {+}""", 137 | 'NN_all': """NP: {(+?)**+}""", 138 | 'NN_CC_JJ_multi': """NP: { (<,|CC>*?+)+ }""", 139 | 'NP_before_VB': """NP: {(*
?*?+)+()**+}""", 140 | # noqa nopep8 141 | 'NP_After_VB_must': """NP : {
*+}""", 142 | 'NP_After_VB': """NP : {(<,|CC>*?+)*}""", 143 | # was of great taste., 144 | 'NP_After_VB_i': """NP : {(<,|CC>*?+)*}""", 145 | # The food Hummus n Pita and Fish n Chips were 'mouth watering`., 146 | 'VB_JJ_RB_desc': """NN_JJ : {*+}""", # was amazing 147 | 'RB_AFTER_VB': """VB_RB : {+*}""", 148 | # the service was fast. | fast -> RB, 149 | 'VB_all': """VB_ : { +}""", 150 | 151 | # all verbs : i 'loved' the ambience. 152 | 'RB_all': """RB : {*+}""", 153 | 154 | 'JJ_all': """JJ : {+}""", 155 | 'NN_FW_only': """NN_FW : { ?+ }""", 156 | 'JJ_AFTER_VB': """VBG_JJ : { +}""", 157 | 'JJ_NN_RB_VB': 158 | """ JJ : {+} 159 | RB_JJ : {+*} 160 | RB_VB : {++} 161 | RB : {+} 162 | JJ : {+*} 163 | VB : {+} 164 | NN : { +} 165 | """ 166 | } 167 | return SRC_TARGET_GRAMMAR 168 | 169 | @staticmethod 170 | def extractor_mapping_dict(): 171 | extractor_dict = { 172 | 'JJ_DESCRIBING_NN_V4': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 173 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 174 | 'VBG_RB_DESRIBING_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 175 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 176 | 'VBN_DESCRING_THE_FOLLOWING_NOUN': { 177 | 'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 178 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 179 | 'JJ_VBG_RB_DESRIBING_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 180 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 181 | 'JJ_VBG_RB_DESRIBING_NN_2': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 182 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 183 | 'VBG_DESCRIBING_NN_V3': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 184 | 'target': 
PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 185 | 'VBG_NN_DESCRIBING_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 186 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 187 | 'VBG_DESRIBING_NN_V2': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 188 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 189 | 'VBG_DESCRIBIN_NN_V4': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 190 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 191 | 'VB_DESCRBING_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 192 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 193 | 'RB_BEFORE_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 194 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 195 | 'JJ_BEFORE_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 196 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 197 | 'NN_JJ': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 198 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 199 | 'JJ_IN_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 200 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 201 | 'JJ_TO_NN_VB': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 202 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 203 | 'NN_MD_VB': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 204 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 205 | 'VBN_IN_PRP_NN': {'source': 
PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 206 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 207 | 'VB_PRP_NNS': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 208 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 209 | 'PR_VB_JJ_JJ': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 210 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 211 | 'I_JJ_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 212 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 213 | 'NN_IN_DT_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 214 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 215 | 'NN_VB_DT_JJ_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 216 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 217 | 'NN_Phrase': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 218 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 219 | 'NN_desc_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 220 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 221 | 'NN_DT_NN': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 222 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 223 | 'NN_desc_NN_reverse': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 224 | 'target': PatternGrammar().get_source_target_compiled_grammar('JJ_AFTER_VB')}, 225 | 'NN_IN_DT_NN_reverse': {'source': PatternGrammar().get_source_target_compiled_grammar('NP_before_VB'), 226 | 'target': 
def get_syntactic_grammar(self, index):
    """Return the compiled syntactic grammar for *index*.

    Compiles the grammar on first request and memoizes it in the
    module-level ``syntactic_compiled_grammar`` cache so each grammar
    is compiled at most once per process.

    :param index: key into ``self.syntactic_grammars``
    :return: an ``nltk.RegexpParser`` for that grammar
    """
    global syntactic_compiled_grammar
    cached = syntactic_compiled_grammar.get(index)
    if cached is not None:
        return cached
    parser = self.compile_syntactic_grammar(index)
    syntactic_compiled_grammar[index] = parser
    return parser
class PosTagger:
    """Thin wrapper around NLTK's PerceptronTagger for one sentence."""

    def __init__(self, sentence):
        """
        Args:
            sentence: raw sentence text to be tokenized and POS-tagged.
        """
        self.sentence = sentence
        self.tagger = PosTagger.get_tagger()

    def pos_tag(self):
        """Tokenize the stored sentence and tag it.

        Returns:
            list of (token, POS-tag) tuples.
        """
        return self.tagger.tag(nltk.word_tokenize(self.sentence))

    @staticmethod
    def get_tagger():
        """Return a PerceptronTagger instance.

        Returns:
            a fresh ``nltk.PerceptronTagger``.
        """
        return PerceptronTagger()
@staticmethod
@functools.lru_cache(maxsize=16384)
def get_sentiment_for_word(target: str):
    """Score a (possibly multi-word) phrase against SentiWordNet.

    Each whitespace-separated token (e.g. "must try" -> "must", "try") is
    scored on its own; tokens with no positive or negative signal are
    skipped, and the remaining scores are averaged per polarity.

    :param target: phrase to score
    :return: dict with POSITIVE_SENTIMENT_SCORE and NEGATIVE_SENTIMENT_SCORE
    """
    pos_scores = []
    neg_scores = []
    for token in target.split():
        pos, neg = Sentiment._find_sentiment_score_for(token)
        if pos or neg:  # drop tokens unknown to SentiWordNet entirely
            pos_scores.append(pos)
            neg_scores.append(neg)
    return {
        POSITIVE_SENTIMENT_SCORE: np.mean(pos_scores) if pos_scores else 0,
        NEGATIVE_SENTIMENT_SCORE: np.mean(neg_scores) if neg_scores else 0,
    }
@staticmethod
def get_neutral_modifiers():
    """Return the cached set of neutral modifier words.

    On first call, loads one word per line from NEUTRAL_MODIFIERS_TXT
    (lower-cased, stripped) and removes any word that also appears in the
    positive or negative lists, keeping the three modifier sets disjoint.

    Bug fixed: the original comprehension
    ``{w for word in txt_file for w in word.lower().strip()}`` iterated the
    *characters* of each stripped line, producing a set of single letters.
    It now mirrors get_positive_modifiers / get_negative_modifiers.

    :return: set of neutral modifier words
    """
    global neutral_modifiers
    if not neutral_modifiers:
        with open(NEUTRAL_MODIFIERS_TXT, 'r') as txt_file:
            neutral_modifiers = {line.lower().strip() for line in txt_file}
        # keep the neutral set disjoint from the polar word lists
        neutral_modifiers -= Sentiment.get_positive_modifiers()
        neutral_modifiers -= Sentiment.get_negative_modifiers()
    return neutral_modifiers
@staticmethod
def negative_words(word, stemmed_adj):
    """True if the raw word or its stemmed form is a known negative modifier.

    :param word: surface form of the candidate word
    :param stemmed_adj: stemmed form of the same word
    :return: bool
    """
    negatives = Sentiment.get_negative_modifiers()
    if word.strip().lower() in negatives:
        return True
    return stemmed_adj.lower() in negatives
@staticmethod
def assign_source_and_target(source_set, target_set, polarity, subject_to_target_mapping):
    """Attach each extracted opinion target (with polarity) to every root subject.

    :param source_set: candidate aspect/subject phrases
    :param target_set: candidate opinion/target phrases
    :param polarity: polarity label shared by all targets in target_set
    :param subject_to_target_mapping: defaultdict(list) mapping subject -> [Target]
    :return: the (mutated) subject_to_target_mapping

    Bug fixed: each target was appended unconditionally AND appended a second
    time inside the ``target not in source_set`` guard, so non-source targets
    were recorded twice and the guard never excluded anything. Only the
    guarded append is kept: phrases that already act as a source are not
    also recorded as an opinion target of themselves.
    """
    for subject in source_set:
        subject = SourceTargetExtractor.strip_to_root_word(subject)
        if not subject:
            continue
        for target in target_set:
            # skip phrases that are themselves sources
            if target not in source_set:
                subject_to_target_mapping[subject].append(Target(target, polarity))
    return subject_to_target_mapping
@staticmethod
def strip_to_root_word(word):
    """Drop tokens found in the ``less_adj`` lexicon and re-join the phrase.

    :param word: whitespace-separated phrase
    :return: the phrase with low-information tokens removed, stripped
    """
    kept = [token for token in word.split() if token not in less_adj]
    return ' '.join(kept).strip()
def gh(name, version):
    """Build a pip-installable git URL for a GitHub dependency.

    The protocol depends on the environment: an access token (GHE_ACCESS_TOKEN)
    yields authenticated HTTPS, a CI build (CDP_BUILD_VERSION) plain HTTPS,
    and a developer machine falls back to SSH.

    :param name: "<owner>/<repo>" slug
    :param version: tag/branch/sha to pin
    :return: "git+...github.com/<name>.git@<version>#egg=<repo>-<version>"
    """
    package = name.split('/')[1]
    if 'GHE_ACCESS_TOKEN' in os.environ:
        proto = 'git+https://' + os.environ['GHE_ACCESS_TOKEN'] + '@'
    elif 'CDP_BUILD_VERSION' in os.environ:
        proto = 'git+https://'
    else:
        proto = 'git+ssh://git@'
    return '{}github.com/{}.git@{}#egg={}-{}'.format(
        proto, name, version, package, version)
def load_requirements_file(path):
    """Read a requirements file (relative to this setup.py's directory) and
    return its non-empty, non-comment requirement lines.

    Fix: the original opened the file without ever closing it; a ``with``
    block now guarantees the handle is released.

    :param path: path relative to ``__location__``
    :return: list of requirement strings
    """
    with open(os.path.join(__location__, path)) as req_file:
        content = req_file.read().splitlines()
    return [req for req in content if req != '' and not req.startswith("#")]
def makedirs_with_mode(path, mode=0o775):
    """Create *path* (including parents) with exactly *mode* permission bits.

    The process umask is temporarily cleared so the requested bits are not
    masked away, and restored afterwards even if directory creation fails.

    :param path: directory path to create
    :param mode: permission bits for the created directories (default 0o775)
    """
    old_mask = os.umask(0)
    try:
        os.makedirs(path, mode)
    finally:
        # always restore the previous umask for the rest of the process
        os.umask(old_mask)
def dataset_expanded(dataset_filename):
    """Expand the annotated dataset into one row per (sentence, aspect) pair.

    :param dataset_filename: filename of the JSON dataset to be read
    :return: pandas DataFrame with columns [sentence, target, opinion]

    Bug fixed: the row was appended *after* the inner loop, so only the last
    aspect/polarity pair of each sentence survived (and a sentence with no
    pairs reused stale loop variables from the previous iteration). Every
    (aspect, polarity) pair now yields its own row, matching the function
    name and documented contract. The unused ``sentence_meta`` dict was
    dropped.
    """
    annotated_data = read_json_formatted(dataset_filename)
    dataset = []
    for row in annotated_data:
        sources = [s.lower() for s in row['target']]
        targets = [s.lower() for s in row['polarity']]
        sentence = row['sentence']
        for source, target in zip(sources, targets):
            dataset.append([sentence, source, target])
    return pd.DataFrame(dataset, columns='sentence,target,opinion'.split(','))
def get_max_combination(list_of_extracted_meta, expected_meta_form):
    """Find the subset of rules (size 1 or 2) whose pooled extractions best
    match the annotated expectation, scored by F1.

    :param list_of_extracted_meta: per-rule sets of extracted (aspect, polarity) tuples
    :param expected_meta_form: annotated (aspect, polarity) tuples for the sentence
    :return: (binary label vector over rules, extractions of the best subset)
    """
    rule_count = len(list_of_extracted_meta)
    best_labels = [0] * rule_count
    best_extracted = set()
    best_score = 0
    for size in (1, 2):
        for combo in itertools.combinations(range(rule_count), size):
            pooled = set()
            for idx in combo:
                pooled.update(list_of_extracted_meta[idx])
            # NOTE(review): the original unpacked these names in swapped
            # order; the F1 score is symmetric in (y_true, y_pred), so the
            # computed value is identical either way.
            y_pred, y_true = get_y_pred_and_y_true_label(expected_meta_form, pooled)
            score = f1_score(y_true, y_pred)
            if score >= MATCH_THRESHOLD and score > best_score:
                best_score = score
                best_extracted = pooled
                best_labels = [0] * rule_count
                for idx in combo:
                    # only rules that actually extracted something are credited
                    if len(list_of_extracted_meta[idx]) > 0:
                        best_labels[idx] = 1
    return best_labels, best_extracted
def get_polarity_form_result(score_dict):
    """Collapse sentiment scores into polarity labels per aspect.

    :param score_dict: {aspect: {'PosScore': float, 'NegScore': float}}
    :return: {normalised_aspect: 'positive' | 'negative'}; aspect keys are
             lower-cased and stripped, empty keys are dropped, and ties
             count as positive (matching get_polarity()).
    """
    extracted_meta = {}
    for raw_source, score in score_dict.items():
        source = raw_source.lower().strip()
        if not source:
            continue
        is_negative = score['PosScore'] < score['NegScore']
        extracted_meta[source] = 'negative' if is_negative else 'positive'
    return extracted_meta
intersecion_keys = ext_keys & exp_keys 236 | 237 | for key in intersecion_keys: 238 | if exp_dict[key] == ext_dict[key]: 239 | y_pred_index.extend([1]) 240 | y_true_index.extend([1]) 241 | else: 242 | y_pred_index.extend([0]) 243 | y_true_index.extend([1]) 244 | 245 | 246 | common_removed_expected_meta_form = {item for item in expected_meta_form 247 | if item[0] not in intersecion_keys} 248 | common_removed_extracted_meta_form = {item for item in extracted_meta_form 249 | if item[0] not in intersecion_keys} 250 | 251 | expected_meta_form = common_removed_expected_meta_form 252 | extracted_meta_form = common_removed_extracted_meta_form 253 | 254 | false_positives = len(extracted_meta_form - expected_meta_form) 255 | if false_positives: 256 | y_pred_index.extend([1] * false_positives) 257 | y_true_index.extend([0] * false_positives) 258 | 259 | false_negatives = len(expected_meta_form - extracted_meta_form) 260 | if false_negatives: 261 | y_pred_index.extend([0] * false_negatives) 262 | y_true_index.extend([1] * false_negatives) 263 | 264 | return y_pred_index, y_true_index 265 | -------------------------------------------------------------------------------- /training/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yardstick17/AspectBasedSentimentAnalysis/b1fcf830341a51f37a862b1144797d2a9c5db2c2/training/pipeline/__init__.py -------------------------------------------------------------------------------- /training/pipeline/acquire_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import os 4 | 5 | import luigi 6 | import pandas as pd 7 | 8 | from dataset.read_dataset import read_json_formatted 9 | from training.helpers import format_dataset 10 | from training.helpers import makedirs_with_mode 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | BASE_PROCESSED_DIR = 
class BaseTask(luigi.Task):
    """Shared luigi task base.

    Derives a per-task output file path under the processed-data folder from
    the concrete subclass's name, creating the folder on demand.
    """

    @property
    def outfilename(self):
        """File name of the form 'processed_<taskclassname>.<ext>'."""
        name = self.__class__.__name__.lower()
        return 'processed_{}.{}'.format(name, self.output_file_extension)

    @property
    def output_file_extension(self):
        """Default output extension; subclasses may override."""
        return 'csv'

    @property
    def base_folder_path(self):
        """Root directory for all processed artifacts."""
        return BASE_PROCESSED_DIR

    def output(self):
        """Return the task's LocalTarget, creating the base folder if needed."""
        folder = self.base_folder_path
        if not os.path.isdir(folder):
            logger.debug('Creating non-existent path: %s', folder)
            makedirs_with_mode(folder)
        return luigi.LocalTarget(os.path.join(folder, self.outfilename))
import tqdm 8 | 9 | from grammar.source_target_extractor import SourceTargetExtractor 10 | from training.mid_stage_prepare_dataset import get_grammar 11 | from training.mid_stage_prepare_dataset import get_max_combination 12 | from training.mid_stage_prepare_dataset import get_polarity_form_result 13 | from training.mid_stage_prepare_dataset import get_y_pred_and_y_true_label 14 | from training.mid_stage_prepare_dataset import initialize_globals 15 | from training.pipeline.acquire_dataset import AcquireDataset 16 | from training.pipeline.acquire_dataset import BaseTask 17 | from training.train_top_classifier import get_syntactic_feature 18 | from training.train_top_classifier import transform_to_label 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def process_data_for_training(annotated_dataset): 24 | sorted_grammar_list = get_grammar() 25 | mid_training_data = [] 26 | Y_PRED = [] 27 | Y_TRUE = [] 28 | for row in tqdm(annotated_dataset): 29 | sentence = row['sentence'] 30 | logging.debug('sentence: ' + sentence) 31 | meta = { 32 | key: value 33 | for key, value in row['meta'].items() if key != 'null' 34 | } 35 | expected_meta_form = set(sorted(meta.items())) 36 | ste = SourceTargetExtractor(sentence) 37 | list_of_extracted_meta = list() 38 | for index, (_, compiled_grammar) in enumerate(sorted_grammar_list): 39 | score_dict = ste.get_topic_sentiment_score_dict(compiled_grammar) 40 | extracted_meta = get_polarity_form_result(score_dict) 41 | extracted_ote = set(extracted_meta.items()) 42 | list_of_extracted_meta.append(extracted_ote) 43 | mid_training_label, max_match_extracted = get_max_combination( 44 | list_of_extracted_meta, expected_meta_form) 45 | y_pred_index, y_true_index = get_y_pred_and_y_true_label( 46 | expected_meta_form, max_match_extracted) 47 | Y_TRUE.extend(y_true_index) 48 | Y_PRED.extend(y_pred_index) 49 | mid_training_data.append( 50 | [sentence, meta, max_match_extracted, mid_training_label]) 51 | dataframe = pd.DataFrame( 52 | 
mid_training_data, 53 | columns=['sentence', 'meta', 'max_match_extracted', 'y_true']) 54 | X = dataframe.apply(get_syntactic_feature, axis=1) 55 | Y = dataframe.apply(transform_to_label, axis=1) 56 | X = np.array(X.tolist()) 57 | Y = np.array(Y.tolist()) 58 | logging.info( 59 | 'Shape of array for dataset: X:{} , Y:{} '.format(X.shape, Y.shape)) 60 | td = TrainingData(X, Y) 61 | return td 62 | 63 | 64 | class TrainingData(object): 65 | def __init__(self, X, Y): 66 | self.X = X 67 | self.Y = Y 68 | 69 | 70 | class DataProcessing(BaseTask): 71 | dataset_filename = luigi.Parameter() 72 | 73 | def run(self): 74 | initialize_globals() 75 | annotated_dataset = self.get_annotated_data() 76 | 77 | td = process_data_for_training(annotated_dataset) 78 | pd.to_pickle(td, self.output().path) 79 | 80 | def get_annotated_data(self): 81 | filepath = self.requires().output().path 82 | return pd.read_pickle(filepath) 83 | 84 | def requires(self): 85 | return AcquireDataset(self.dataset_filename) 86 | 87 | 88 | if __name__ == '__main__': 89 | luigi.run() 90 | -------------------------------------------------------------------------------- /training/pipeline/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | import luigi 5 | import pandas as pd 6 | from sklearn.multioutput import MultiOutputRegressor 7 | from sklearn.svm import SVC 8 | 9 | from training.pipeline.acquire_dataset import BaseTask 10 | from training.pipeline.data_processing import DataProcessing 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class TrainRuleFitnessClassifier(BaseTask): 16 | dataset_filename = luigi.Parameter() 17 | 18 | def run(self): 19 | training_data = self.get_features_and_label() 20 | classifier = self.get_multi_label_classifier() 21 | classifier.fit(training_data.X, training_data.Y) 22 | pd.to_pickle(classifier, self.output().path) 23 | 24 | def get_multi_label_classifier(self): 25 | forest 
= SVC(kernel='linear') 26 | return MultiOutputRegressor(forest, n_jobs=-1) 27 | 28 | def get_features_and_label(self): 29 | td = pd.read_pickle(self.requires().output().path) 30 | return td 31 | 32 | def requires(self): 33 | return DataProcessing(self.dataset_filename) 34 | 35 | 36 | if __name__ == '__main__': 37 | luigi.run() 38 | -------------------------------------------------------------------------------- /training/train_top_classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | import click 5 | import numpy as np 6 | import pandas as pd 7 | from sklearn.metrics import classification_report 8 | from sklearn.multioutput import MultiOutputRegressor 9 | from sklearn.svm import SVC 10 | from tqdm import tqdm 11 | 12 | from feature_extraction.feature_vector_builder import get_syntactic_grammar_feature_vector 13 | from grammar.source_target_extractor import SourceTargetExtractor 14 | from training.mid_stage_prepare_dataset import extract_mid_stage_label_dataframe 15 | from training.mid_stage_prepare_dataset import get_dataset 16 | from training.mid_stage_prepare_dataset import get_grammar 17 | from training.mid_stage_prepare_dataset import get_polarity_form_result 18 | from training.mid_stage_prepare_dataset import get_y_pred_and_y_true_label 19 | from training.mid_stage_prepare_dataset import initialize_globals 20 | from training.mid_stage_prepare_dataset import ONLY_ASPECT_PREDICTION 21 | 22 | # FOREST = RandomForestClassifier(n_estimators=100, random_state=1) 23 | FOREST = SVC(kernel='linear') 24 | MULTI_TARGET_FOREST = MultiOutputRegressor(FOREST, n_jobs=-1) 25 | LABEL = 'training_label' 26 | SYNTACTIC_FEATURE = 'syntactic_feature' 27 | CLASSIFIER_PKL = '/tmp/ONLY_ASPECT_PREDICTION_{}_classifier.pkl'.format( 28 | ONLY_ASPECT_PREDICTION) 29 | COLUMN_PKL_FILE = '/tmp/_{}_column_to_delete.pkl'.format( 30 | ONLY_ASPECT_PREDICTION) 31 | TRAINING_DATA = [ 32 | 
'dataset/annoted_data.json', 33 | # 'dataset/Restaurants_Train_2014.json', 34 | # 'dataset/ABSA-15_Restaurants_Train_Final.json', 35 | # 'dataset/customer_review_data/Apex AD2600 Progressive-scan DVD player.txt.json', 36 | # 'dataset/customer_review_data/Nokia 6610.txt.json', 37 | # 'dataset/customer_review_data/Creative Labs Nomad Jukebox Zen Xtra 40GB.txt.json', 38 | # 'dataset/customer_review_data/Nikon coolpix 4300.txt.json' 39 | ] 40 | # training_data = training_data[:1] 41 | 42 | # TESTING_DATA_FILE = 'dataset/customer_review_data/Canon G3.txt.json' 43 | TESTING_DATA_FILE = 'dataset/ABSA15_Restaurants_Test.json' 44 | syntactic_rules_in_list = None 45 | 46 | 47 | def get_syntactic_feature(row): 48 | """ 49 | 50 | :param row: 51 | :return: 52 | """ 53 | return get_syntactic_grammar_feature_vector(row.sentence) 54 | 55 | 56 | def transform_to_label(row): 57 | """ 58 | If the classifier expects label array of specific type, reformat here. 59 | 60 | :param row: 61 | :return: 62 | """ 63 | return row.y_true 64 | 65 | 66 | def get_features_and_label(dataset): 67 | """ 68 | 69 | :param dataset: 70 | :return: 71 | """ 72 | 73 | dataframe = extract_mid_stage_label_dataframe(dataset) 74 | X = dataframe.apply(get_syntactic_feature, axis=1) 75 | Y = dataframe.apply(transform_to_label, axis=1) 76 | 77 | X = np.array(X.tolist()) 78 | Y = np.array(Y.tolist()) 79 | logging.info( 80 | 'Shape of array for dataset: X:{} , Y:{} '.format(X.shape, Y.shape)) 81 | return X, Y, dataframe 82 | 83 | 84 | @click.command() 85 | @click.option( 86 | '--log', '-l', help='set log level for the processing', default='INFO') 87 | def main(log): 88 | """ 89 | This method extracts the feature-vector and corresponding label for top-level classifier. 90 | The classifier is targeted to learn which rules to apply on a sentence so that correct 91 | opinion target extraction is done. 
92 | :param log: 93 | """ 94 | # print() 95 | # COLUMN_PKL_FILE = 'classifier.pkl' 96 | logging.basicConfig( 97 | format='[%(name)s] [%(asctime)s] %(levelname)s : %(message)s', 98 | level=logging._nameToLevel[log]) 99 | import os 100 | if os.path.isfile(CLASSIFIER_PKL) and os.path.isfile(COLUMN_PKL_FILE): 101 | classifier = pd.read_pickle(CLASSIFIER_PKL) 102 | columns_to_delete = pd.read_pickle(COLUMN_PKL_FILE) 103 | else: 104 | X, Y, _ = get_features_and_label(TRAINING_DATA) 105 | columns_to_delete, Y = get_valid_columns(Y) 106 | classifier = MULTI_TARGET_FOREST 107 | classifier.fit(X, Y) 108 | pd.to_pickle(columns_to_delete, COLUMN_PKL_FILE) 109 | pd.to_pickle(classifier, CLASSIFIER_PKL) 110 | # print('Classification report on training data\n', classification_report(Y, y_pred)) 111 | print('columns_to_delete:', columns_to_delete) 112 | X, Y, test_dataframe = get_features_and_label(TESTING_DATA_FILE) 113 | Y = np.delete(Y, columns_to_delete, axis=1) 114 | y_pred = classifier.predict(X) 115 | print('Classification report on testing_data\n', 116 | classification_report(Y, y_pred)) 117 | 118 | check_validity(TESTING_DATA_FILE, y_pred, columns_to_delete) 119 | 120 | 121 | def get_valid_columns(X): 122 | X = np.array(X) 123 | col = X.shape[1] 124 | columns_to_delete = [] 125 | for i in range(col): 126 | if not any(X[:, i]) or not any(map(lambda x: x != 1, X[:, i])): 127 | columns_to_delete.append(i) 128 | 129 | print('columns_to_delete', columns_to_delete) 130 | return columns_to_delete, np.delete(X, columns_to_delete, axis=1) 131 | 132 | 133 | def check_validity(dataset_filename, y_pred, columns_to_delete): 134 | """ 135 | 136 | :param dataset_filename: 137 | :return: 138 | """ 139 | 140 | logging.info('Dataset: {}'.format(dataset_filename)) 141 | initialize_globals() 142 | trainied_dataset = get_dataset(TRAINING_DATA[0]) 143 | seed_aspects = set() 144 | for row in trainied_dataset: 145 | seed_aspects.update(set(row['meta'].keys())) 146 | 147 | annotated_data_dataset 
= get_dataset(dataset_filename) 148 | sorted_grammar_list = get_grammar() 149 | sorted_grammar_list = [ 150 | grammar for index, grammar in enumerate(sorted_grammar_list) 151 | if index not in columns_to_delete 152 | ] 153 | Y_PRED = [] 154 | Y_TRUE = [] 155 | 156 | prediction_step_rows = [] 157 | for row, pred in tqdm(zip(annotated_data_dataset, y_pred)): 158 | sentence = row['sentence'] 159 | 160 | meta = { 161 | key: value 162 | for key, value in row['meta'].items() if key.lower() != 'null' 163 | } 164 | expected_meta_form = set(meta.items()) 165 | ste = SourceTargetExtractor(sentence) 166 | overall_extracted_meta = set() 167 | if any(pred): 168 | for index, rule_flag in enumerate(pred): 169 | if rule_flag == 1 or (len(sentence.split()) <= 2 170 | and index == 2): 171 | compiled_grammar = sorted_grammar_list[index][1] 172 | score_dict = ste.get_topic_sentiment_score_dict( 173 | compiled_grammar) 174 | extracted_meta = get_polarity_form_result(score_dict) 175 | overall_extracted_meta.update(extracted_meta.items()) 176 | else: 177 | for index, rule_flag in enumerate(pred): 178 | if (len(sentence.split()) <= 2 and index == 2): 179 | compiled_grammar = sorted_grammar_list[index][1] 180 | score_dict = ste.get_topic_sentiment_score_dict( 181 | compiled_grammar) 182 | extracted_meta = get_polarity_form_result(score_dict) 183 | overall_extracted_meta.update(extracted_meta.items()) 184 | y_pred_index, y_true_index = get_y_pred_and_y_true_label( 185 | expected_meta_form, overall_extracted_meta, True) 186 | 187 | Y_TRUE.extend(y_true_index) 188 | Y_PRED.extend(y_pred_index) 189 | prediction_step_rows.append([sentence, meta, overall_extracted_meta]) 190 | 191 | df = pd.DataFrame( 192 | prediction_step_rows, 193 | columns=['sentence', 'meta', 'overall_extracted_meta']) 194 | df.to_csv('prediction_step_rows.csv') 195 | correct = 0 196 | total = 0 197 | for x, y in zip(Y_PRED, Y_TRUE): 198 | if x == 1: 199 | total += 1 200 | if x == y: 201 | correct += 1 202 | 203 | print('NO 
PREDICTION FOR RULE: ', 204 | sum([1 for p in y_pred if not any(p)]), ' out of: ', len(y_pred)) 205 | print('Task: ONLY_ASPECT_PREDICTION', ONLY_ASPECT_PREDICTION) 206 | print('Accuracy: ', correct * 100 / float(total)) 207 | print('Total: ', total, ', Correct: ', correct) 208 | print(':::::::::::::::::: TESTING ::::::::::::::::::\n', 209 | dataset_filename, '\n', classification_report(Y_TRUE, Y_PRED)) 210 | 211 | 212 | if __name__ == '__main__': 213 | main() 214 | --------------------------------------------------------------------------------