├── .gitignore ├── .idea ├── .gitignore ├── NewsMTSC.iml ├── misc.xml ├── modules.xml └── vcs.xml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── NewsSentiment ├── .gitignore ├── DatasetPreparer.py ├── SentimentClasses.py ├── __init__.py ├── __main__.py ├── combinations_default.py ├── consts.py ├── controller.py ├── controller_data │ └── datasets │ │ ├── NewsMTSC-dataset │ │ ├── NewsMTSC-dataset.zip │ │ ├── NewsMTSC-preprint.pdf │ │ ├── devtest_mt.jsonl │ │ ├── devtest_rw.jsonl │ │ ├── readme.md │ │ └── train.jsonl │ │ ├── acl14twitter │ │ ├── test.raw.jsonl │ │ └── train.raw.jsonl │ │ ├── newsmtsc-train-and-test-mt │ │ ├── devtest_mtsc_only.jsonl │ │ └── train.jsonl │ │ ├── newsmtsc-train-and-test-rw │ │ ├── devtest_mtsc_and_single_primaries.jsonl │ │ └── train.jsonl │ │ ├── semeval14laptops │ │ ├── Laptops_Test_Gold.xml.seg.jsonl │ │ └── Laptops_Train.xml.seg.jsonl │ │ └── semeval14restaurants │ │ ├── Restaurants_Test_Gold.xml.seg.jsonl │ │ └── Restaurants_Train.xml.seg.jsonl ├── converter_huggingface.py ├── createoverview.py ├── customexceptions.py ├── dataset.py ├── diskdict.py ├── download.py ├── earlystopping.py ├── evaluator.py ├── experiments │ └── default │ │ └── datasets │ │ ├── newsmtsc-mt-hf │ │ ├── dev.jsonl │ │ ├── test.jsonl │ │ └── train.jsonl │ │ ├── newsmtsc-mt │ │ ├── dev.jsonl │ │ ├── test.jsonl │ │ └── train.jsonl │ │ ├── newsmtsc-rw-hf │ │ ├── dev.jsonl │ │ ├── test.jsonl │ │ └── train.jsonl │ │ └── newsmtsc-rw │ │ ├── dev.jsonl │ │ ├── test.jsonl │ │ └── train.jsonl ├── fxlogger.py ├── infer.py ├── inferrest.py ├── knowledge │ ├── __init__.py │ ├── bingliuopinion │ │ ├── bingliuopinion.py │ │ ├── converter.py │ │ ├── license.txt │ │ ├── negative-words.txt │ │ ├── opinion_polarity.ddict │ │ └── positive-words.txt │ ├── knowledgeutils.py │ ├── liwc │ │ ├── data │ │ │ ├── .gitignore │ │ │ └── readme.txt │ │ ├── dic.py │ │ ├── liwc.py │ │ ├── liwchelper.py │ │ └── trie.py │ ├── mpqasubjectivity │ │ ├── converter.py │ │ ├── mpqasubjectivity.py │ │ ├── subjclueslen1-HLTEMNLP05.tff │ │ └── subjclueslen1-HLTEMNLP05.tff.ddict │ ├── nrcemolex │ │ ├── NRC-Emotion-Lexicon-Wordlevel-v0.92.txt │ │ ├── NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict │ │ ├── converter.py │ │ └── nrcemolex.py │ └── zeros │ │ └── zerosknowledge.py ├── layers │ ├── AggregatorForBert.py │ ├── __init__.py │ ├── attention.py │ ├── dynamic_rnn.py │ ├── point_wise_feed_forward.py │ ├── pytorchnlpattention.py │ └── squeeze_embedding.py ├── losses │ ├── __init__.py │ ├── crossentropycrossweight.py │ ├── crossentropylosslsr.py │ ├── crossentropylosswithconfidence.py │ └── seq2seqloss.py ├── models │ ├── FXBaseModel.py │ ├── FXEnsemble.py │ ├── __init__.py │ ├── ensemble.py │ ├── ensembleb.py │ ├── multitargets │ │ ├── __init__.py │ │ ├── contrasting.py │ │ ├── random_multi.py │ │ ├── seq2seq.py │ │ ├── seq2seq_without_targetmask.py │ │ ├── tdbertlikemultitarget.py │ │ └── tdbertlikemultitarget_dense.py │ └── singletarget │ │ ├── __init__.py │ │ ├── aen.py │ │ ├── grutscsingle.py │ │ ├── lcf.py │ │ ├── lcf2.py │ │ ├── lcfs.py │ │ ├── lcfst.py │ │ ├── lcft.py │ │ ├── notargetcls.py │ │ ├── random_single.py │ │ ├── spc.py │ │ ├── td_bert.py │ │ ├── td_bert_qa.py │ │ └── tdbertlikesingle.py ├── plotter_utils.py ├── pretrained_models │ ├── .gitignore │ └── state_dicts │ │ ├── .gitignore │ │ └── grutsc_v1-0-0 ├── results │ └── .gitignore └── train.py ├── README.md ├── READMEpypi.md ├── hubconf.py ├── pyproject.toml ├── pythoninfo.md └── setup.cfg /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # celery beat schedule file 96 | celerybeat-schedule 97 | 98 | # SageMath parsed files 99 | *.sage.py 100 | 101 | # Environments 102 | .env 103 | .venv 104 | env/ 105 | venv/ 106 | ENV/ 107 | env.bak/ 108 | venv.bak/ 109 | 110 | # Spyder project settings 111 | .spyderproject 112 | .spyproject 113 | 114 | # Rope project settings 115 | .ropeproject 116 | 117 | # mkdocs documentation 118 | /site 119 | 120 | # mypy 121 | .mypy_cache/ 122 | .dmypy.json 123 | dmypy.json 124 | 125 | # Pyre type checker 126 | .pyre/ 127 | 128 | ### JetBrains template 129 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 130 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 131 | 132 | # User-specific stuff 133 | .idea/**/workspace.xml 134 | .idea/**/tasks.xml 135 | .idea/**/usage.statistics.xml 136 | .idea/**/dictionaries 137 | .idea/**/shelf 138 | 139 | # Generated files 140 | .idea/**/contentModel.xml 141 | 142 | # Sensitive or high-churn files 143 | .idea/**/dataSources/ 144 | .idea/**/dataSources.ids 145 | .idea/**/dataSources.local.xml 146 | .idea/**/sqlDataSources.xml 147 | .idea/**/dynamic.xml 148 | .idea/**/uiDesigner.xml 149 | .idea/**/dbnavigator.xml 150 | 151 | # Gradle 152 | .idea/**/gradle.xml 153 | .idea/**/libraries 154 | 155 | # Gradle and Maven with auto-import 156 | # When using Gradle or Maven with auto-import, you should exclude module files, 157 | # since they will be recreated, and may cause churn. Uncomment if using 158 | # auto-import. 
159 | # .idea/modules.xml 160 | # .idea/*.iml 161 | # .idea/modules 162 | # *.iml 163 | # *.ipr 164 | 165 | # CMake 166 | cmake-build-*/ 167 | 168 | # Mongo Explorer plugin 169 | .idea/**/mongoSettings.xml 170 | 171 | # File-based project format 172 | *.iws 173 | 174 | # IntelliJ 175 | out/ 176 | 177 | # mpeltonen/sbt-idea plugin 178 | .idea_modules/ 179 | 180 | # JIRA plugin 181 | atlassian-ide-plugin.xml 182 | 183 | # Cursive Clojure plugin 184 | .idea/replstate.xml 185 | 186 | # Crashlytics plugin (for Android Studio and IntelliJ) 187 | com_crashlytics_export_strings.xml 188 | crashlytics.properties 189 | crashlytics-build.properties 190 | fabric.properties 191 | 192 | # Editor-based Rest Client 193 | .idea/httpRequests 194 | 195 | # Android studio 3.1+ serialized cache file 196 | .idea/caches/build_file_checksums.ser 197 | 198 | # OSX 199 | # 200 | .DS_Store 201 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/NewsMTSC.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at felix.hamborg@uni-konstanz.de. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This project uses code from various projects, who may have their own license or copyright. 
2 | - ABSA-PyTorch by Youwei Song et al., available at https://github.com/songyouwei/ABSA-PyTorch 3 | - early-stopping-pytorch by Bjarten, available at https://github.com/Bjarten/early-stopping-pytorch 4 | 5 | For newly developed files, the license information is: 6 | 7 | Copyright 2020 Felix Hamborg 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 14 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include READMEpypi.md 2 | include pythoninfo.md 3 | -------------------------------------------------------------------------------- /NewsSentiment/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/.gitignore -------------------------------------------------------------------------------- /NewsSentiment/SentimentClasses.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | 3 | 4 | class SentimentClasses: 5 | FILLUP_POLARITY_VALUE = -100 6 | FILLUP_POLARITY_LABEL = "fillup" 7 | SENTIMENT_CLASSES = None 8 | 9 | @staticmethod 10 | def initialize(sentiment_classes: dict): 11 | SentimentClasses.SENTIMENT_CLASSES = sentiment_classes 12 | 13 | @staticmethod 14 | def get_num_classes(): 15 | return len(SentimentClasses.SENTIMENT_CLASSES) 16 | 17 | @staticmethod 18 | def __evaluate_boundary(given_value: float, boundary: tuple): 19 | operator = boundary[0] 20 | value = boundary[1] 21 | if operator == "<=": 22 | return given_value <= value 23 | elif operator == "<": 24 | return given_value < value 25 | elif operator == ">=": 26 | return given_value >= value 27 | elif operator == ">": 28 | return given_value > value 29 | elif operator == "==": 30 | return given_value == value 31 | else: 32 | raise ValueError 33 | 34 | @staticmethod 35 | def __evaluate_boundaries_of_class( 36 | given_value: float, sentiment_boundaries: Iterable[tuple] 37 | ): 38 | assert len(sentiment_boundaries) >= 1 39 | for boundary in sentiment_boundaries: 40 | is_valid = SentimentClasses.__evaluate_boundary(given_value, boundary) 41 | if not is_valid: 42 | return False 43 | return True 44 | 45 | @staticmethod 46 | def __get_legacy_information(): 47 | # self.polarity_associations = {"positive": 2, "neutral": 1, "negative": 0} 48 | # self.polarity_associations_inv = {2: "positive", 1: "neutral", 0: "negative"} 49 |
# self.sorted_expected_label_values = [0, 1, 2] 50 | # self.sorted_expected_label_names = ["negative", "neutral", "positive"] 51 | 52 | sentiment_labels = list(SentimentClasses.SENTIMENT_CLASSES.keys()) 53 | sentiment_normalized_values = [] 54 | for label in sentiment_labels: 55 | sentiment_normalized_values.append( 56 | SentimentClasses.SENTIMENT_CLASSES[label]["normalized_polarity"] 57 | ) 58 | 59 | polarity_associations = {} 60 | polarity_associations_inv = {} 61 | for label, value in zip(sentiment_labels, sentiment_normalized_values): 62 | polarity_associations[label] = value 63 | polarity_associations_inv[value] = label 64 | 65 | return { 66 | "polarity_associations": polarity_associations, 67 | "polarity_associations_inv": polarity_associations_inv, 68 | "sorted_expected_label_values": sentiment_normalized_values, 69 | "sorted_expected_label_names": sentiment_labels, 70 | } 71 | 72 | @staticmethod 73 | def get_sorted_expected_label_names(): 74 | return SentimentClasses.__get_legacy_information()[ 75 | "sorted_expected_label_names" 76 | ] 77 | 78 | @staticmethod 79 | def get_sorted_expected_label_values(): 80 | return SentimentClasses.__get_legacy_information()[ 81 | "sorted_expected_label_values" 82 | ] 83 | 84 | @staticmethod 85 | def get_polarity_associations(): 86 | return SentimentClasses.__get_legacy_information()["polarity_associations"] 87 | 88 | @staticmethod 89 | def get_polarity_associations_inverse(): 90 | return SentimentClasses.__get_legacy_information()["polarity_associations_inv"] 91 | 92 | @staticmethod 93 | def __find_sentiment_class(polarity: float): 94 | resulting_class = None 95 | for sentiment_label, info in SentimentClasses.SENTIMENT_CLASSES.items(): 96 | sentiment_boundaries = info["boundaries"] 97 | sentiment_normalized_polarity = info["normalized_polarity"] 98 | is_in_class_boundaries = SentimentClasses.__evaluate_boundaries_of_class( 99 | polarity, sentiment_boundaries 100 | ) 101 | if is_in_class_boundaries: 102 | # check polarity is not in another class, too 103 | assert ( 104 | resulting_class is None 105 | ), f"overlapping sentiment classes; previous class: {resulting_class}" 106 | resulting_class = (sentiment_label, sentiment_normalized_polarity) 107 | 108 | # check that a class was found 109 | assert resulting_class, f"result is not defined for polarity: {polarity}" 110 | 111 | return resulting_class 112 | 113 | @staticmethod 114 | def polarity2label(polarity: float) -> str: 115 | if polarity == SentimentClasses.FILLUP_POLARITY_VALUE: 116 | return SentimentClasses.FILLUP_POLARITY_LABEL 117 | 118 | sentiment_class = SentimentClasses.__find_sentiment_class(polarity) 119 | label = sentiment_class[0] 120 | return label 121 | 122 | @staticmethod 123 | def polarity2normalized_polarity(polarity: float) -> int: 124 | if polarity == SentimentClasses.FILLUP_POLARITY_VALUE: 125 | return int(SentimentClasses.FILLUP_POLARITY_VALUE) 126 | 127 | sentiment_class = SentimentClasses.__find_sentiment_class(polarity) 128 | normalized_polarity = sentiment_class[1] 129 | return normalized_polarity 130 | 131 | @staticmethod 132 | def Sentiment3ForNewsMtsc(): 133 | sentiment_classes = { 134 | "positive": { 135 | "boundaries": [(">=", 5), ("<=", 7)], 136 | "normalized_polarity": 2, 137 | }, 138 | "neutral": {"boundaries": [(">", 3), ("<", 5)], "normalized_polarity": 1}, 139 | "negative": { 140 | "boundaries": [(">=", 1), ("<=", 3)], 141 | "normalized_polarity": 0, 142 | }, 143 | } 144 | SentimentClasses.initialize(sentiment_classes) 145 | 146 | @staticmethod 147 | def 
SentimentStrong3ForNewsMtsc(): 148 | sentiment_classes = { 149 | "positive": { 150 | "boundaries": [(">=", 6), ("<=", 7)], 151 | "normalized_polarity": 2, 152 | }, 153 | "neutral": {"boundaries": [(">", 2), ("<", 6)], "normalized_polarity": 1}, 154 | "negative": { 155 | "boundaries": [(">=", 1), ("<=", 2)], 156 | "normalized_polarity": 0, 157 | }, 158 | } 159 | SentimentClasses.initialize(sentiment_classes) 160 | 161 | @staticmethod 162 | def SentimentWeak3ForNewsMtsc(): 163 | sentiment_classes = { 164 | "positive": { 165 | "boundaries": [(">=", 4.5), ("<=", 7)], 166 | "normalized_polarity": 2, 167 | }, 168 | "neutral": { 169 | "boundaries": [(">", 3.5), ("<", 4.5)], 170 | "normalized_polarity": 1, 171 | }, 172 | "negative": { 173 | "boundaries": [(">=", 1), ("<=", 3.5)], 174 | "normalized_polarity": 0, 175 | }, 176 | } 177 | SentimentClasses.initialize(sentiment_classes) 178 | -------------------------------------------------------------------------------- /NewsSentiment/__init__.py: -------------------------------------------------------------------------------- 1 | from NewsSentiment.infer import TargetSentimentClassifier 2 | -------------------------------------------------------------------------------- /NewsSentiment/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from NewsSentiment.download import Download 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(prog='NewsSentiment') 7 | subparsers = parser.add_subparsers(dest='action') 8 | 9 | subparser_download = subparsers.add_parser('download', help=Download.add_subparser.__doc__) 10 | Download.add_subparser(subparser_download) 11 | 12 | args = parser.parse_args() 13 | action = args.action 14 | del args.action 15 | 16 | if action == 'download': 17 | Download.run_from_parser(args) 18 | -------------------------------------------------------------------------------- /NewsSentiment/combinations_default.py: -------------------------------------------------------------------------------- 1 | from NewsSentiment.consts import BERT_BASE_UNCASED 2 | 3 | combinations_default_0 = { 4 | "own_model_name": [ 5 | # baselines: single 6 | "notargetclsbert", 7 | "lcf_bert", 8 | "lcf_bert2", 9 | "lcfs_bert", 10 | "lcft_bert", 11 | "aen_bert", 12 | "spc_bert", 13 | "tdbert", 14 | "tdbert-qa-mul", 15 | "tdbert-qa-con", 16 | # own models: single 17 | "tdbertlikesingle", 18 | "lcfst_bert", 19 | "grutsc", 20 | # own models: multi 21 | "tdbertlikemulti", 22 | # "tdbertlikemulti_dense", 23 | "seq2seq", 24 | "seq2seq_withouttargetmask", 25 | "contrasting", 26 | # baselines 27 | # "random_single", 28 | # "random_multi", 29 | ], 30 | "optimizer": ["adam"], 31 | "initializer": ["xavier_uniform_"], 32 | "learning_rate": ["2e-5", "3e-5", "5e-5"], 33 | "batch_size": [ 34 | "16", 35 | # "32", 36 | ], # ['16', '32'], 37 | "balancing": ["None"], # ['None', 'lossweighting', 'oversampling'], 38 | "devmode": ["False"], 39 | "num_epoch": ["2", "3", "4"], 40 | "loss": [ 41 | "crossentropy", 42 | "crossentropy_lsr", 43 | "sequence", 44 | "crossentropy_crossweight", 45 | ], 46 | # "spc_lm_representation_distilbert": ["mean_last"], 47 | # ['sum_last', 'sum_last_four', 'sum_last_two', 'sum_all', 'mean_last', 'mean_last_four', 'mean_last_two', 'mean_all'], 48 | # "spc_lm_representation": ["pooler_output"], 49 | # ['pooler_output', 'sum_last', 'sum_last_four', 'sum_last_two', 'sum_all', 'mean_last', 'mean_last_four', 'mean_last_two', 'mean_all'], 50 | # "spc_input_order": 
["text_target"], # 'target_text', 51 | # "aen_lm_representation": ["last"], 52 | # ['last', 'sum_last_four', 'sum_last_two', 'sum_all', 'mean_last_four'], # 'mean_last_two', 'mean_all'], 53 | "eval_only_after_last_epoch": ["True"], 54 | "local_context_focus": ["cdm", "cdw"], 55 | "SRD": ["3", "4", "5"], 56 | "pretrained_model_name": ["default"], 57 | # ['default', 'bert_news_ccnc_10mio_3ep', 'laptops_and_restaurants_2mio_ep15', 'laptops_1mio_ep30', 'restaurants_10mio_ep3'], 58 | "state_dict": ["None"], 59 | # ['None', 'lcf_bert_acl14twitter_val_recall_avg_0.7349_epoch3', 'lcf_bert_semeval14laptops_val_recall_avg_0.7853_epoch3', 'lcf_bert_semeval14restaurants_val_recall_avg_0.7672_epoch2', 'lcf_bert_newstsc_val_recall_avg_0.5954_epoch3'], 60 | "single_targets": [ 61 | "True" 62 | ], # using conditions in controller.py, we have single_targets only for single target models 63 | "multi_targets": [ 64 | "True" 65 | ], # using conditions in controller.py, we have multi_targets only for multi target models 66 | "targetclasses": [ 67 | "newsmtsc3", 68 | #"newsmtsc3strong", 69 | #"newsmtsc3weak", 70 | ], 71 | "knowledgesources": [ 72 | "nrc_emotions", "mpqa_subjectivity", "bingliu_opinion", "liwc", 73 | "nrc_emotions mpqa_subjectivity", "nrc_emotions liwc", 74 | "nrc_emotions bingliu_opinion", "mpqa_subjectivity bingliu_opinion", 75 | "mpqa_subjectivity liwc", "bingliu_opinion liwc", 76 | "nrc_emotions mpqa_subjectivity bingliu_opinion", 77 | "nrc_emotions mpqa_subjectivity liwc", 78 | "nrc_emotions liwc bingliu_opinion", 79 | "liwc mpqa_subjectivity bingliu_opinion", 80 | "nrc_emotions mpqa_subjectivity bingliu_opinion liwc", 81 | "zeros", 82 | ], 83 | "is_use_natural_target_phrase_for_spc": [ 84 | "True", 85 | "False" 86 | ], 87 | "default_lm": [ 88 | BERT_BASE_UNCASED, 89 | ], 90 | "coref_mode_in_training": [ 91 | "ignore", 92 | "in_targetmask", 93 | "additional_examples" 94 | ], 95 | } 96 | -------------------------------------------------------------------------------- /NewsSentiment/consts.py: -------------------------------------------------------------------------------- 1 | BERT_BASE_UNCASED = "bert-base-uncased" 2 | ROBERTA_BASE = "roberta-base" 3 | XLNET_BASE_CASED = "xlnet-base-cased" 4 | ALBERT_BASE = "albert-base-v2" 5 | ALBERT_LARGE = "albert-large-v2" 6 | ALBERT_XLARGE = "albert-xlarge-v2" 7 | ALBERT_XXLARGE = "albert-xxlarge-v2" 8 | __DEFAULT_LM = None 9 | 10 | 11 | def set_default_lm(new_name: str): 12 | global __DEFAULT_LM 13 | __DEFAULT_LM = new_name 14 | 15 | 16 | def get_default_lm(): 17 | return __DEFAULT_LM 18 | 19 | 20 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS = "text_ids_with_special_tokens" 21 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK = ( 22 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS + "_target_mask" 23 | ) 24 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES = ( 25 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS + "_selectedknowledgesources" 26 | ) 27 | FIELD_IS_OVERFLOW = "is_overflow" 28 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS = ( 29 | "text_then_target_ids_with_special_tokens" 30 | ) 31 | # we used to have text-then-target target mask here, but won't use it, 32 | # since it would be identical to the text target mask (since we only 33 | # want to mark the target within the text, but not in the 2nd target 34 | # component) 35 | # FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK = ( 36 | # FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS + "_target_mask" 37 | # ) 38 | # same for knowledge sources 39 | # 
FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_KNOWLEDGE_SOURCES = ( 40 | # FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS + "_selectedknowledgesources" 41 | # ) 42 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS = ( 43 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS + "_segment_ids" 44 | ) 45 | FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS = "target_ids_with_special_tokens" 46 | FIELD_SYNTAX_HOP_DISTANCE_TO_TARGET = "syntax_hop_distance_to_target" 47 | FIELD_SYNTAX_DEPENDENCY_MATRIX = "syntax_dependency_matrix" 48 | -------------------------------------------------------------------------------- /NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-dataset.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-dataset.zip -------------------------------------------------------------------------------- /NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-preprint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-preprint.pdf -------------------------------------------------------------------------------- /NewsSentiment/controller_data/datasets/NewsMTSC-dataset/readme.md: -------------------------------------------------------------------------------- 1 | # Welcome 2 | 3 | The files contained in this archive are part of the dataset "NewsMTSC" described in our paper "NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles" published at EACL 2021. 4 | 5 | ## Dataset 6 | 7 | ### Files 8 | The dataset consists of three splits. In practical terms, we suggest using the files as follows (more detailed information can be found in the paper): 9 | 10 | * `train.jsonl` - For **training**. 11 | * `devtest_mt.jsonl` - To evaluate a model's classification performance only on sentences that contain **at least two target mentions**. Note that the mentions were extracted to refer to different persons but in a few cases might indeed refer to the same person since we extracted them automatically. 12 | * `devtest_rw.jsonl` - To evaluate a model's classification performance on a "**real-world**" set of sentences, i.e., the set was created with the objective to resemble the real-world distribution of sentiment and other factors mentioned in the paper. 13 | 14 | 15 | ### Format 16 | Each split is stored in a JSONL file. In JSONL, each line represents one JSON object. In our dataset, each JSON object consists of: 17 | 18 | 1. `sentence_normalized`: a single sentence 19 | 2. `primary_gid`: an identifier that is unique within NewsMTSC 20 | 3. `targets`: one or more targets 21 | 22 | Each target in `targets` consists of: 23 | 24 | 1. `Input.gid`: an identifier that is unique within NewsMTSC 25 | 2. `from`: the character-based, 0-indexed position of the first character of the target's mention within `sentence_normalized` 26 | 3. `to`: the last character of the target's mention 27 | 4. `mention`: the text of the mention 28 | 5. `polarity`: the sentiment of the sentence concerning the target's mention (2.0 = negative, 4.0 = neutral, 6.0 = positive) 29 | 6. `further_mentions` (optional): one or more coreferential mentions of the target within the sentence.
Note that these were extracted automatically and thus might be incorrect or incomplete. Further, our annotators labeled the sentiment concerning the main mention, which - depending on the sentence - might not be identical to the sentiment of the coreferences. 30 | 31 | ``` 32 | { 33 | "primary_gid":"allsides_1192_476_17_— Judge Neil M. Gorsuch_126_139", 34 | "sentence_normalized":"But neither side harbored any doubts, based on the judge’s opinions, other writings and the president who nominated him, that Judge Gorsuch would be a reliable conservative committed to following the original understanding of those who drafted and ratified the Constitution.", 35 | "targets":[ 36 | { 37 | "Input.gid":"allsides_1192_476_17_— Judge Neil M. Gorsuch_126_139", 38 | "from":126, 39 | "to":139, 40 | "mention":"Judge Gorsuch", 41 | "polarity":6.0, 42 | "further_mentions":[ 43 | { 44 | "from":116, 45 | "to":119, 46 | "mention":"him" 47 | } 48 | ] 49 | } 50 | ] 51 | } 52 | ``` 53 | 54 | ## Contact 55 | 56 | If you want to get in touch, feel free to contact Felix Hamborg. If you find an issue with the dataset or model or have a question concerning either, please open an issue in the repository. 57 | 58 | * Web: [https://felix.hamborg.eu/](https://felix.hamborg.eu/) 59 | * Mail: [felix.hamborg@uni-konstanz.de](mailto:felix.hamborg@uni-konstanz.de) 60 | * Repository: [https://github.com/fhamborg/NewsMTSC](https://github.com/fhamborg/NewsMTSC) 61 | 62 | 63 | ## How to cite 64 | 65 | If you use the dataset or parts of it, please cite our paper: 66 | 67 | ``` 68 | @InProceedings{Hamborg2021b, 69 | author = {Hamborg, Felix and Donnay, Karsten}, 70 | title = {NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles}, 71 | booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)}, 72 | year = {2021}, 73 | month = {Apr.}, 74 | location = {Virtual Event}, 75 | } 76 | ``` 77 | -------------------------------------------------------------------------------- /NewsSentiment/converter_huggingface.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file converts the dataset files (3 splits) into the format we'll use on 3 | Huggingface Hub, i.e., where examples with k targets are expanded to k examples, each 4 | having 1 target.
5 | """ 6 | 7 | import pathlib 8 | import jsonlines 9 | from loguru import logger 10 | 11 | 12 | def convert_polarity(polarity): 13 | if polarity == 2.0: 14 | return -1 15 | elif polarity == 4.0: 16 | return 0 17 | elif polarity == 6.0: 18 | return 1 19 | else: 20 | raise ValueError 21 | 22 | 23 | def convert_target(obj, target): 24 | converted_obj = { 25 | "mention": target["mention"], 26 | "polarity": convert_polarity(target["polarity"]), 27 | "from": target["from"], 28 | "to": target["to"], 29 | "sentence": obj["sentence_normalized"], 30 | "id": target["Input.gid"], 31 | } 32 | 33 | return converted_obj 34 | 35 | 36 | def convert_obj(obj): 37 | targets = obj["targets"] 38 | converted_objs = [] 39 | 40 | for target in targets: 41 | converted_objs.append(convert_target(obj, target)) 42 | 43 | return converted_objs 44 | 45 | 46 | def convert(path): 47 | files = [p for p in pathlib.Path(path).iterdir() if p.is_file()] 48 | 49 | for file in files: 50 | converted_lines = [] 51 | counter = 0 52 | with jsonlines.open(file) as reader: 53 | for obj in reader: 54 | converted_lines.extend(convert_obj(obj)) 55 | counter += 1 56 | logger.info( 57 | "converted {} lines to {} examples in {}", 58 | counter, 59 | len(converted_lines), 60 | file, 61 | ) 62 | 63 | with jsonlines.open(str(file) + "converted", mode="w") as writer: 64 | writer.write_all(converted_lines) 65 | 66 | 67 | if __name__ == "__main__": 68 | convert("experiments/default/datasets/newsmtsc-mt") 69 | convert("experiments/default/datasets/newsmtsc-rw") 70 | -------------------------------------------------------------------------------- /NewsSentiment/createoverview.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import defaultdict 4 | 5 | from diskdict import DiskDict 6 | 7 | import jsonlines 8 | import pandas as pd 9 | 10 | from NewsSentiment.fxlogger import get_logger 11 | 12 | logger = get_logger() 13 | 14 | 15 | def rename_flatten(dictionary, key_prefix): 16 | new_dict = {} 17 | 18 | for k, v in dictionary.items(): 19 | new_k = key_prefix + "-" + k 20 | new_dict[new_k] = v 21 | 22 | return new_dict 23 | 24 | 25 | def without_keys(d, keys): 26 | return {x: d[x] for x in d if x not in keys} 27 | 28 | 29 | def non_scalar_to_str(d): 30 | new_d = {} 31 | for k, v in d.items(): 32 | if type(v) in [list, dict]: 33 | new_v = json.dumps(v) 34 | else: 35 | new_v = v 36 | new_d[k] = new_v 37 | return new_d 38 | 39 | 40 | COL_NAME_TMP_GROUP = "_tmp_named_id_" 41 | 42 | 43 | def _find_run_ids(completed_tasks: dict): 44 | """ 45 | First check the maximum number of run ids, then ensures that each experiment has 46 | that many. 
47 | :param completed_tasks: 48 | :return: 49 | """ 50 | run_ids = set() 51 | for named_id, result in completed_tasks.items(): 52 | vals = without_keys(result, ["details"]) 53 | run_id = vals["run_id"] 54 | run_ids.add(run_id) 55 | del vals["run_id"] 56 | del vals["experiment_named_id"] 57 | del vals["experiment_id"] 58 | named_experiment_id_wo_run_id = json.dumps(vals) 59 | result[COL_NAME_TMP_GROUP] = named_experiment_id_wo_run_id 60 | num_runs_per_experiment = len(run_ids) 61 | logger.info("found %s run_ids: %s", num_runs_per_experiment, run_ids) 62 | 63 | # check that each experiment has as many run ids 64 | named_id2run_ids = defaultdict(list) 65 | for named_id, result in completed_tasks.items(): 66 | vals = without_keys(result, ["details"]) 67 | named_experiment_id_wo_run_id = result["_tmp_named_id_"] 68 | run_id = vals["run_id"] 69 | named_id2run_ids[named_experiment_id_wo_run_id].append(run_id) 70 | 71 | count_too_few_runs = 0 72 | for named_id, run_ids in named_id2run_ids.items(): 73 | if len(run_ids) != num_runs_per_experiment: 74 | logger.debug("%s runs for %s", len(run_ids), named_id) 75 | count_too_few_runs += 1 76 | if count_too_few_runs == 0: 77 | logger.info( 78 | "GOOD: num experiments with too few runs: %s of %s", 79 | count_too_few_runs, 80 | len(named_id2run_ids), 81 | ) 82 | else: 83 | logger.warning( 84 | "num experiments with too few runs: %s of %s", 85 | count_too_few_runs, 86 | len(named_id2run_ids), 87 | ) 88 | 89 | return num_runs_per_experiment, completed_tasks 90 | 91 | 92 | def _aggregate_and_mean(df: pd.DataFrame): 93 | df = df.copy(deep=True) 94 | col_names_original_order = list(df.columns) 95 | df_aggr = df.groupby(COL_NAME_TMP_GROUP).mean() 96 | 97 | # from https://stackoverflow.com/a/35401886 98 | # this creates a df that contains aggregated values (from df_aggr) and also 99 | # all other columns (non-aggregated) 100 | aggr_col_names = list(df_aggr.columns) 101 | df.drop(aggr_col_names, axis=1, inplace=True) 102 | df.drop_duplicates(subset=COL_NAME_TMP_GROUP, keep="last", inplace=True) 103 | df = df.merge( 104 | right=df_aggr, right_index=True, left_on=COL_NAME_TMP_GROUP, how="right" 105 | ) 106 | 107 | # reorder the dataframe to have the established order of columsn 108 | # taken from: https://stackoverflow.com/a/13148611 109 | df = df[col_names_original_order] 110 | 111 | # delete temp col 112 | del df[COL_NAME_TMP_GROUP] 113 | 114 | return df 115 | 116 | 117 | def _dfs_to_excel(pathname, name2df): 118 | writer = pd.ExcelWriter(pathname, engine="xlsxwriter") 119 | for name, df in name2df.items(): 120 | if df is None: 121 | logger.info("skipping df because empty: %s", name) 122 | continue 123 | 124 | df.to_excel(writer, sheet_name=name, startrow=0, startcol=0) 125 | writer.save() 126 | 127 | 128 | def shelve2xlsx(opt, ignore_graceful_exit_experiments): 129 | completed_tasks = DiskDict(opt.results_path) 130 | logger.info( 131 | "found {} results in file {}".format(len(completed_tasks), opt.results_path) 132 | ) 133 | # get max run id 134 | num_runs_per_experiment, completed_tasks = _find_run_ids(completed_tasks) 135 | 136 | flattened_results = {} 137 | 138 | for named_id, result in completed_tasks.items(): 139 | if result["rc"] == 99 and ignore_graceful_exit_experiments: 140 | logger.info("found graceful exit (99), not adding to file: %s", named_id) 141 | continue 142 | elif result["rc"] == 0: 143 | test_stats = rename_flatten(result["details"]["test_stats"], "test_stats") 144 | dev_stats = rename_flatten(result["details"]["dev_stats"], "dev_stats") 145 | 
146 | flattened_result = { 147 | **without_keys(result, ["details"]), 148 | **dev_stats, 149 | **test_stats, 150 | } 151 | else: 152 | flattened_result = {**without_keys(result, ["details"])} 153 | 154 | scalared_flattened_result = non_scalar_to_str(flattened_result) 155 | flattened_results[named_id] = scalared_flattened_result 156 | 157 | df = pd.DataFrame(data=flattened_results.values()) 158 | 159 | if num_runs_per_experiment >= 2: 160 | df_aggr = _aggregate_and_mean(df) 161 | else: 162 | df_aggr = None 163 | del df[COL_NAME_TMP_GROUP] 164 | 165 | _dfs_to_excel(opt.results_path + ".xlsx", {"raw": df, "aggr": df_aggr}) 166 | 167 | 168 | def jsonl2xlsx(opt): 169 | labels = {2: "positive", 1: "neutral", 0: "negative"} 170 | 171 | with jsonlines.open(opt.results_path, "r") as reader: 172 | lines = [] 173 | for line in reader: 174 | if line["true_label"] != line["pred_label"]: 175 | line["true_label"] = labels[line["true_label"]] 176 | line["pred_label"] = labels[line["pred_label"]] 177 | 178 | lines.append(line) 179 | 180 | df = pd.DataFrame(data=lines) 181 | df.to_excel(opt.results_path + ".xlsx") 182 | 183 | 184 | if __name__ == "__main__": 185 | parser = argparse.ArgumentParser() 186 | parser.add_argument( 187 | "--results_path", 188 | type=str, 189 | default="results/mtscall_stance0", 190 | ) 191 | parser.add_argument("--mode", type=str, default="shelve") 192 | opt = parser.parse_args() 193 | 194 | if opt.mode == "shelve": 195 | shelve2xlsx(opt, ignore_graceful_exit_experiments=False) 196 | elif opt.mode == "jsonl": 197 | jsonl2xlsx(opt) 198 | -------------------------------------------------------------------------------- /NewsSentiment/customexceptions.py: -------------------------------------------------------------------------------- 1 | class TooLongTextException(Exception): 2 | pass 3 | 4 | 5 | class TargetNotFoundException(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /NewsSentiment/diskdict.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pickle 3 | from typing import Union 4 | 5 | from NewsSentiment.fxlogger import get_logger 6 | 7 | logger = get_logger() 8 | 9 | 10 | class DiskDict(dict): 11 | def __init__(self, sync_path: Union[Path, str]): 12 | if type(sync_path) == str: 13 | sync_path = Path(sync_path) 14 | self.path = sync_path 15 | 16 | if self.path.exists(): 17 | with open(self.path, "rb") as file: 18 | tmp_dct = pickle.load(file) 19 | super().update(tmp_dct) 20 | logger.info( 21 | "loaded DiskDict with %s items from %s", len(tmp_dct), self.path 22 | ) 23 | 24 | def sync_to_disk(self): 25 | with open(self.path, "wb") as file: 26 | tmp_dct = super().copy() 27 | pickle.dump(tmp_dct, file) 28 | # print(f"saved DiskDict with {len(tmp_dct)} items to {self.path}") 29 | -------------------------------------------------------------------------------- /NewsSentiment/download.py: -------------------------------------------------------------------------------- 1 | """ 2 | Download a specific version of a finetuned model and place it in pretrained_models. 
3 | """ 4 | import os 5 | import string 6 | 7 | import torch 8 | 9 | from NewsSentiment.fxlogger import get_logger 10 | 11 | 12 | class Download: 13 | 14 | def __init__( 15 | self, own_model_name, version="default", force=False, list_versions=False 16 | ): 17 | from NewsSentiment.models.FXBaseModel import model_includes_pretrained 18 | from NewsSentiment.train import OWN_MODELNAME2CLASS 19 | 20 | logger = get_logger() 21 | own_model_name = own_model_name 22 | if version == "default": 23 | version = None 24 | try: 25 | model_cls = OWN_MODELNAME2CLASS[own_model_name] 26 | except KeyError: 27 | logger.error(f'Model "{own_model_name}" is unknown.') 28 | exit(2) 29 | if not model_includes_pretrained(model_cls): 30 | logger.error( 31 | f'Model "{own_model_name}" does not ship any pretrained models for download.' 32 | ) 33 | exit(2) 34 | if list_versions: 35 | self.list_versions(model_cls, own_model_name) 36 | else: 37 | self.download(model_cls, version, force) 38 | 39 | @staticmethod 40 | def list_versions(model_cls, own_model_name=""): 41 | default = model_cls.get_pretrained_default_version() 42 | versions = model_cls.get_pretrained_versions() 43 | if own_model_name: 44 | own_model_name = f' "{own_model_name}"' 45 | print(f"Model{own_model_name} provides following pretrained versions:") 46 | for version, source in versions.items(): 47 | default_str = "" 48 | if version == default: 49 | default_str = " (default)" 50 | print(f'"{version}"{default_str}: {source}') 51 | 52 | @classmethod 53 | def download(cls, model_cls, version=None, force=False, skip_if_exists=True): 54 | source = model_cls.get_pretrained_source(version) 55 | path = cls.model_path(model_cls, version) 56 | if not force and os.path.isfile(path): 57 | if skip_if_exists: 58 | return 59 | print("Model file already exists. 
Use --force to overwrite.") 60 | exit(2) 61 | print(f"Downloading to {path}:") 62 | os.makedirs(os.path.dirname(path), exist_ok=True) 63 | torch.hub.download_url_to_file(source, path) 64 | return path 65 | 66 | @staticmethod 67 | def model_filename(model_cls, version=None): 68 | version = version or model_cls.get_pretrained_default_version() 69 | source_filename = os.path.basename(model_cls.get_pretrained_source(version)) 70 | name = f"{source_filename}_{version}" 71 | allowed = set(f".-_ {string.ascii_letters}{string.digits}") 72 | filename = "".join(char for char in name if char in allowed) 73 | return filename.replace(".", "-").replace(" ", "_") 74 | 75 | @classmethod 76 | def model_path(cls, model_cls, version=None): 77 | return os.path.join(torch.hub.get_dir(), 78 | "pretrained_models", 79 | "state_dicts", 80 | cls.model_filename(model_cls, version)) 81 | 82 | @staticmethod 83 | def add_subparser(subparser): 84 | """Download models for NewsSentiment""" 85 | subparser.add_argument("--own_model_name", default="grutsc", type=str) 86 | subparser.add_argument( 87 | "--version", 88 | default=None, 89 | type=str, 90 | help="version of the model to download, use --force to overwrite a version which was already downloaded", 91 | ) 92 | subparser.add_argument( 93 | "--force", 94 | action="store_true", 95 | help="force the download of a model and overwrite potential previous versions", 96 | ) 97 | subparser.add_argument( 98 | "--list_versions", 99 | action="store_true", 100 | help="List all pretrained model versions which a model provides", 101 | ) 102 | return subparser 103 | 104 | @classmethod 105 | def run_from_parser(cls, args_namespace): 106 | args = vars(args_namespace) 107 | return cls(**args) 108 | 109 | -------------------------------------------------------------------------------- /NewsSentiment/earlystopping.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/Bjarten/early-stopping-pytorch 2 | # Copyright by Bjarten 3 | # License: MIT 4 | 5 | from NewsSentiment.fxlogger import get_logger 6 | 7 | 8 | class EarlyStopping: 9 | """Early stops the training if validation loss doesn't improve after a given patience.""" 10 | 11 | def __init__(self, patience=2, delta=0.01): 12 | """ 13 | Args: 14 | patience (int): How long to wait after last time validation loss improved. 15 | Default: 2 16 | verbose (bool): If True, prints a message for each validation loss improvement. 17 | Default: False 18 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 
19 | Default: 0 20 | """ 21 | self.patience = patience 22 | self.counter = 0 23 | self.best_score = None 24 | self.early_stop = False 25 | self.delta = delta 26 | self.logger = get_logger() 27 | self.flag_has_score_increased_since_last_check = False 28 | 29 | def __call__(self, dev_score): 30 | if self.best_score is None: 31 | self.best_score = dev_score 32 | self.logger.debug(f'computed first dev-set score {dev_score:.6f}).') 33 | self.flag_has_score_increased_since_last_check = True 34 | elif dev_score < self.best_score + self.delta: 35 | self.counter += 1 36 | self.logger.debug( 37 | f'patience counter: {self.counter} out of {self.patience} (cur-score: {dev_score}, best-score:' 38 | f' {self.best_score})') 39 | self.flag_has_score_increased_since_last_check = False 40 | if self.counter >= self.patience: 41 | self.early_stop = True 42 | else: 43 | self.best_score = dev_score 44 | self.counter = 0 45 | self.logger.debug(f'dev-set score increased ({self.best_score:.6f} --> {dev_score:.6f}).') 46 | self.flag_has_score_increased_since_last_check = True 47 | -------------------------------------------------------------------------------- /NewsSentiment/evaluator.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from statistics import mean 3 | 4 | import jsonlines 5 | import numpy as np 6 | from sklearn import metrics 7 | 8 | from NewsSentiment.SentimentClasses import SentimentClasses 9 | from NewsSentiment.dataset import FXDataset 10 | from NewsSentiment.fxlogger import get_logger 11 | import torch 12 | 13 | 14 | class Evaluator: 15 | def __init__( 16 | self, 17 | sorted_expected_label_values, 18 | polarity_associations, 19 | snem_name, 20 | is_return_confidence, 21 | ): 22 | self.logger = get_logger() 23 | self.polarity_associations = polarity_associations 24 | self.pos_label_value = polarity_associations["positive"] 25 | self.neg_label_value = polarity_associations["negative"] 26 | self.sorted_expected_label_values = sorted_expected_label_values 27 | self.pos_label_index = self.sorted_expected_label_values.index( 28 | self.pos_label_value 29 | ) 30 | self.neg_label_index = self.sorted_expected_label_values.index( 31 | self.neg_label_value 32 | ) 33 | self.snem_name = snem_name 34 | self.is_return_confidence = is_return_confidence 35 | 36 | def mean_from_all_statistics(self, all_test_stats): 37 | # for Counters, we do not take the mean 38 | mean_test_stats = {} 39 | number_stats = len(all_test_stats) 40 | 41 | for key in all_test_stats[0]: 42 | value_type = type(all_test_stats[0][key]) 43 | 44 | if value_type in [float, np.float64, np.float32]: 45 | aggr_val = 0.0 46 | for test_stat in all_test_stats: 47 | aggr_val += test_stat[key] 48 | 49 | mean_test_stats[key] = aggr_val / number_stats 50 | 51 | elif value_type == Counter: 52 | aggr_val = Counter() 53 | for test_stat in all_test_stats: 54 | aggr_val += test_stat[key] 55 | mean_test_stats[key] = aggr_val 56 | 57 | return mean_test_stats 58 | 59 | def calc_statistics(self, y_true, y_pred, y_pred_confidence): 60 | """ 61 | Calculates performance statistics by comparing k-dimensional Tensors y_true and 62 | y_pred. 
Both y_true and y_pred's shape have to be(batchsize, maxtargetsperexample) 63 | :param y_true: 64 | :param y_pred: 65 | :return: 66 | """ 67 | assert y_true.shape == y_pred.shape, "different shapes" 68 | assert y_true.shape[1] == FXDataset.NUM_MAX_TARGETS_PER_ITEM 69 | 70 | # for now, the following doesn't keep track of which prediction or true answer 71 | # belongs to which examples. for other measures, this might be necessary, though 72 | # in that case, the code would have to be changed, e.g., by keeping the original 73 | # dimensions. for now, we just unpack the Tensors to so that the former 74 | # evaluation logic will work on them. practically, this treats each non-fillup 75 | # target as an example 76 | y_true = y_true.view(-1) 77 | y_pred = y_pred.view(-1) 78 | if y_pred_confidence is not None: 79 | y_pred_confidence = y_pred_confidence.view(-1) 80 | 81 | # in both tensors, keep only those scalars that are non-fill in y_true 82 | non_fillup_mask = y_true != SentimentClasses.FILLUP_POLARITY_VALUE 83 | y_true = y_true[non_fillup_mask] 84 | y_pred = y_pred[non_fillup_mask] 85 | if y_pred_confidence is not None: 86 | y_pred_confidence = y_pred_confidence[non_fillup_mask].tolist() 87 | 88 | # this is just the previous, single-target evaluation logic 89 | y_true_list = y_true.tolist() 90 | y_pred_list = y_pred.tolist() 91 | y_true_count = Counter(y_true_list) 92 | y_pred_count = Counter(y_pred_list) 93 | confidence_info = {} 94 | 95 | # perform confidence evaluation if vector was given 96 | if y_pred_confidence is not None: 97 | # compare where equal for confidence evaluation 98 | is_correct_list = [] 99 | for index in range(len(y_true_list)): 100 | y_true_item = y_true_list[index] 101 | y_pred_item = y_pred_list[index] 102 | if y_true_item == y_pred_item: 103 | is_correct_list.append(1) 104 | else: 105 | is_correct_list.append(0) 106 | # now we have a list whether the prediction is correct (1) or not (0) 107 | # let's compare it with the confidence 108 | 109 | # regression metrics 110 | mse = metrics.mean_squared_error(is_correct_list, y_pred_confidence) 111 | r2_score = metrics.r2_score(is_correct_list, y_pred_confidence) 112 | 113 | # convert to classification problem (uses 0.5 threshold) 114 | y_pred_confidence_classification = ( 115 | torch.FloatTensor(y_pred_confidence) > 0.5 116 | ).tolist() 117 | 118 | f1_macro_conf = metrics.f1_score( 119 | is_correct_list, y_pred_confidence_classification, average="macro" 120 | ) 121 | accuracy_conf = metrics.accuracy_score( 122 | is_correct_list, y_pred_confidence_classification 123 | ) 124 | f1_of_classes = metrics.f1_score( 125 | is_correct_list, y_pred_confidence_classification, average=None 126 | ).tolist() 127 | 128 | confidence_info = { 129 | "f1m": f1_macro_conf, 130 | "acc": accuracy_conf, 131 | "f1_classes": f1_of_classes, 132 | "mse": mse, 133 | "r2score": r2_score, 134 | "y_pred_confidence_classification": y_pred_confidence_classification, 135 | "y_pred_confidence": y_pred_confidence, 136 | "y_true_confidence": is_correct_list, 137 | } 138 | 139 | f1_macro = metrics.f1_score( 140 | y_true, y_pred, labels=self.sorted_expected_label_values, average="macro" 141 | ) 142 | f1_of_classes = metrics.f1_score( 143 | y_true, y_pred, labels=self.sorted_expected_label_values, average=None 144 | ) 145 | f1_posneg = ( 146 | f1_of_classes[self.pos_label_index] + f1_of_classes[self.neg_label_index] 147 | ) / 2.0 148 | confusion_matrix = metrics.confusion_matrix( 149 | y_true, y_pred, labels=self.sorted_expected_label_values 150 | ) 151 | 
recalls_of_classes = metrics.recall_score( 152 | y_true, y_pred, labels=self.sorted_expected_label_values, average=None 153 | ) 154 | recall_avg = mean(recalls_of_classes) 155 | recall_macro = metrics.recall_score( 156 | y_true, y_pred, labels=self.sorted_expected_label_values, average="macro" 157 | ) 158 | precision_macro = metrics.precision_score( 159 | y_true, y_pred, labels=self.sorted_expected_label_values, average="macro" 160 | ) 161 | accuracy = metrics.accuracy_score(y_true, y_pred) 162 | 163 | results = { 164 | "f1_macro": f1_macro, 165 | "confusion_matrix": confusion_matrix, 166 | "recalls_of_classes": recalls_of_classes, 167 | "recall_avg": recall_avg, 168 | "recall_macro": recall_macro, 169 | "precision_macro": precision_macro, 170 | "accuracy": accuracy, 171 | "f1_posneg": f1_posneg, 172 | "y_true_count": y_true_count, 173 | "y_pred_count": y_pred_count, 174 | } 175 | if y_pred_confidence is not None: 176 | results["confidence_info"] = confidence_info 177 | 178 | return results 179 | 180 | def print_stats(self, stats, description): 181 | self.logger.info(description) 182 | self.logger.info("{}: {})".format(self.snem_name, stats[self.snem_name])) 183 | self.logger.info( 184 | "y_true distribution: {}".format(sorted(stats["y_true_count"].items())) 185 | ) 186 | self.logger.info( 187 | "y_pred distribution: {}".format(sorted(stats["y_pred_count"].items())) 188 | ) 189 | self.logger.info( 190 | "> recall_avg: {:.4f}, f1_posneg: {:.4f}, acc: {:.4f}, f1_macro: {:.4f}".format( 191 | stats["recall_avg"], 192 | stats["f1_posneg"], 193 | stats["accuracy"], 194 | stats["f1_macro"], 195 | ) 196 | ) 197 | 198 | def write_error_table(self, y_true, y_pred, texts_list, filepath): 199 | y_true_list = y_true.tolist() 200 | y_pred_list = y_pred.tolist() 201 | 202 | with jsonlines.open(filepath, "w") as writer: 203 | for true_label, pred_label, text in zip( 204 | y_true_list, y_pred_list, texts_list 205 | ): 206 | writer.write( 207 | {"true_label": true_label, "pred_label": pred_label, "text": text} 208 | ) 209 | -------------------------------------------------------------------------------- /NewsSentiment/fxlogger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = None 4 | 5 | 6 | def get_logger(): 7 | global logger 8 | 9 | if not logger: 10 | formatter = logging.Formatter(fmt='%(asctime)s %(levelname)s %(module)s(%(lineno)d):%(funcName)s|: %(message)s', 11 | datefmt='%H:%M:%S') 12 | 13 | handler = logging.StreamHandler() 14 | handler.setFormatter(formatter) 15 | 16 | logger = logging.getLogger("NewsSentiment") 17 | if logger.level == logging.NOTSET: 18 | logger.setLevel(logging.ERROR) 19 | 20 | if len(logger.handlers) == 0: 21 | logger.addHandler(handler) 22 | 23 | return logger 24 | -------------------------------------------------------------------------------- /NewsSentiment/inferrest.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request 2 | 3 | from NewsSentiment import TargetSentimentClassifier 4 | 5 | app = Flask(__name__) 6 | 7 | tsc = TargetSentimentClassifier() 8 | 9 | 10 | @app.route("/infer", methods=["POST"]) 11 | def index(): 12 | text_left = request.form["left"] 13 | target_mention = request.form["target"] 14 | text_right = request.form["right"] 15 | return { 16 | "result": tsc.infer( 17 | text_left=text_left, target_mention=target_mention, text_right=text_right 18 | ) 19 | } 20 | 21 | 22 | def start_rest_server(port=13273): 23 | 
print("starting server...") 24 | app.run(host="0.0.0.0", port=port) 25 | print("done") 26 | 27 | 28 | if __name__ == "__main__": 29 | start_rest_server() 30 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/__init__.py -------------------------------------------------------------------------------- /NewsSentiment/knowledge/bingliuopinion/bingliuopinion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from NewsSentiment.diskdict import DiskDict 4 | from NewsSentiment.knowledge.bingliuopinion.converter import PATH_DICT_BING_LIU_OPINION_POLARITY, POLARITY2INDEX 5 | from NewsSentiment.knowledge.knowledgeutils import find_key_original_or_lc 6 | 7 | __ddict_emolex = DiskDict(PATH_DICT_BING_LIU_OPINION_POLARITY) 8 | __ddict_emolex_keys_lower = {k.lower(): v for k, v in __ddict_emolex.items()} 9 | __num_emotions = 2 10 | assert len(__ddict_emolex) == 6726 11 | assert len(POLARITY2INDEX) == __num_emotions 12 | 13 | 14 | def get_num_bingliu_polarities(): 15 | return __num_emotions 16 | 17 | 18 | def get_bingliu_polarities_as_tensor(term: str): 19 | emotions = find_key_original_or_lc(__ddict_emolex, __ddict_emolex_keys_lower, term) 20 | tensor_emotions = torch.zeros(__num_emotions, dtype=torch.long) 21 | for emotion_index in emotions: 22 | tensor_emotions[emotion_index] = 1 23 | return tensor_emotions 24 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/bingliuopinion/converter.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict, Counter 2 | from pathlib import Path 3 | import csv 4 | import os 5 | 6 | from tqdm import tqdm 7 | 8 | from NewsSentiment.fxlogger import get_logger 9 | from NewsSentiment.diskdict import DiskDict 10 | 11 | POLARITY2INDEX = { 12 | "positive": 1, 13 | "negative": 0, 14 | } 15 | THIS_DIR = Path(os.path.dirname(os.path.realpath(__file__))) 16 | PATH_DICT_BING_LIU_OPINION_POLARITY = THIS_DIR / "opinion_polarity.ddict" 17 | 18 | logger = get_logger() 19 | 20 | 21 | def convert_single_txt_to_dict(path: Path): 22 | terms = set() 23 | with open(path, "r") as file: 24 | line_count = 0 25 | for line in file: 26 | if line_count > 29: 27 | terms.add(line.strip()) 28 | line_count += 1 29 | return terms 30 | 31 | 32 | def convert_txt_to_dict(): 33 | path_pos = THIS_DIR / "positive-words.txt" 34 | path_neg = THIS_DIR / "negative-words.txt" 35 | 36 | term2polarity = defaultdict(set) 37 | polarity_counter = Counter() 38 | 39 | positive_terms = convert_single_txt_to_dict(path_pos) 40 | negative_terms = convert_single_txt_to_dict(path_neg) 41 | all_terms = positive_terms.union(negative_terms) 42 | 43 | for term in all_terms: 44 | if term in positive_terms: 45 | term2polarity[term].add(POLARITY2INDEX["positive"]) 46 | polarity_counter["positive"] += 1 47 | if term in negative_terms: 48 | term2polarity[term].add(POLARITY2INDEX["negative"]) 49 | polarity_counter["negative"] += 1 50 | 51 | logger.info("read %s terms", len(term2polarity)) 52 | logger.info("polarity count:\n%s", polarity_counter.most_common()) 53 | 54 | logger.info("saving to %s...", PATH_DICT_BING_LIU_OPINION_POLARITY) 55 | ddict_emolex = 
DiskDict(PATH_DICT_BING_LIU_OPINION_POLARITY) 56 | ddict_emolex.update(term2polarity) 57 | ddict_emolex.sync_to_disk() 58 | logger.info("done") 59 | 60 | 61 | if __name__ == "__main__": 62 | convert_txt_to_dict() 63 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/bingliuopinion/license.txt: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ; 3 | ; Both files (negative- and positive-words.txt) and the papers can all be downloaded from 4 | ; http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html 5 | ; 6 | ; If you use this list, please cite the following paper: 7 | ; 8 | ; Minqing Hu and Bing Liu. "Mining and Summarizing Customer Reviews." 9 | ; Proceedings of the ACM SIGKDD International Conference on Knowledge 10 | ; Discovery and Data Mining (KDD-2004), Aug 22-25, 2004, Seattle, 11 | ; Washington, USA, 12 | ; Notes: 13 | ; 1. The appearance of an opinion word in a sentence does not necessarily 14 | ; mean that the sentence expresses a positive or negative opinion. 15 | ; See the paper below: 16 | ; 17 | ; Bing Liu. "Sentiment Analysis and Subjectivity." An chapter in 18 | ; Handbook of Natural Language Processing, Second Edition, 19 | ; (editors: N. Indurkhya and F. J. Damerau), 2010. 20 | ; 21 | ; 2. You will notice many misspelled words in the list. They are not 22 | ; mistakes. They are included as these misspelled words appear 23 | ; frequently in social media content. 24 | ; 25 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 26 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/bingliuopinion/opinion_polarity.ddict: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/bingliuopinion/opinion_polarity.ddict -------------------------------------------------------------------------------- /NewsSentiment/knowledge/knowledgeutils.py: -------------------------------------------------------------------------------- 1 | def find_key_original_or_lc(data: dict, data_keys_lower: dict, key: str): 2 | # try to get the key as it is from the dict 3 | if key in data.keys(): 4 | return data[key] 5 | # if not contained, try whether if using case insensitivity we find an entry 6 | if key.lower() in data_keys_lower.keys(): 7 | return data_keys_lower[key.lower()] 8 | # if not, return None 9 | return set() 10 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/liwc/data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | !readme.txt 6 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/liwc/data/readme.txt: -------------------------------------------------------------------------------- 1 | In order to use LIWC, you need to acquire a license from them. 2 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/liwc/dic.py: -------------------------------------------------------------------------------- 1 | def _parse_categories(lines): 2 | """ 3 | Read (category_id, category_name) pairs from the categories section. 
4 | Each line consists of an integer followed a tab and then the category name. 5 | This section is separated from the lexicon by a line consisting of a single "%". 6 | """ 7 | for line in lines: 8 | line = line.strip() 9 | if line == "%": 10 | return 11 | # ignore non-matching groups of categories 12 | if "\t" in line: 13 | category_id, category_name = line.split("\t", 1) 14 | yield category_id, category_name 15 | 16 | 17 | def _parse_lexicon(lines, category_mapping): 18 | """ 19 | Read (match_expression, category_names) pairs from the lexicon section. 20 | Each line consists of a match expression followed by a tab and then one or more 21 | tab-separated integers, which are mapped to category names using `category_mapping`. 22 | """ 23 | for line in lines: 24 | line = line.strip() 25 | parts = line.split("\t") 26 | yield parts[0], [category_mapping[category_id] for category_id in parts[1:]] 27 | 28 | 29 | def read_dic(filepath): 30 | """ 31 | Reads a LIWC lexicon from a file in the .dic format, returning a tuple of 32 | (lexicon, category_names), where: 33 | * `lexicon` is a dict mapping string patterns to lists of category names 34 | * `category_names` is a list of category names (as strings) 35 | """ 36 | with open(filepath) as lines: 37 | # read up to first "%" (should be very first line of file) 38 | for line in lines: 39 | if line.strip() == "%": 40 | break 41 | # read categories (a mapping from integer string to category name) 42 | category_mapping = dict(_parse_categories(lines)) 43 | # read lexicon (a mapping from matching string to a list of category names) 44 | lexicon = dict(_parse_lexicon(lines, category_mapping)) 45 | return lexicon, list(category_mapping.values()) 46 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/liwc/liwc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from NewsSentiment.knowledge.liwc.liwchelper import load_token_parser 4 | 5 | parse, category_names = load_token_parser() 6 | 7 | LIWC_CATEGORY2INDEX = {} 8 | for index, category_name in enumerate(category_names): 9 | LIWC_CATEGORY2INDEX[category_name] = index 10 | 11 | 12 | def get_num_liwc_categories(): 13 | return len(category_names) 14 | 15 | 16 | def get_liwc_categories_as_tensor(term: str): 17 | categories = parse(term) 18 | categories_of_lowercased = parse(term.lower()) 19 | 20 | if len(categories) == 0 and len(categories_of_lowercased) > 0: 21 | # if we do not have categories of original term, but have them for lowercased term, use the latter 22 | categories = categories_of_lowercased 23 | 24 | tensor_emotions = torch.zeros(get_num_liwc_categories(), dtype=torch.long) 25 | for category in categories: 26 | index = LIWC_CATEGORY2INDEX[category] 27 | assert index < get_num_liwc_categories() 28 | tensor_emotions[index] = 1 29 | return tensor_emotions 30 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/liwc/liwchelper.py: -------------------------------------------------------------------------------- 1 | from .dic import read_dic 2 | from .trie import build_trie, search_trie 3 | 4 | try: 5 | import pkg_resources 6 | 7 | __version__ = pkg_resources.get_distribution("liwc").version 8 | except Exception: 9 | __version__ = None 10 | 11 | 12 | def load_token_parser(filepath="knowledge/liwc/data/LIWC2015_English.dic"): 13 | """ 14 | Reads a LIWC lexicon from a file in the .dic format, returning a tuple of 15 | (parse, 
category_names), where: 16 | * `parse` is a function from a token to a list of strings (potentially 17 | empty) of matching categories 18 | * `category_names` is a list of strings representing all LIWC categories in 19 | the lexicon 20 | """ 21 | lexicon, category_names = read_dic(filepath) 22 | trie = build_trie(lexicon) 23 | 24 | def parse_token(token): 25 | return search_trie(trie, token) 26 | 27 | return parse_token, category_names 28 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/liwc/trie.py: -------------------------------------------------------------------------------- 1 | def build_trie(lexicon): 2 | """ 3 | Build a character-trie from the plain pattern_string -> categories_list 4 | mapping provided by `lexicon`. 5 | 6 | Some LIWC patterns end with a `*` to indicate a wildcard match. 7 | """ 8 | trie = {} 9 | for pattern, category_names in lexicon.items(): 10 | cursor = trie 11 | for char in pattern: 12 | if char == "*": 13 | cursor["*"] = category_names 14 | break 15 | if char not in cursor: 16 | cursor[char] = {} 17 | cursor = cursor[char] 18 | cursor["$"] = category_names 19 | return trie 20 | 21 | 22 | def search_trie(trie, token, token_i=0): 23 | """ 24 | Search the given character-trie for paths that match the `token` string. 25 | """ 26 | if "*" in trie: 27 | return trie["*"] 28 | if "$" in trie and token_i == len(token): 29 | return trie["$"] 30 | if token_i < len(token): 31 | char = token[token_i] 32 | if char in trie: 33 | return search_trie(trie[char], token, token_i + 1) 34 | return [] 35 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/mpqasubjectivity/converter.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import defaultdict, Counter 3 | from pathlib import Path 4 | import csv 5 | 6 | from tqdm import tqdm 7 | 8 | from NewsSentiment.fxlogger import get_logger 9 | from NewsSentiment.diskdict import DiskDict 10 | 11 | POLARITY2INDEX = { 12 | "positive": 2, 13 | "neutral": 1, 14 | "negative": 0, 15 | } 16 | THIS_DIR = Path(os.path.dirname(os.path.realpath(__file__))) 17 | PATH_DICT_MPQA_SUBJECTIVITY = THIS_DIR / "subjclueslen1-HLTEMNLP05.tff.ddict" 18 | 19 | logger = get_logger() 20 | 21 | 22 | def get_value(entry_key_value: str): 23 | return entry_key_value.split("=")[1] 24 | 25 | 26 | def convert_txt_to_dict(): 27 | path_dict = THIS_DIR / "subjclueslen1-HLTEMNLP05.tff" 28 | 29 | term2polarity = defaultdict(set) 30 | polarity_counter = Counter() 31 | with open(path_dict, "r") as csv_file: 32 | csv_reader = csv.reader(csv_file, delimiter=" ") 33 | for line in csv_reader: 34 | entry_type, entry_length, entry_word, entry_pos, entry_is_stemmed, entry_polarity = line 35 | entry_type = get_value(entry_type) 36 | entry_length = get_value(entry_length) 37 | entry_word = get_value(entry_word) 38 | entry_pos = get_value(entry_pos) 39 | entry_is_stemmed = get_value(entry_is_stemmed) 40 | entry_polarity = get_value(entry_polarity) 41 | 42 | assert " " not in entry_word, f"cannot handle spaces in word" 43 | assert entry_polarity in POLARITY2INDEX.keys() or entry_polarity == "both", f"polarity label not known: {entry_polarity} for {entry_word}" 44 | 45 | if entry_polarity == "both": 46 | polarities = [POLARITY2INDEX["positive"], POLARITY2INDEX["negative"]] 47 | else: 48 | polarities = (POLARITY2INDEX[entry_polarity],) 49 | 50 | for polarity in polarities: 51 | 
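# each polarity index is recorded for the term; entries labeled "both" above therefore end up
# under the positive as well as the negative index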
term2polarity[entry_word].add(polarity) 52 | polarity_counter[polarity] += 1 53 | 54 | logger.info("read %s terms", len(term2polarity)) 55 | logger.info("polarity count:\n%s", polarity_counter.most_common()) 56 | 57 | logger.info("saving to %s...", PATH_DICT_MPQA_SUBJECTIVITY) 58 | ddict_emolex = DiskDict(PATH_DICT_MPQA_SUBJECTIVITY) 59 | ddict_emolex.update(term2polarity) 60 | ddict_emolex.sync_to_disk() 61 | logger.info("done") 62 | 63 | 64 | if __name__ == "__main__": 65 | convert_txt_to_dict() 66 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/mpqasubjectivity/mpqasubjectivity.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from NewsSentiment.diskdict import DiskDict 4 | from NewsSentiment.knowledge.knowledgeutils import find_key_original_or_lc 5 | from NewsSentiment.knowledge.mpqasubjectivity.converter import POLARITY2INDEX, PATH_DICT_MPQA_SUBJECTIVITY 6 | 7 | __ddict_emolex = DiskDict(PATH_DICT_MPQA_SUBJECTIVITY) 8 | __ddict_emolex_keys_lower = {k.lower(): v for k, v in __ddict_emolex.items()} 9 | __num_emotions = 3 10 | assert len(__ddict_emolex) == 6886, len(__ddict_emolex) 11 | assert len(POLARITY2INDEX) == __num_emotions 12 | 13 | 14 | def get_num_mpqa_subjectivity_polarities(): 15 | return __num_emotions 16 | 17 | 18 | def get_mpqa_subjectivity_polarities_as_tensor(term: str): 19 | emotions = find_key_original_or_lc(__ddict_emolex, __ddict_emolex_keys_lower, term) 20 | tensor_emotions = torch.zeros(__num_emotions, dtype=torch.long) 21 | for emotion_index in emotions: 22 | tensor_emotions[emotion_index] = 1 23 | return tensor_emotions 24 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/mpqasubjectivity/subjclueslen1-HLTEMNLP05.tff.ddict: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/mpqasubjectivity/subjclueslen1-HLTEMNLP05.tff.ddict -------------------------------------------------------------------------------- /NewsSentiment/knowledge/nrcemolex/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/nrcemolex/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict -------------------------------------------------------------------------------- /NewsSentiment/knowledge/nrcemolex/converter.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import defaultdict, Counter 3 | from pathlib import Path 4 | import csv 5 | 6 | from NewsSentiment.fxlogger import get_logger 7 | from NewsSentiment.diskdict import DiskDict 8 | 9 | EMOTION2INDEX = { 10 | "anger": 0, 11 | "anticipation": 1, 12 | "disgust": 2, 13 | "fear": 3, 14 | "joy": 4, 15 | "negative": 5, 16 | "positive": 6, 17 | "sadness": 7, 18 | "surprise": 8, 19 | "trust": 9, 20 | } 21 | THIS_DIR = Path(os.path.dirname(os.path.realpath(__file__))) 22 | PATH_DICT_NRC_EMOLEX = THIS_DIR / "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict" 23 | 24 | logger = get_logger() 25 | 26 | 27 | def convert_txt_to_dict(): 28 | path_dict = THIS_DIR / "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt" 29 | 30 | term2emotions = defaultdict(set) 31 | emotion_counter = Counter() 32 | with 
open(path_dict, "r") as csv_file: 33 | csv_reader = csv.reader(csv_file, delimiter="\t") 34 | for term, emotion, is_present in csv_reader: 35 | if is_present == "1": 36 | emotion_index = EMOTION2INDEX[emotion] 37 | term2emotions[term].add(emotion_index) 38 | emotion_counter[emotion] += 1 39 | elif is_present == "0": 40 | pass 41 | else: 42 | raise ValueError 43 | logger.info("read %s terms", len(term2emotions)) 44 | logger.info("emotion count:\n%s", emotion_counter.most_common()) 45 | 46 | logger.info("saving to %s...", None) 47 | ddict_emolex = DiskDict(PATH_DICT_NRC_EMOLEX) 48 | ddict_emolex.update(term2emotions) 49 | ddict_emolex.sync_to_disk() 50 | logger.info("done") 51 | 52 | 53 | if __name__ == "__main__": 54 | convert_txt_to_dict() 55 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/nrcemolex/nrcemolex.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from NewsSentiment.knowledge.knowledgeutils import find_key_original_or_lc 4 | from NewsSentiment.knowledge.nrcemolex.converter import PATH_DICT_NRC_EMOLEX, EMOTION2INDEX 5 | from NewsSentiment.diskdict import DiskDict 6 | 7 | __ddict_emolex = DiskDict(PATH_DICT_NRC_EMOLEX) 8 | __ddict_emolex_keys_lower = {k.lower(): v for k, v in __ddict_emolex.items()} 9 | __num_emotions = 10 10 | assert len(__ddict_emolex) == 6468 11 | assert len(EMOTION2INDEX) == __num_emotions 12 | 13 | 14 | def get_num_nrc_emotions(): 15 | return __num_emotions 16 | 17 | 18 | def get_nrc_emotions_as_tensor(term: str): 19 | emotions = find_key_original_or_lc(__ddict_emolex, __ddict_emolex_keys_lower, term) 20 | tensor_emotions = torch.zeros(__num_emotions, dtype=torch.long) 21 | for emotion_index in emotions: 22 | tensor_emotions[emotion_index] = 1 23 | return tensor_emotions 24 | -------------------------------------------------------------------------------- /NewsSentiment/knowledge/zeros/zerosknowledge.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def get_num_zero_dimensions(): 5 | # while one dimension would suffice, using only one can cause (especially when using 6 | # single_targets=True) that the batcher by pytorch removes the singleton dimension. specifically, 7 | # with only one target per batch item, the size of the knowledge source will be 1, 150, 1, whereas the last dimension 8 | # is removed during batching by pytorch. 
so to keep things simple, we just use two dimensions here to avoid having a 9 | # singleton dimension at the end 10 | return 2 11 | 12 | 13 | def get_zeros_as_tensor(term: str): 14 | tensor_emotions = torch.zeros(get_num_zero_dimensions(), dtype=torch.long) 15 | return tensor_emotions 16 | -------------------------------------------------------------------------------- /NewsSentiment/layers/AggregatorForBert.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class AggregatorForBert(nn.Module): 6 | """ 7 | According to https://huggingface.co/transformers/model_doc/bert.html#bertmodel we 8 | should not use the pooler_output (as in the original SPC version) but get the 9 | last_hidden_state and "averaging or pooling the sequence of hidden-states for the 10 | whole input sequence" 11 | """ 12 | def __init__(self, spc_lm_representation: str): 13 | super(AggregatorForBert, self).__init__() 14 | self.spc_lm_representation = spc_lm_representation 15 | 16 | def forward(self, last_hidden_state, pooler_output, all_hidden_states): 17 | if self.spc_lm_representation == "pooler_output": 18 | prepared_output = pooler_output 19 | elif self.spc_lm_representation == "mean_last": 20 | prepared_output = last_hidden_state.mean(dim=1) 21 | elif self.spc_lm_representation == "mean_last_four": 22 | prepared_output = ( 23 | torch.stack(all_hidden_states[-4:]).mean(dim=0).mean(dim=1) 24 | ) 25 | elif self.spc_lm_representation == "mean_last_two": 26 | prepared_output = ( 27 | torch.stack(all_hidden_states[-2:]).mean(dim=0).mean(dim=1) 28 | ) 29 | elif self.spc_lm_representation == "mean_all": 30 | prepared_output = torch.stack(all_hidden_states).mean(dim=0).mean(dim=1) 31 | elif self.spc_lm_representation == "sum_last": 32 | prepared_output = last_hidden_state.sum(dim=1) 33 | elif self.spc_lm_representation == "sum_last_four": 34 | prepared_output = torch.stack(all_hidden_states[-4:]).sum(dim=0).sum(dim=1) 35 | elif self.spc_lm_representation == "sum_last_two": 36 | prepared_output = torch.stack(all_hidden_states[-2:]).sum(dim=0).sum(dim=1) 37 | elif self.spc_lm_representation == "sum_all": 38 | prepared_output = torch.stack(all_hidden_states).sum(dim=0).sum(dim=1) 39 | return prepared_output 40 | -------------------------------------------------------------------------------- /NewsSentiment/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # file: __init__.py 3 | # author: songyouwei 4 | # Copyright (C) 2018. All Rights Reserved. -------------------------------------------------------------------------------- /NewsSentiment/layers/attention.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # file: attention.py 3 | # author: songyouwei for top two classes, huggingface for third (slightly 4 | # adapted by FH) 5 | # Copyright (C) 2020. All Rights Reserved.
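# illustrative usage sketch (not part of the original file; the import path and the example shapes
# are assumptions based on this repository's layout and the shape comments in forward() below):
#   import torch
#   from NewsSentiment.layers.attention import Attention
#   attn = Attention(embed_dim=768, n_head=8, score_function="scaled_dot_product")
#   k = torch.randn(4, 50, 768)   # e.g., token representations: (batch, k_len, embed_dim)
#   q = torch.randn(4, 1, 768)    # e.g., one target representation: (batch, q_len, embed_dim)
#   output, score = attn(k, q)    # output: (4, 1, 768); score: (32, 1, 50), i.e., (n_head*batch, q_len, k_len)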
6 | 7 | import math 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class Attention(nn.Module): 14 | def __init__( 15 | self, 16 | embed_dim, 17 | hidden_dim=None, 18 | out_dim=None, 19 | n_head=1, 20 | score_function="dot_product", 21 | dropout=0, 22 | ): 23 | """ Attention Mechanism 24 | :param embed_dim: 25 | :param hidden_dim: 26 | :param out_dim: 27 | :param n_head: num of head (Multi-Head Attention) 28 | :param score_function: scaled_dot_product / mlp (concat) / bi_linear (general dot) 29 | :return (?, q_len, out_dim,) 30 | """ 31 | super(Attention, self).__init__() 32 | if hidden_dim is None: 33 | hidden_dim = embed_dim // n_head 34 | if out_dim is None: 35 | out_dim = embed_dim 36 | self.embed_dim = embed_dim 37 | self.hidden_dim = hidden_dim 38 | self.n_head = n_head 39 | self.score_function = score_function 40 | self.w_k = nn.Linear(embed_dim, n_head * hidden_dim) 41 | self.w_q = nn.Linear(embed_dim, n_head * hidden_dim) 42 | self.proj = nn.Linear(n_head * hidden_dim, out_dim) 43 | self.dropout = nn.Dropout(dropout) 44 | if score_function == "mlp": 45 | self.weight = nn.Parameter(torch.Tensor(hidden_dim * 2)) 46 | elif self.score_function == "bi_linear": 47 | self.weight = nn.Parameter(torch.Tensor(hidden_dim, hidden_dim)) 48 | else: # dot_product / scaled_dot_product 49 | self.register_parameter("weight", None) 50 | self.reset_parameters() 51 | 52 | def reset_parameters(self): 53 | stdv = 1.0 / math.sqrt(self.hidden_dim) 54 | if self.weight is not None: 55 | self.weight.data.uniform_(-stdv, stdv) 56 | 57 | def forward(self, k, q): 58 | if len(q.shape) == 2: # q_len missing 59 | q = torch.unsqueeze(q, dim=1) 60 | if len(k.shape) == 2: # k_len missing 61 | k = torch.unsqueeze(k, dim=1) 62 | mb_size = k.shape[0] # ? 
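# mb_size is the (mini-)batch size; the "?" in the shape comments below stands for this batch dimension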
63 | k_len = k.shape[1] 64 | q_len = q.shape[1] 65 | # k: (?, k_len, embed_dim,) 66 | # q: (?, q_len, embed_dim,) 67 | # kx: (n_head*?, k_len, hidden_dim) 68 | # qx: (n_head*?, q_len, hidden_dim) 69 | # score: (n_head*?, q_len, k_len,) 70 | # output: (?, q_len, out_dim,) 71 | kx = self.w_k(k).view(mb_size, k_len, self.n_head, self.hidden_dim) 72 | kx = kx.permute(2, 0, 1, 3).contiguous().view(-1, k_len, self.hidden_dim) 73 | qx = self.w_q(q).view(mb_size, q_len, self.n_head, self.hidden_dim) 74 | qx = qx.permute(2, 0, 1, 3).contiguous().view(-1, q_len, self.hidden_dim) 75 | if self.score_function == "dot_product": 76 | kt = kx.permute(0, 2, 1) 77 | score = torch.bmm(qx, kt) 78 | elif self.score_function == "scaled_dot_product": 79 | kt = kx.permute(0, 2, 1) 80 | qkt = torch.bmm(qx, kt) 81 | score = torch.div(qkt, math.sqrt(self.hidden_dim)) 82 | elif self.score_function == "mlp": 83 | kxx = torch.unsqueeze(kx, dim=1).expand(-1, q_len, -1, -1) 84 | qxx = torch.unsqueeze(qx, dim=2).expand(-1, -1, k_len, -1) 85 | kq = torch.cat((kxx, qxx), dim=-1) # (n_head*?, q_len, k_len, hidden_dim*2) 86 | score = torch.tanh(torch.matmul(kq, self.weight)) 87 | elif self.score_function == "bi_linear": 88 | qw = torch.matmul(qx, self.weight) 89 | kt = kx.permute(0, 2, 1) 90 | score = torch.bmm(qw, kt) 91 | else: 92 | raise RuntimeError("invalid score_function") 93 | score = F.softmax(score, dim=-1) 94 | output = torch.bmm(score, kx) # (n_head*?, q_len, hidden_dim) 95 | output = torch.cat( 96 | torch.split(output, mb_size, dim=0), dim=-1 97 | ) # (?, q_len, n_head*hidden_dim) 98 | output = self.proj(output) # (?, q_len, out_dim) 99 | output = self.dropout(output) 100 | return output, score 101 | 102 | 103 | class NoQueryAttention(Attention): 104 | """q is a parameter""" 105 | 106 | def __init__( 107 | self, 108 | embed_dim, 109 | hidden_dim=None, 110 | out_dim=None, 111 | n_head=1, 112 | score_function="dot_product", 113 | q_len=1, 114 | dropout=0, 115 | ): 116 | super(NoQueryAttention, self).__init__( 117 | embed_dim, hidden_dim, out_dim, n_head, score_function, dropout 118 | ) 119 | self.q_len = q_len 120 | self.q = nn.Parameter(torch.Tensor(q_len, embed_dim)) 121 | self.reset_q() 122 | 123 | def reset_q(self): 124 | stdv = 1.0 / math.sqrt(self.embed_dim) 125 | self.q.data.uniform_(-stdv, stdv) 126 | 127 | def forward(self, k, **kwargs): 128 | mb_size = k.shape[0] 129 | q = self.q.expand(mb_size, -1, -1) 130 | return super(NoQueryAttention, self).forward(k, q) 131 | 132 | 133 | class FXBertSelfAttention(nn.Module): 134 | """ 135 | Identical to the class BertSelfAttention by transformers by huggingface, but 136 | can be used without the need to have a config instance of class BertConfig but 137 | instead the required values can be passed on directly 138 | """ 139 | def __init__(self, hidden_size, num_attention_heads, attention_probs_dropout_prob=0.1): 140 | super().__init__() 141 | if hidden_size % num_attention_heads != 0: 142 | raise ValueError( 143 | "The hidden size (%d) is not a multiple of the number of attention " 144 | "heads (%d)" % (hidden_size, num_attention_heads) 145 | ) 146 | 147 | self.num_attention_heads = num_attention_heads 148 | self.attention_head_size = int(hidden_size / num_attention_heads) 149 | self.all_head_size = self.num_attention_heads * self.attention_head_size 150 | 151 | self.query = nn.Linear(hidden_size, self.all_head_size) 152 | self.key = nn.Linear(hidden_size, self.all_head_size) 153 | self.value = nn.Linear(hidden_size, self.all_head_size) 154 | 155 | self.dropout = 
nn.Dropout(attention_probs_dropout_prob) 156 | 157 | def transpose_for_scores(self, x): 158 | new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) 159 | x = x.view(*new_x_shape) 160 | return x.permute(0, 2, 1, 3) 161 | 162 | def forward( 163 | self, 164 | hidden_states, 165 | attention_mask=None, 166 | head_mask=None, 167 | encoder_hidden_states=None, 168 | encoder_attention_mask=None, 169 | output_attentions=False, 170 | ): 171 | mixed_query_layer = self.query(hidden_states) 172 | 173 | # If this is instantiated as a cross-attention module, the keys 174 | # and values come from an encoder; the attention mask needs to be 175 | # such that the encoder's padding tokens are not attended to. 176 | if encoder_hidden_states is not None: 177 | mixed_key_layer = self.key(encoder_hidden_states) 178 | mixed_value_layer = self.value(encoder_hidden_states) 179 | attention_mask = encoder_attention_mask 180 | else: 181 | mixed_key_layer = self.key(hidden_states) 182 | mixed_value_layer = self.value(hidden_states) 183 | 184 | query_layer = self.transpose_for_scores(mixed_query_layer) 185 | key_layer = self.transpose_for_scores(mixed_key_layer) 186 | value_layer = self.transpose_for_scores(mixed_value_layer) 187 | 188 | # Take the dot product between "query" and "key" to get the raw attention scores. 189 | attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) 190 | attention_scores = attention_scores / math.sqrt(self.attention_head_size) 191 | if attention_mask is not None: 192 | # Apply the attention mask is (precomputed for all layers in BertModel forward() function) 193 | attention_scores = attention_scores + attention_mask 194 | 195 | # Normalize the attention scores to probabilities. 196 | attention_probs = nn.Softmax(dim=-1)(attention_scores) 197 | 198 | # This is actually dropping out entire tokens to attend to, which might 199 | # seem a bit unusual, but is taken from the original Transformer paper. 200 | attention_probs = self.dropout(attention_probs) 201 | 202 | # Mask heads if we want to 203 | if head_mask is not None: 204 | attention_probs = attention_probs * head_mask 205 | 206 | context_layer = torch.matmul(attention_probs, value_layer) 207 | 208 | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() 209 | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) 210 | context_layer = context_layer.view(*new_context_layer_shape) 211 | 212 | outputs = (context_layer, attention_probs) if output_attentions else (context_layer,) 213 | return outputs -------------------------------------------------------------------------------- /NewsSentiment/layers/dynamic_rnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # file: dynamic_rnn.py 3 | # author: songyouwei 4 | # Copyright (C) 2018. All Rights Reserved. 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import numpy as np 10 | 11 | class DynamicLSTM(nn.Module): 12 | def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=True, dropout=0, 13 | bidirectional=False, only_use_last_hidden_state=False, rnn_type = 'LSTM'): 14 | """ 15 | LSTM which can hold variable length sequence, use like TensorFlow's RNN(input, length...). 16 | 17 | :param input_size:The number of expected features in the input x 18 | :param hidden_size:The number of features in the hidden state h 19 | :param num_layers:Number of recurrent layers. 
20 | :param bias:If False, then the layer does not use bias weights b_ih and b_hh. Default: True 21 | :param batch_first:If True, then the input and output tensors are provided as (batch, seq, feature) 22 | :param dropout:If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer 23 | :param bidirectional:If True, becomes a bidirectional RNN. Default: False 24 | :param rnn_type: {LSTM, GRU, RNN} 25 | """ 26 | super(DynamicLSTM, self).__init__() 27 | self.input_size = input_size 28 | self.hidden_size = hidden_size 29 | self.num_layers = num_layers 30 | self.bias = bias 31 | self.batch_first = batch_first 32 | self.dropout = dropout 33 | self.bidirectional = bidirectional 34 | self.only_use_last_hidden_state = only_use_last_hidden_state 35 | self.rnn_type = rnn_type 36 | 37 | if self.rnn_type == 'LSTM': 38 | self.RNN = nn.LSTM( 39 | input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, 40 | bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional) 41 | elif self.rnn_type == 'GRU': 42 | self.RNN = nn.GRU( 43 | input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, 44 | bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional) 45 | elif self.rnn_type == 'RNN': 46 | self.RNN = nn.RNN( 47 | input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, 48 | bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional) 49 | 50 | 51 | def forward(self, x, x_len): 52 | """ 53 | sequence -> sort -> pad and pack ->process using RNN -> unpack ->unsort 54 | 55 | :param x: sequence embedding vectors 56 | :param x_len: numpy/tensor list 57 | :return: 58 | """ 59 | """sort""" 60 | x_sort_idx = torch.sort(-x_len)[1].long() 61 | x_unsort_idx = torch.sort(x_sort_idx)[1].long() 62 | x_len = x_len[x_sort_idx] 63 | x = x[x_sort_idx] 64 | """pack""" 65 | x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=self.batch_first) 66 | 67 | # process using the selected RNN 68 | if self.rnn_type == 'LSTM': 69 | out_pack, (ht, ct) = self.RNN(x_emb_p, None) 70 | else: 71 | out_pack, ht = self.RNN(x_emb_p, None) 72 | ct = None 73 | """unsort: h""" 74 | ht = torch.transpose(ht, 0, 1)[ 75 | x_unsort_idx] # (num_layers * num_directions, batch, hidden_size) -> (batch, ...) 76 | ht = torch.transpose(ht, 0, 1) 77 | 78 | if self.only_use_last_hidden_state: 79 | return ht 80 | else: 81 | """unpack: out""" 82 | out = torch.nn.utils.rnn.pad_packed_sequence(out_pack, batch_first=self.batch_first) # (sequence, lengths) 83 | out = out[0] # 84 | out = out[x_unsort_idx] 85 | """unsort: out c""" 86 | if self.rnn_type =='LSTM': 87 | ct = torch.transpose(ct, 0, 1)[ 88 | x_unsort_idx] # (num_layers * num_directions, batch, hidden_size) -> (batch, ...) 89 | ct = torch.transpose(ct, 0, 1) 90 | 91 | return out, (ht, ct) 92 | -------------------------------------------------------------------------------- /NewsSentiment/layers/point_wise_feed_forward.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # file: point_wise_feed_forward.py 3 | # author: songyouwei 4 | # Copyright (C) 2018. All Rights Reserved. 
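# illustrative usage sketch (not part of the original file; the import path and example sizes are assumptions):
#   import torch
#   from NewsSentiment.layers.point_wise_feed_forward import PositionwiseFeedForward
#   ffn = PositionwiseFeedForward(d_hid=768, d_inner_hid=3072, dropout=0.1)
#   x = torch.randn(4, 50, 768)   # (batch, seq_len, d_hid)
#   y = ffn(x)                    # same shape (4, 50, 768); both layers are 1x1 convolutions, so each
#                                 # position is transformed independently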
5 | 6 | import torch.nn as nn 7 | 8 | 9 | class PositionwiseFeedForward(nn.Module): 10 | ''' A two-feed-forward-layer module ''' 11 | def __init__(self, d_hid, d_inner_hid=None, dropout=0): 12 | super(PositionwiseFeedForward, self).__init__() 13 | if d_inner_hid is None: 14 | d_inner_hid = d_hid 15 | self.w_1 = nn.Conv1d(d_hid, d_inner_hid, 1) # position-wise 16 | self.w_2 = nn.Conv1d(d_inner_hid, d_hid, 1) # position-wise 17 | self.dropout = nn.Dropout(dropout) 18 | self.relu = nn.ReLU() 19 | 20 | def forward(self, x): 21 | output = self.relu(self.w_1(x.transpose(1, 2))) 22 | output = self.w_2(output).transpose(2, 1) 23 | output = self.dropout(output) 24 | return output 25 | -------------------------------------------------------------------------------- /NewsSentiment/layers/pytorchnlpattention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | # from https://pytorchnlp.readthedocs.io/en/latest/_modules/torchnlp/nn/attention.html 5 | class ClassicAttention(nn.Module): 6 | """ Applies attention mechanism on the `context` using the `query`. 7 | 8 | **Thank you** to IBM for their initial implementation of :class:`Attention`. Here is 9 | their `License 10 | `__. 11 | 12 | Args: 13 | dimensions (int): Dimensionality of the query and context. 14 | attention_type (str, optional): How to compute the attention score: 15 | 16 | * dot: :math:`score(H_j,q) = H_j^T q` 17 | * general: :math:`score(H_j, q) = H_j^T W_a q` 18 | 19 | Example: 20 | 21 | >>> attention = Attention(256) 22 | >>> query = torch.randn(5, 1, 256) 23 | >>> context = torch.randn(5, 5, 256) 24 | >>> output, weights = attention(query, context) 25 | >>> output.size() 26 | torch.Size([5, 1, 256]) 27 | >>> weights.size() 28 | torch.Size([5, 1, 5]) 29 | """ 30 | 31 | def __init__(self, dimensions, attention_type="general"): 32 | super(ClassicAttention, self).__init__() 33 | 34 | if attention_type not in ["dot", "general"]: 35 | raise ValueError("Invalid attention type selected.") 36 | 37 | self.attention_type = attention_type 38 | if self.attention_type == "general": 39 | self.linear_in = nn.Linear(dimensions, dimensions, bias=False) 40 | 41 | self.linear_out = nn.Linear(dimensions * 2, dimensions, bias=False) 42 | self.softmax = nn.Softmax(dim=-1) 43 | self.tanh = nn.Tanh() 44 | 45 | def forward(self, query, context): 46 | """ 47 | Args: 48 | query (:class:`torch.FloatTensor` [batch size, output length, dimensions]): Sequence of 49 | queries to query the context. 50 | context (:class:`torch.FloatTensor` [batch size, query length, dimensions]): Data 51 | overwhich to apply the attention mechanism. 52 | 53 | Returns: 54 | :class:`tuple` with `output` and `weights`: 55 | * **output** (:class:`torch.LongTensor` [batch size, output length, dimensions]): 56 | Tensor containing the attended features. 57 | * **weights** (:class:`torch.FloatTensor` [batch size, output length, query length]): 58 | Tensor containing attention weights. 59 | """ 60 | batch_size, output_len, dimensions = query.size() 61 | query_len = context.size(1) 62 | 63 | if self.attention_type == "general": 64 | query = query.reshape(batch_size * output_len, dimensions) 65 | query = self.linear_in(query) 66 | query = query.reshape(batch_size, output_len, dimensions) 67 | 68 | # TODO: Include mask on PADDING_INDEX? 
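# for attention_type == "general", the query was just projected by linear_in, i.e., the W_a in
# score(H_j, q) = H_j^T W_a q from the class docstring; for "dot", the raw query is used directly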
69 | 70 | # (batch_size, output_len, dimensions) * (batch_size, query_len, dimensions) -> 71 | # (batch_size, output_len, query_len) 72 | attention_scores = torch.bmm(query, context.transpose(1, 2).contiguous()) 73 | 74 | # Compute weights across every context sequence 75 | attention_scores = attention_scores.view(batch_size * output_len, query_len) 76 | attention_weights = self.softmax(attention_scores) 77 | attention_weights = attention_weights.view(batch_size, output_len, query_len) 78 | 79 | # (batch_size, output_len, query_len) * (batch_size, query_len, dimensions) -> 80 | # (batch_size, output_len, dimensions) 81 | mix = torch.bmm(attention_weights, context) 82 | 83 | # concat -> (batch_size * output_len, 2*dimensions) 84 | combined = torch.cat((mix, query), dim=2) 85 | combined = combined.view(batch_size * output_len, 2 * dimensions) 86 | 87 | # Apply linear_out on every 2nd dimension of concat 88 | # output -> (batch_size, output_len, dimensions) 89 | output = self.linear_out(combined).view(batch_size, output_len, dimensions) 90 | output = self.tanh(output) 91 | 92 | return output, attention_weights 93 | -------------------------------------------------------------------------------- /NewsSentiment/layers/squeeze_embedding.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # file: squeeze_embedding.py 3 | # author: songyouwei 4 | # Copyright (C) 2018. All Rights Reserved. 5 | 6 | 7 | import torch 8 | import torch.nn as nn 9 | import numpy as np 10 | 11 | class SqueezeEmbedding(nn.Module): 12 | """ 13 | Squeeze sequence embedding length to the longest one in the batch 14 | """ 15 | def __init__(self, batch_first=True): 16 | super(SqueezeEmbedding, self).__init__() 17 | self.batch_first = batch_first 18 | 19 | def forward(self, x, x_len): 20 | """ 21 | sequence -> sort -> pad and pack -> unpack ->unsort 22 | :param x: sequence embedding vectors 23 | :param x_len: numpy/tensor list 24 | :return: 25 | """ 26 | """sort""" 27 | x_sort_idx = torch.sort(-x_len)[1].long() 28 | x_unsort_idx = torch.sort(x_sort_idx)[1].long() 29 | x_len = x_len[x_sort_idx] 30 | x = x[x_sort_idx] 31 | """pack""" 32 | x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=self.batch_first) 33 | """unpack: out""" 34 | out = torch.nn.utils.rnn.pad_packed_sequence(x_emb_p, batch_first=self.batch_first) # (sequence, lengths) 35 | out = out[0] # 36 | """unsort""" 37 | out = out[x_unsort_idx] 38 | return out 39 | -------------------------------------------------------------------------------- /NewsSentiment/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/losses/__init__.py -------------------------------------------------------------------------------- /NewsSentiment/losses/crossentropycrossweight.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | # CrossEntropyLoss with additional cross weight loss for two targets 5 | from NewsSentiment.SentimentClasses import SentimentClasses 6 | 7 | 8 | class CrossEntropyLoss_CrossWeight(nn.Module): 9 | def __init__(self, device, ignore_index, weight=None, crossloss_weight=0.2): 10 | super(CrossEntropyLoss_CrossWeight, self).__init__() 11 | self.device = device 12 | self.class_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index,) 13 | # cosine 
similarity by default 14 | self.cross_weight_loss = nn.CosineSimilarity(dim=2) 15 | self.crossloss_weight = crossloss_weight 16 | self.crossentropyloss_weight = 1.0 - crossloss_weight 17 | self.ignore_index = ignore_index 18 | 19 | def forward( 20 | self, 21 | predictions: torch.Tensor, 22 | true_labels: torch.Tensor, 23 | cross_weight: torch.Tensor, 24 | target_mask: torch.Tensor, 25 | ): 26 | """ 27 | Calculate k-dimensional cross entropy loss by comparing predictions and 28 | true_labels. Additionally, calculates a cross_weight loss that maximizes the 29 | differences between the weights compared between both targets. 30 | 31 | :param predictions: shape: (batch, polarities e.g. 3, targets ie 2) 32 | :param true_labels: shape: (batch, targets) 33 | :param cross_weight:shape: (batch, targets, seqlen, bertdim) 34 | :return: 35 | prediction size earlier was batch polarities 36 | """ 37 | batch_size, num_classes, num_targets = predictions.size() 38 | bert_dim = cross_weight.shape[3] 39 | 40 | # calculate regular cross entropy loss 41 | classification_loss = self.class_loss(predictions, true_labels) 42 | 43 | # calculate cross-weight loss 44 | # we must not calc the cross weight loss for batch item of two targets, 45 | # where 1 or 2 is a fill up target, i.e., the true value is 46 | # SentimentClasses.FILLUP_POLARITY_VALUE 47 | # get all batch items where the first target is fill up 48 | is_ignored_a = true_labels[:, 0] 49 | is_ignored_a = is_ignored_a == SentimentClasses.FILLUP_POLARITY_VALUE 50 | # get all batch items where the second is 51 | is_ignored_b = true_labels[:, 1] 52 | is_ignored_b = is_ignored_b == SentimentClasses.FILLUP_POLARITY_VALUE 53 | # logical or both lists to one 54 | is_ignored_batch_item = is_ignored_a | is_ignored_b 55 | # select only those batch items where no target 56 | is_not_ignored_batch_item = ~is_ignored_batch_item 57 | target_mask = target_mask[is_not_ignored_batch_item, :, :] 58 | count_non_ignored_batch_items = target_mask.shape[0] 59 | 60 | # if we have identical targets (=target masks) set cross weight loss to 0 61 | target_mask_a = target_mask[:, 0, :] 62 | target_mask_b = target_mask[:, 1, :] 63 | diff_target_mask = target_mask_a - target_mask_b 64 | diff_target_mask = diff_target_mask.sum(dim=1) 65 | # shape: batch 66 | # diff target mask, will be 0 if the two targets in one batched item are 67 | # identical, and 1 if they are different 68 | is_different_target_per_two_batch_items = diff_target_mask != 0 69 | 70 | # only selecting different targets effectively zeros out values of identical 71 | # targets 72 | cross_weight = cross_weight[is_different_target_per_two_batch_items] 73 | 74 | # if there is not at least a single batch item with different targets, shape[0] 75 | # will be 0. 
we use this to test for this condition and if all targets over all 76 | # batch items are identical, we skip the cross weight part 77 | count_different_targets = cross_weight.shape[0] 78 | if count_different_targets == 0: 79 | cross_weight_loss = 0 80 | else: 81 | assert count_different_targets >= 1 82 | seq_len = cross_weight.shape[2] 83 | weight_a = cross_weight[:, 0, :, :] 84 | # weight_b = cross_weight[:, 1, :, :] 85 | weight_b = weight_a 86 | 87 | # we add the negative sign, since we actually want to maximize the distance 88 | # between both vectors 89 | cross_weight_similarity = self.cross_weight_loss(weight_a, weight_b) 90 | # cross_weight_similarity will be -1 for absolutely dissimilar values 91 | # 0 for unrelated and +1 for identical values 92 | # normalize between 0 and 1 93 | cross_weight_similarity = (cross_weight_similarity + 1) / 2 94 | # 0 = dissimilar, 1 = identical 95 | # mean over seq len 96 | cross_weight_loss = cross_weight_similarity.mean(dim=1) 97 | # at this point we have for each batch item its loss (0 if dissimilar 98 | # targets, 1 if identical target in the batch item) 99 | # finally, compute the single loss: sum 100 | cross_weight_loss = cross_weight_loss.sum() 101 | # normalize (divide by number of batch items; note that this can be different 102 | # from the number of different targets in the batch) 103 | cross_weight_loss = cross_weight_loss / count_non_ignored_batch_items 104 | 105 | # total_loss 106 | total_loss = ( 107 | self.crossentropyloss_weight * classification_loss 108 | + self.crossloss_weight * cross_weight_loss 109 | ) 110 | 111 | return total_loss 112 | -------------------------------------------------------------------------------- /NewsSentiment/losses/crossentropylosslsr.py: -------------------------------------------------------------------------------- 1 | # adapted from ABSA-PyTorch 2 | import torch 3 | import torch.nn as nn 4 | 5 | # CrossEntropyLoss for Label Smoothing Regularization 6 | from NewsSentiment.SentimentClasses import SentimentClasses 7 | 8 | 9 | class CrossEntropyLoss_LSR(nn.Module): 10 | def __init__(self, device, smoothing_value=0.2, weight=None): 11 | super(CrossEntropyLoss_LSR, self).__init__() 12 | self.smoothing_value = smoothing_value 13 | self.device = device 14 | self.logSoftmax = nn.LogSoftmax(dim=1) 15 | self.weight = weight 16 | 17 | def _create_smooth_one_hot_for_true_targets( 18 | self, labels, batch_size, num_classes, num_targets 19 | ): 20 | """ 21 | Produces a smooth one hot encoded tensor where all "false" scalars are set to 22 | base_prob, and all "true" scalars set to base_prob + 1 - smoothing_value. 
23 | Effectively, for example for three classes, this results in: 24 | False classes: 0.03 (0.3333*0.1=baseprob) 25 | True classes: 0.03 + 1 - 0.1 = 0.93 26 | :param labels: the true classes 27 | :param batch_size: 28 | :param num_classes: 29 | :return: 30 | """ 31 | # prior label distribution is set uniform (see Sec 3.5, 32 | # https://arxiv.org/pdf/1902.09314.pdf ) 33 | prior_distribution = 1.0 / float(num_classes) 34 | # calculate base probability 35 | base_prob = self.smoothing_value * prior_distribution 36 | # initialize 0 tensor 37 | one_hot_label = torch.zeros( 38 | batch_size, num_classes, num_targets, device=self.device 39 | ) 40 | # set probability of all classes in all batches and targets to the base prob 41 | # (in normal one hot encoding, this would be 0 instead) 42 | one_hot_label = one_hot_label + base_prob 43 | 44 | if self.weight is not None: 45 | raise NotImplementedError( 46 | "test this first!!! currently untested with multi tsc" 47 | ) 48 | one_hot_label = one_hot_label * self.weight 49 | 50 | # iterate over each single item of the batch 51 | for batch_index in range(batch_size): 52 | for target_index in range(num_targets): 53 | # get the class index 54 | class_index = labels[batch_index, target_index].item() 55 | if class_index == SentimentClasses.FILLUP_POLARITY_VALUE: 56 | # cant set the one hot encoded here, since there is no "true" class 57 | # need to ignore this later 58 | pass 59 | else: 60 | # set the class probability 61 | one_hot_label[batch_index, class_index, target_index] += ( 62 | 1.0 - self.smoothing_value 63 | ) 64 | 65 | return one_hot_label 66 | 67 | def forward( 68 | self, predictions: torch.Tensor, labels: torch.Tensor, size_average=True 69 | ): 70 | """ 71 | Calculate k-dimensional cross entropy loss 72 | :param predictions: shape: (batch, polarities e.g. 3, targets e.g. 5) 73 | :param labels: shape: (batch, targets) 74 | :param size_average: 75 | :return: 76 | prediction size earlier was batch polarities 77 | """ 78 | batch_size, num_classes, num_targets = predictions.size() 79 | # calculate log of probability of each class (of each batch and target) 80 | pre_logsoftmax = self.logSoftmax(predictions) 81 | 82 | smooth_one_hot_for_true_targets = self._create_smooth_one_hot_for_true_targets( 83 | labels, batch_size, num_classes, num_targets 84 | ).to(self.device) 85 | 86 | # ignore those comparison where labels has a fillup value (=to be ignored) 87 | mask_nonfillup = labels != SentimentClasses.FILLUP_POLARITY_VALUE 88 | # shape is batch, targets 89 | # shape should be batch, classes, targets 90 | mask_nonfillup = mask_nonfillup.unsqueeze(1).repeat(1, num_classes, 1) 91 | # convert to 1 for True and 0 for False 92 | mask_nonfillup = mask_nonfillup.to(torch.int) 93 | # multiply so that scalars to be ignored are set to 0 (resulting in 0 loss for 94 | # those scalars, i.e., targets) 95 | # notes: categorical cross entropy loss does not directly punish on a low level 96 | # those predictions (scalars) that belong to incorrect classes (defined by true 97 | # or here, labels) but is only calculated by comparing the one true class ( 98 | # defined by true, or here, labels) where it has a 1 (one hot encoded). since 99 | # the probability is 100% of all classes (also the output of the neural network) 100 | # the loss still punishes wrong predictions, i.e., if the class probability 101 | # should be 100% but is only 25% or 70%, the loss will be non-zero) 102 | # as a consequence, when there is no right class in "true", there cannot be a 103 | # loss. 
so, the multiplication below, which sets all fillup-targets to 0, has 104 | # the expected effect (no loss can result from fill up values, as all their 105 | # class probabilities are set to 0) 106 | smooth_one_hot_for_true_targets = ( 107 | smooth_one_hot_for_true_targets * mask_nonfillup 108 | ) 109 | 110 | # multiply 111 | loss = -smooth_one_hot_for_true_targets * pre_logsoftmax 112 | 113 | # aggregate loss to scalar over classes 114 | loss = torch.sum(loss, dim=1) 115 | # aggregate loss to scalar over targets 116 | loss = torch.sum(loss, dim=1) 117 | 118 | if size_average: 119 | return torch.mean(loss) 120 | else: 121 | return torch.sum(loss) 122 | -------------------------------------------------------------------------------- /NewsSentiment/losses/crossentropylosswithconfidence.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class CrossEntropyLossWithConfidence(nn.Module): 6 | def __init__(self, weight, ignore_index): 7 | super(CrossEntropyLossWithConfidence, self).__init__() 8 | self.crossentropyloss = nn.CrossEntropyLoss( 9 | weight=weight, ignore_index=ignore_index 10 | ) 11 | self.loss_for_confidence = nn.HuberLoss() 12 | self.w_classes = 0.5 13 | 14 | def forward(self, predictions: torch.Tensor, labels: torch.Tensor): 15 | # get the prediction components 16 | class_preds = predictions[:, 0:3, :] 17 | confidence_preds = predictions[:, 3:4, :].squeeze(dim=1) 18 | 19 | # calc the regular class-based loss 20 | class_loss = self.crossentropyloss(class_preds, labels) 21 | 22 | # now calc the confidence-based loss 23 | confidence_loss = 0 24 | 25 | # get the predicted classes 26 | predicted_classes = class_preds.argmax(dim=1) 27 | # and compare with the correct classes 28 | is_correct = torch.eq(predicted_classes, labels) 29 | is_correct_as_float = is_correct.float() 30 | # calc the confidence loss 31 | confidence_loss = self.loss_for_confidence( 32 | confidence_preds, is_correct_as_float 33 | ) 34 | 35 | # calc the total loss 36 | total_loss = ( 37 | self.w_classes * class_loss + (1 - self.w_classes) * confidence_loss 38 | ) 39 | 40 | return total_loss 41 | -------------------------------------------------------------------------------- /NewsSentiment/losses/seq2seqloss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from NewsSentiment.SentimentClasses import SentimentClasses 5 | 6 | 7 | class SequenceLoss(nn.Module): 8 | """ 9 | Input to this loss are sequences, see models/multitargets/seq2seq.py 10 | """ 11 | 12 | def __init__(self, device, weight): 13 | super(SequenceLoss, self).__init__() 14 | self.device = device 15 | self.weight = weight 16 | self.actual_loss = nn.CrossEntropyLoss( 17 | weight=self.weight, ignore_index=SentimentClasses.FILLUP_POLARITY_VALUE, 18 | ) 19 | 20 | assert self.weight is None, "not implemented, weight must be None" 21 | 22 | def forward( 23 | self, 24 | predictions: torch.Tensor, 25 | true_classes: torch.Tensor, 26 | true_target_mask: torch.Tensor, 27 | ): 28 | """ 29 | :param predictions: shape: batch, numclasses, seqlen 30 | :param true_classes: shape: batch, targets 31 | :param true_target_mask: shape: batch, targets, seqlen 32 | :return: 33 | """ 34 | batch_size = predictions.shape[0] 35 | num_classes = predictions.shape[1] 36 | seq_len = predictions.shape[2] 37 | 38 | # create true_sequence, shape: batch, seqlen 39 | true_sequence = ( 40 | torch.ones([batch_size, 
seq_len], dtype=torch.int64, device=self.device) 41 | * SentimentClasses.FILLUP_POLARITY_VALUE 42 | ) 43 | # update individual scalars 44 | for batch_index in range(batch_size): 45 | for target_index in range(true_target_mask.shape[1]): 46 | # get the sentiment class of this target in this batch 47 | true_sentiment_class = true_classes[batch_index, target_index].item() 48 | # if the true_sentiment_class is FILLUP_POLARITY_VALUE, ignore (no need 49 | # to update because we initialized the whole true_sequence with 50 | # FILLUP_POLARITY_VALUE 51 | if true_sentiment_class == SentimentClasses.FILLUP_POLARITY_VALUE: 52 | continue 53 | 54 | # if not FILLUP_POLARITY_VALUE, update the true_sequence 55 | # iterate all tokens 56 | for seq_index in range(seq_len): 57 | # determine if at the current token there is a target 58 | is_target = true_target_mask[ 59 | batch_index, target_index, seq_index 60 | ].item() 61 | 62 | if is_target == 1: 63 | # there is a target 64 | # we should update the scalar in true_sequence at the 65 | # corresponding part (thereby disregarding the target dimension, 66 | # since all targets are merged into one dimension). ensure, that 67 | # -100 is still there (if there is another value, this means 68 | # that we have overlapping targets) 69 | prev_value = true_sequence[batch_index, seq_index].item() 70 | if prev_value == SentimentClasses.FILLUP_POLARITY_VALUE: 71 | # the previous value is FILLUP_POLARITY_VALUE, so there is 72 | # no target at this token already. so, we can update 73 | true_sequence[batch_index, seq_index] = true_sentiment_class 74 | else: 75 | # there is already a target class -> overlapping targets 76 | # this can happen for probably two reasons: 77 | # 1) there are actually different targets in the data that 78 | # overlap 79 | # 2) we duplicated a target in FXDataset 80 | # either way, for now, if the value to be set is identical 81 | # to the one already present, we continue, otherwise throw 82 | # an error 83 | if prev_value == true_sentiment_class: 84 | pass 85 | else: 86 | raise ValueError( 87 | f"tried to update true_sequence[{batch_index},{seq_index}]={prev_value}" 88 | ) 89 | 90 | elif is_target == 0: 91 | # no target 92 | # since we initialized the true_sequence tensor with -100 93 | # scalars, there's not need to update the value for non-target 94 | # nodes 95 | pass 96 | else: 97 | raise ValueError( 98 | f"true_target_mask must be either 0 or 1, is: {is_target}" 99 | ) 100 | 101 | loss = self.actual_loss(predictions, true_sequence) 102 | return loss 103 | -------------------------------------------------------------------------------- /NewsSentiment/models/FXBaseModel.py: -------------------------------------------------------------------------------- 1 | import os 2 | from copy import copy 3 | 4 | import torch.nn as nn 5 | 6 | from abc import abstractmethod 7 | 8 | from transformers import ( 9 | XLNetModel, 10 | AlbertModel, 11 | BertModel, 12 | RobertaModel, 13 | PreTrainedModel, 14 | ) 15 | from functools import wraps 16 | 17 | from NewsSentiment.download import Download 18 | 19 | 20 | class FXBaseModel(PreTrainedModel): 21 | def __init__(self, *args, **kwargs): 22 | super().__init__(*args, **kwargs) 23 | 24 | @staticmethod 25 | @abstractmethod 26 | def get_language_models(): 27 | return 28 | 29 | @staticmethod 30 | @abstractmethod 31 | def get_input_field_ids(): 32 | return 33 | 34 | def invoke_language_model(self, lm, input_ids, token_type_ids=None): 35 | type_lm = type(lm) 36 | if type_lm == XLNetModel: 37 | last_hidden_state, mems, 
all_hidden_states = lm( 38 | input_ids=input_ids, token_type_ids=token_type_ids, 39 | ) 40 | elif type_lm in [AlbertModel, BertModel, RobertaModel]: 41 | if token_type_ids is None: 42 | last_hidden_state, pooler_output, hidden_states = lm( 43 | input_ids=input_ids, 44 | ) 45 | else: 46 | # last_hidden_state, pooler_output, hidden_states = lm( 47 | # input_ids=input_ids, token_type_ids=token_type_ids 48 | # ) 49 | model_results = lm(input_ids=input_ids, token_type_ids=token_type_ids) 50 | last_hidden_state = model_results.last_hidden_state 51 | pooler_output = model_results.pooler_output 52 | hidden_states = model_results.hidden_states 53 | else: 54 | raise NotImplementedError 55 | 56 | return last_hidden_state 57 | 58 | 59 | def provide_pretrained(version, pretrained_url): 60 | """ 61 | Usage: 62 | 63 | @provide_pretrained("v1.0.0", "https://example.com/link/to/state_dict") 64 | class Example(nn.Module): 65 | pass 66 | """ 67 | 68 | def decorator(model_class): 69 | # The actual decorator to use before the class 70 | wraps(model_class) 71 | 72 | wrapper = __get_pretrained_wrapper_class(model_class) 73 | wrapper._provide_pretrained_versions[version] = pretrained_url 74 | 75 | return wrapper 76 | 77 | return decorator 78 | 79 | 80 | def default_pretrained(version): 81 | """ 82 | Set the version which should be used as the default version and will be used when running with --pretrained. 83 | 84 | Usage: 85 | 86 | @default_pretrained("v1.0.0") 87 | @provide_pretrained("v1.0.0", "https://example.com/link/to/state_dict") 88 | class Example(nn.Module): 89 | pass 90 | """ 91 | 92 | def decorator(model_class): 93 | # The actual decorator to use before the class 94 | wraps(model_class) 95 | 96 | wrapper = __get_pretrained_wrapper_class(model_class) 97 | wrapper._provide_pretrained_default = version 98 | 99 | return wrapper 100 | 101 | return decorator 102 | 103 | 104 | def model_includes_pretrained(model): 105 | """ 106 | Checks if a model-class includes the methods to load pretrained models. 107 | 108 | Arguments: 109 | model Model-class to check. 110 | 111 | Returns: 112 | True if it includes the functionality. 
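Illustrative usage (GRUTSCSingle, defined in models/singletarget/grutscsingle.py, is such a pretrained-enabled class): if model_includes_pretrained(GRUTSCSingle): state_dict = GRUTSCSingle.get_pretrained_state_dict()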
113 | """ 114 | return hasattr(model, "has_pretrained_state_dict") and hasattr( 115 | model, "get_pretrained_state_dict" 116 | ) 117 | 118 | 119 | __pretrained_wrapper_classes = set() 120 | 121 | 122 | def __get_pretrained_wrapper_class(base_class): 123 | if base_class in __pretrained_wrapper_classes: 124 | return base_class 125 | 126 | class PretrainedWrapper(base_class): 127 | _provide_pretrained_default = None 128 | _provide_pretrained_versions = {} 129 | 130 | def __init__(self, *args, **kwargs): 131 | super().__init__(*args, **kwargs) 132 | 133 | @classmethod 134 | def has_pretrained_state_dict(cls, version=None): 135 | version = version or cls._provide_pretrained_default 136 | return version in cls._provide_pretrained_versions 137 | 138 | @classmethod 139 | def get_pretrained_state_dict( 140 | cls, version=None, download_if_not_exists=True, **kwargs 141 | ): 142 | path = Download.model_path(cls, version) 143 | if os.path.isfile(path): 144 | if download_if_not_exists: 145 | Download.download(cls, version, False) 146 | else: 147 | raise FileNotFoundError("State dict not found") 148 | return cls.load_state_dict(path, **kwargs) 149 | 150 | @classmethod 151 | def get_pretrained_versions(cls): 152 | return copy(cls._provide_pretrained_versions) 153 | 154 | @classmethod 155 | def get_pretrained_source(cls, version=None): 156 | return cls._provide_pretrained_versions[ 157 | version or cls._provide_pretrained_default 158 | ] 159 | 160 | @classmethod 161 | def get_pretrained_default_version(cls): 162 | return cls._provide_pretrained_default 163 | 164 | __pretrained_wrapper_classes.add(PretrainedWrapper) 165 | return PretrainedWrapper 166 | -------------------------------------------------------------------------------- /NewsSentiment/models/FXEnsemble.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from argparse import Namespace 3 | from typing import List, Dict 4 | import torch.nn as nn 5 | import torch 6 | 7 | from NewsSentiment.consts import * 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class FXEnsemble(FXBaseModel): 13 | @staticmethod 14 | def get_language_models(): 15 | return ( 16 | BERT_BASE_UNCASED, 17 | ROBERTA_BASE, 18 | XLNET_BASE_CASED, 19 | ) 20 | 21 | @staticmethod 22 | def get_input_field_ids(): 23 | return [ 24 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 25 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 26 | (ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 27 | (ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 28 | (XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 29 | (XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 30 | ] 31 | 32 | def __init__(self, transformer_models: Dict, opt: Namespace): 33 | super().__init__() 34 | 35 | # language models 36 | self.bert = transformer_models[BERT_BASE_UNCASED] 37 | self.roberta = transformer_models[ROBERTA_BASE] 38 | self.xlnet = transformer_models[XLNET_BASE_CASED] 39 | self.num_models = 3 40 | 41 | # params 42 | assert ( 43 | self.bert.config.hidden_size 44 | == self.roberta.config.hidden_size 45 | == self.xlnet.config.hidden_size 46 | ) 47 | self.sequence_length = opt.max_seq_len 48 | self.hidden_size = self.bert.config.hidden_size 49 | 50 | # other neural network components 51 | self.dropout = nn.Dropout(opt.dropout) 52 | self.target_dependent_text_combiner = nn.Linear( 53 | self.hidden_size * self.num_models, 
opt.polarities_dim 54 | ) 55 | 56 | def _combine_text_out_with_target_mask( 57 | self, batch_size, text_last_hidden_state, target_mask 58 | ): 59 | roberta_target_mask = target_mask.reshape((batch_size, 1, self.sequence_length)) 60 | roberta_target_dependent_text = torch.bmm( 61 | roberta_target_mask, text_last_hidden_state 62 | ) 63 | roberta_target_dependent_text = roberta_target_dependent_text.reshape( 64 | (batch_size, self.hidden_size) 65 | ) 66 | return roberta_target_dependent_text 67 | 68 | def forward(self, inputs: List): 69 | # alternatively, we could also use this 70 | # FXDataset.get_all_inputs_for_model(input, self) 71 | bert_text_ids = FXDataset.get_input_by_params( 72 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 73 | ) 74 | bert_target_mask = FXDataset.get_input_by_params( 75 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 76 | ) 77 | roberta_text_ids = FXDataset.get_input_by_params( 78 | inputs, ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 79 | ) 80 | roberta_target_mask = FXDataset.get_input_by_params( 81 | inputs, ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 82 | ) 83 | xlnet_text_ids = FXDataset.get_input_by_params( 84 | inputs, XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 85 | ) 86 | xlnet_target_mask = FXDataset.get_input_by_params( 87 | inputs, XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 88 | ) 89 | 90 | # get variables 91 | batch_size = bert_text_ids.shape[0] 92 | 93 | # dev notes: 94 | # batch_size = 4, sequence_length = 150, hidden_size = 768 95 | 96 | # bert_text_out returns list with following elements 97 | # 0: last_hidden_state (batch_size, sequence_length, hidden_size) 98 | # 1: pooler_output (batch_size, hidden_size) 99 | # 2: hidden_states (batch_size, sequence_length, hidden_size) 100 | ( 101 | bert_text_last_hidden_state, 102 | bert_text_pooler_output, 103 | bert_text_hidden_states, 104 | ) = self.bert(bert_text_ids) 105 | # roberta_text_out returns same output as bert 106 | ( 107 | roberta_text_last_hidden_state, 108 | roberta_text_pooler_output, 109 | roberta_text_hidden_states, 110 | ) = self.roberta(roberta_text_ids) 111 | # xlnet_text_out returns list with following elements 112 | # 0: last_hidden_state (batch_size, sequence_length, hidden_size) 113 | # does not exist - (1: mems, a list of length config.n_layers) 114 | # 2: hidden_states (batch_size, sequence_length, hidden_size) 115 | xlnet_text_last_hidden_state, xlnet_text_hidden_states = self.xlnet( 116 | xlnet_text_ids 117 | ) 118 | 119 | # incorporate target masks with (for now) last layer's states 120 | # *_target_dependent_text_out will be of shape (batch_size, hidden_size) 121 | bert_target_dependent_text_out = self._combine_text_out_with_target_mask( 122 | batch_size, bert_text_last_hidden_state, bert_target_mask 123 | ) 124 | roberta_target_dependent_text_out = self._combine_text_out_with_target_mask( 125 | batch_size, roberta_text_last_hidden_state, roberta_target_mask 126 | ) 127 | xlnet_target_dependent_text_out = self._combine_text_out_with_target_mask( 128 | batch_size, xlnet_text_last_hidden_state, xlnet_target_mask 129 | ) 130 | 131 | # cat outputs 132 | cat_target_dependent_text_out = torch.cat( 133 | [ 134 | bert_target_dependent_text_out, 135 | roberta_target_dependent_text_out, 136 | xlnet_target_dependent_text_out, 137 | ], 138 | dim=1, 139 | ) 140 | 141 | # dropout for better learning 142 | cat_target_dependent_text_out = self.dropout(cat_target_dependent_text_out) 143 | 144 | # 
combine and get 3 dimensions 145 | logits = self.target_dependent_text_combiner(cat_target_dependent_text_out) 146 | 147 | return logits 148 | -------------------------------------------------------------------------------- /NewsSentiment/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/models/__init__.py -------------------------------------------------------------------------------- /NewsSentiment/models/ensemble.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import List, Dict 3 | from NewsSentiment.consts import * 4 | import torch.nn as nn 5 | import torch 6 | 7 | from NewsSentiment.models.FXBaseModel import FXBaseModel 8 | from NewsSentiment.models.singletarget.td_bert import TD_BERT 9 | 10 | 11 | class EnsembleTopA(FXBaseModel): 12 | @staticmethod 13 | def get_language_models(): 14 | return (get_default_lm(),) 15 | 16 | @staticmethod 17 | def get_input_field_ids(): 18 | return [ 19 | # tdbert 20 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 21 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 22 | # hosseinia 23 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS), 24 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS), 25 | # while we use text-then-target as bert input, we can use text targetmask and text knowledge source mask 26 | # because it is identical to a hypothetical text-then-target target mask or text-then-target knowledge 27 | # source mask (we would not highlight the target in the 2nd component in the corresponding mask) 28 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 29 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES), 30 | ] 31 | 32 | def __init__(self, transformer_models: Dict, opt: Namespace): 33 | super().__init__() 34 | from models.singletarget.stancedetectionpaper import StanceDetectionHosseinia 35 | 36 | # submodels models 37 | self.td_bert = TD_BERT(transformer_models, opt) 38 | self.hosseinia = StanceDetectionHosseinia(transformer_models, opt) 39 | 40 | # ensemble related 41 | self.hosseinia_dense = nn.Linear( 42 | self.hosseinia.language_model.config.hidden_size * 3 * 2 * 2, 43 | self.hosseinia.language_model.config.hidden_size 44 | ) 45 | self.ensemble_combiner = nn.Linear( 46 | self.hosseinia.language_model.config.hidden_size * 2, 47 | opt.polarities_dim 48 | ) 49 | 50 | def forward(self, inputs: List): 51 | # shape: batch, bertdim 52 | td_bert_out = self.td_bert(inputs, is_return_ensemble_values=True) 53 | # shape: batch, 3 * 2 * 2 * bertdim 54 | hosseinia_out = self.hosseinia(inputs, is_return_ensemble_values=True) 55 | 56 | # to ensure that both models have more or less similar impact on the result, apply a dense layer to hosseinia 57 | # so that its new shape is: batch, bertdim 58 | hosseinia_out = self.hosseinia_dense(hosseinia_out) 59 | 60 | # combine 61 | combined_out = torch.cat((td_bert_out, hosseinia_out), dim=1) 62 | 63 | logits = self.ensemble_combiner(combined_out) 64 | 65 | return logits 66 | -------------------------------------------------------------------------------- /NewsSentiment/models/ensembleb.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import List, Dict 3 | from 
NewsSentiment.consts import * 4 | import torch.nn as nn 5 | import torch 6 | 7 | from NewsSentiment.models.FXBaseModel import FXBaseModel 8 | from NewsSentiment.models.singletarget.td_bert import TD_BERT 9 | 10 | 11 | class EnsembleTopB(FXBaseModel): 12 | @staticmethod 13 | def get_language_models(): 14 | return (get_default_lm(),) 15 | 16 | @staticmethod 17 | def get_input_field_ids(): 18 | return [ 19 | # tdbert 20 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 21 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 22 | # hosseinia 23 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS), 24 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS), 25 | # while we use text-then-target as bert input, we can use text targetmask and text knowledge source mask 26 | # because it is identical to a hypothetical text-then-target target mask or text-then-target knowledge 27 | # source mask (we would not highlight the target in the 2nd component in the corresponding mask) 28 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 29 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES), 30 | ] 31 | 32 | def __init__(self, transformer_models: Dict, opt: Namespace): 33 | super().__init__() 34 | from models.singletarget.knowledgesourceshosseiniatdbert import KnowledgeSourcesHosseiniaTdBert 35 | 36 | # submodels models 37 | self.td_bert = TD_BERT(transformer_models, opt) 38 | self.hosseiniatdbert = KnowledgeSourcesHosseiniaTdBert(transformer_models, opt) 39 | 40 | # ensemble related 41 | self.ensemble_combiner = nn.Linear( 42 | self.hosseiniatdbert.language_model.config.hidden_size * 2, 43 | opt.polarities_dim 44 | ) 45 | 46 | def forward(self, inputs: List): 47 | # shape: batch, bertdim 48 | td_bert_out = self.td_bert(inputs, is_return_ensemble_values=True) 49 | # shape: batch, 3 * 2 * 2 * bertdim 50 | hosseinia_out = self.hosseiniatdbert(inputs, is_return_ensemble_values=True) 51 | 52 | # combine 53 | combined_out = torch.cat((td_bert_out, hosseinia_out), dim=1) 54 | 55 | logits = self.ensemble_combiner(combined_out) 56 | 57 | return logits 58 | -------------------------------------------------------------------------------- /NewsSentiment/models/multitargets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/models/multitargets/__init__.py -------------------------------------------------------------------------------- /NewsSentiment/models/multitargets/contrasting.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from NewsSentiment.consts import * 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class Contrasting(FXBaseModel): 13 | """ 14 | This model returns a sequence that only contains the hidden states of those output 15 | nodes that represent a word piece of the target phrase. All other hidden states are 16 | set to 0. 
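Note that, unlike the other multi-target models here, this model expects exactly two targets per item (see the assert in forward()) and returns both the logits, of shape (batch, 2, opt.polarities_dim), and the learned cross weights.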
The length of the output sequence is opt.max_seq_len 17 | 18 | From a conceptual perspective, TD-BERT is in some aspects similar (e.g., only the 19 | output of target-phrase-related nodes is used), but in other aspects not similar or 20 | it is not quite clear how the authors of TD-BERT implemented them. An email I sent 21 | to them was not answered yet. 22 | """ 23 | 24 | @staticmethod 25 | def get_language_models(): 26 | return (BERT_BASE_UNCASED,) 27 | 28 | @staticmethod 29 | def get_input_field_ids(): 30 | return [ 31 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 32 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 33 | ] 34 | 35 | def __init__(self, transformer_models: Dict, opt: Namespace): 36 | super(Contrasting, self).__init__() 37 | self.language_model = transformer_models[BERT_BASE_UNCASED] 38 | self.dropout = nn.Dropout(opt.dropout) 39 | 40 | self.contrasting_weight_dense = nn.Linear(opt.max_seq_len, opt.max_seq_len,) 41 | self.dense = nn.Linear( 42 | self.language_model.config.hidden_size, opt.polarities_dim 43 | ) 44 | 45 | def forward(self, inputs): 46 | # get inputs 47 | text_bert_indices = FXDataset.get_input_by_params( 48 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 49 | ) 50 | text_bert_indices_targets_mask = FXDataset.get_input_by_params( 51 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 52 | ) 53 | assert text_bert_indices.shape[1] == 2, "contrasting requires two targets" 54 | 55 | # target a 56 | a_text_bert_indices = text_bert_indices[:, 0, :] 57 | a_text_bert_indices_targets_mask = text_bert_indices_targets_mask[:, 0, :] 58 | 59 | # target b 60 | b_text_bert_indices = text_bert_indices[:, 1, :] 61 | b_text_bert_indices_targets_mask = text_bert_indices_targets_mask[:, 1, :] 62 | 63 | # bert 64 | ( 65 | a_last_hidden_states, 66 | a_pooler_output, 67 | a_all_hidden_states, 68 | ) = self.language_model(input_ids=a_text_bert_indices) 69 | ( 70 | b_last_hidden_states, 71 | b_pooler_output, 72 | b_all_hidden_states, 73 | ) = self.language_model(input_ids=b_text_bert_indices) 74 | stacked_bert_outs_ab = torch.stack( 75 | (a_last_hidden_states, b_last_hidden_states), dim=1 76 | ) 77 | # stacked_bert_outs_ab 78 | stacked_bert_outs_ab = self.dropout(stacked_bert_outs_ab) 79 | # shape: batch, 2, seqlen, bertdim 80 | 81 | # create weight 82 | cross_weight = self.contrasting_weight_dense(text_bert_indices_targets_mask) 83 | cross_weight = cross_weight.unsqueeze(3).repeat( 84 | 1, 1, 1, stacked_bert_outs_ab.shape[3] 85 | ) 86 | cross_weight = self.dropout(cross_weight) 87 | # shape: batch, 2, seqlen 88 | 89 | weighted_stacked_bert_outs_ab = stacked_bert_outs_ab * cross_weight 90 | 91 | # sum 92 | weighted_stacked_bert_outs_ab = weighted_stacked_bert_outs_ab.sum(dim=2) 93 | 94 | # dense 95 | logits = self.dense(weighted_stacked_bert_outs_ab) 96 | 97 | return logits, cross_weight 98 | -------------------------------------------------------------------------------- /NewsSentiment/models/multitargets/random_multi.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from NewsSentiment.consts import * 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class RandomMulti(FXBaseModel): 13 | """ 14 | 15 | """ 16 | 17 | @staticmethod 18 | def get_language_models(): 19 | """ 20 | All 
architectures assume that at least one model is used, so we just require 21 | bert here for compatibility. 22 | :return: 23 | """ 24 | return (BERT_BASE_UNCASED,) 25 | 26 | @staticmethod 27 | def get_input_field_ids(): 28 | return [ 29 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 31 | ] 32 | 33 | def __init__(self, transformer_models: Dict, opt: Namespace): 34 | super(RandomMulti, self).__init__() 35 | self.num_classes = opt.polarities_dim 36 | 37 | def forward(self, inputs): 38 | text_bert_indices = FXDataset.get_input_by_params( 39 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 40 | ) 41 | batch_size = text_bert_indices.shape[0] 42 | num_targets = text_bert_indices.shape[1] 43 | 44 | # get a random tensor 45 | logits = torch.rand(batch_size, num_targets, self.num_classes) 46 | 47 | return logits 48 | -------------------------------------------------------------------------------- /NewsSentiment/models/multitargets/seq2seq.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch.nn as nn 5 | 6 | from NewsSentiment.SentimentClasses import SentimentClasses 7 | from NewsSentiment.consts import * 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class SeqTwoSeq(FXBaseModel): 13 | """ 14 | Outputs the class probabilities for each token. So, the output will be: 15 | (batch, seqlen (150), classnum (3)) 16 | """ 17 | 18 | @staticmethod 19 | def get_language_models(): 20 | return (get_default_lm(),) 21 | 22 | @staticmethod 23 | def get_input_field_ids(): 24 | return [ 25 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 26 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 27 | ] 28 | 29 | def __init__(self, transformer_models: Dict, opt: Namespace): 30 | super(SeqTwoSeq, self).__init__() 31 | self.language_model = transformer_models[get_default_lm()] 32 | self.dropout = nn.Dropout(opt.dropout) 33 | # https://pytorch.org/docs/stable/nn.html#linear supports multi-dimensional 34 | # input; only the last dimension has to be specified for Linear creation 35 | 36 | self.attentionlike_dense = nn.Bilinear( 37 | self.language_model.config.hidden_size, 38 | FXDataset.NUM_MAX_TARGETS_PER_ITEM, 39 | SentimentClasses.get_num_classes(), 40 | ) 41 | 42 | def forward(self, inputs): 43 | # get inputs 44 | text_bert_indices = FXDataset.get_input_by_params( 45 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 46 | ) 47 | text_bert_indices_targets_mask = FXDataset.get_input_by_params( 48 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 49 | ) 50 | # prepare inputs 51 | # for text only, we do not need target specific information, i.e., all text 52 | # vectors are identical. also, bert can 53 | # only process one sequence of size max_seq_len (more specifically, a tensor 54 | # of size batch_size x max_seq_len).
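# (for instance, with batch_size=4, NUM_MAX_TARGETS_PER_ITEM=5, and max_seq_len=150 as used elsewhere in this repo, text_bert_indices arrives here with shape (4, 5, 150) and becomes (4, 150) after the selection below; the Bilinear head then combines the (4, 150, 768) hidden states with the permuted (4, 150, 5) target mask into sequence_logits of shape (4, 150, 3))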
thus, we select only the first element from 55 | # the second dimension (the dimensions are: batch, targets, hidden_states) 56 | text_bert_indices = text_bert_indices[:, 0, :] 57 | # apply bert 58 | last_hidden_states = self.invoke_language_model( 59 | lm=self.language_model, 60 | input_ids=text_bert_indices, 61 | ) 62 | # shape: batch, seqlen, bertdim 63 | last_hidden_states = self.dropout(last_hidden_states) 64 | 65 | # stack hidden states with target mask 66 | # hidden: batch, seqlen, bertdim -> stay 67 | # targetmask: batch, target, seqlen, -> batch, seqlen, target 68 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.permute( 69 | 0, 2, 1 70 | ).contiguous() 71 | 72 | sequence_logits = self.attentionlike_dense( 73 | last_hidden_states, text_bert_indices_targets_mask 74 | ) 75 | 76 | return sequence_logits 77 | -------------------------------------------------------------------------------- /NewsSentiment/models/multitargets/seq2seq_without_targetmask.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch.nn as nn 5 | 6 | from NewsSentiment.SentimentClasses import SentimentClasses 7 | from NewsSentiment.consts import * 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class SeqTwoSeqWithoutTargetMask(FXBaseModel): 13 | """ 14 | Outputs the class probabilities for each token. So, the output will be: 15 | (batch, seqlen (150), classnum (3)) 16 | """ 17 | 18 | @staticmethod 19 | def get_language_models(): 20 | return (BERT_BASE_UNCASED,) 21 | 22 | @staticmethod 23 | def get_input_field_ids(): 24 | return [ 25 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 26 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 27 | ] 28 | 29 | def __init__(self, transformer_models: Dict, opt: Namespace): 30 | super(SeqTwoSeqWithoutTargetMask, self).__init__() 31 | self.language_model = transformer_models[BERT_BASE_UNCASED] 32 | self.dropout = nn.Dropout(opt.dropout) 33 | # https://pytorch.org/docs/stable/nn.html#linear supports multi-dimensional 34 | # input; only the last dimension has to be specified for Linear creation 35 | 36 | self.attentionlike_dense = nn.Linear( 37 | self.language_model.config.hidden_size, 38 | SentimentClasses.get_num_classes(), 39 | ) 40 | 41 | def forward(self, inputs): 42 | # get inputs 43 | text_bert_indices = FXDataset.get_input_by_params( 44 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 45 | ) 46 | 47 | # prepare inputs 48 | # for text only, we do not need target specific information, i.e., all text 49 | # vectors are identical. also, bert can 50 | # only process one sequence of size max_seq_len (more specifically, a tensor 51 | # of size batch_size x max_seq_len).
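# (as a rough illustration with the same shapes as above, batch_size=4 and max_seq_len=150: since this variant ignores the target mask, the plain Linear head below maps the (4, 150, 768) hidden states directly to sequence_logits of shape (4, 150, 3))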
thus, we select only the first element from 52 | # the second dimension (the dimensions are: batch, targets, hidden_states) 53 | text_bert_indices = text_bert_indices[:, 0, :] 54 | # apply bert 55 | last_hidden_states, pooler_output, all_hidden_states = self.language_model( 56 | input_ids=text_bert_indices 57 | ) 58 | # shape: batch, seqlen, bertdim 59 | last_hidden_states = self.dropout(last_hidden_states) 60 | 61 | sequence_logits = self.attentionlike_dense(last_hidden_states) 62 | 63 | return sequence_logits 64 | -------------------------------------------------------------------------------- /NewsSentiment/models/multitargets/tdbertlikemultitarget.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch.nn as nn 5 | 6 | from NewsSentiment.consts import * 7 | from NewsSentiment.dataset import FXDataset 8 | from NewsSentiment.models.FXBaseModel import FXBaseModel 9 | 10 | 11 | class TDBertLikeMultiTarget(FXBaseModel): 12 | """ 13 | This model returns a sequence that only contains the hidden states of those output 14 | nodes that represent a word piece of the target phrase. All other hidden states are 15 | set to 0. The length of the output sequence is opt.max_seq_len 16 | 17 | From a conceptual perspective, TD-BERT is in some aspects similar (e.g., only the 18 | output of target-phrase-related nodes is used), but in other aspects not similar or 19 | it is not quite clear how the authors of TD-BERT implemented them. An email I sent 20 | to them was not answered yet. 21 | """ 22 | 23 | @staticmethod 24 | def get_language_models(): 25 | return (BERT_BASE_UNCASED,) 26 | 27 | @staticmethod 28 | def get_input_field_ids(): 29 | return [ 30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 31 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 32 | # ( 33 | # BERT_BASE_UNCASED, 34 | # FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES, 35 | # ), 36 | ] 37 | 38 | def __init__(self, transformer_models: Dict, opt: Namespace): 39 | super(TDBertLikeMultiTarget, self).__init__() 40 | self.language_model = transformer_models[BERT_BASE_UNCASED] 41 | self.dropout = nn.Dropout(opt.dropout) 42 | self.dense = nn.Linear( 43 | self.language_model.config.hidden_size, opt.polarities_dim 44 | ) 45 | 46 | def forward(self, inputs): 47 | # get inputs 48 | text_bert_indices = FXDataset.get_input_by_params( 49 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 50 | ) 51 | text_bert_indices_targets_mask = FXDataset.get_input_by_params( 52 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 53 | ) 54 | # text_bert_indices_nrc_emolex = FXDataset.get_input_by_params( 55 | # inputs, 56 | # BERT_BASE_UNCASED, 57 | # FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES, 58 | # ) 59 | # prepare inputs 60 | # for text only, we do not need target specific information, i.e., all text 61 | # vectors are identical. also, bert can 62 | # only process one sequence of size max_seq_len (more specifically, a tensor 63 | # of size batch_size x max_seq_len).
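# (illustrative example for the masked mean computed further below: if a target spans 3 word pieces, its row in text_bert_indices_targets_mask contains exactly 3 ones, so summing the masked hidden states over the sequence dimension and dividing by that count yields the mean hidden state of just those 3 word pieces)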
thus, we select only the first element from 64 | # the second dimension (the dimensions are: batch, targets, hidden_states) 65 | text_bert_indices = text_bert_indices[:, 0, :] 66 | # apply bert 67 | last_hidden_states, pooler_output, all_hidden_states = self.language_model( 68 | input_ids=text_bert_indices 69 | ) 70 | # element-wise multiplication with target mask 71 | # align the dimensions of the tensors 72 | # last_hidden_states.shape = 4,150,768; should be 4,5,150,768 73 | # insert a new singleton dimension after the first dimension 74 | # new shape: 4,1,150,768 75 | last_hidden_states = last_hidden_states.unsqueeze(1) 76 | # repeat k times along the new 2nd dimension, where k is the target size 77 | last_hidden_states = last_hidden_states.repeat( 78 | 1, text_bert_indices_targets_mask.shape[1], 1, 1 79 | ) 80 | # text_bert_indices_targets_mask.shape = 4,5,150; should be 4,5,150,768 81 | # insert a singleton dimension after the three already existing dimensions 82 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.unsqueeze(3) 83 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.repeat( 84 | 1, 1, 1, last_hidden_states.shape[3] 85 | ) 86 | 87 | last_hidden_states_only_targets = ( 88 | last_hidden_states * text_bert_indices_targets_mask 89 | ) 90 | 91 | # similar to TD-BERT, perform max pooling (TODO: not implemented yet); instead: 92 | # for now, retrieve only the values of the target's output tokens and then 93 | # calculate the mean: 94 | # (batchsize, targetsize, 150, 768) -> (batchsize, targetsize, 768) 95 | # get the positions of target nodes. note that we cannot simply take the mean 96 | # as it would divide by the sequence length, whereas the effective 97 | # length is only of size k, where k is the number of non-zero scalars in the 98 | # input mask (since we are only interested in those values) 99 | last_hidden_states_aggregated_per_target = last_hidden_states_only_targets.sum( 100 | dim=2 101 | ) 102 | # get the sum for each (batch, hidden states) (sum over the sequence length 103 | # dim) 104 | denominator_for_mean = text_bert_indices_targets_mask.sum(dim=2) 105 | # divide for each (batch, hidden states) by the denominator to get the mean 106 | last_hidden_states_aggregated_per_target = ( 107 | last_hidden_states_aggregated_per_target / denominator_for_mean 108 | ) 109 | # dropout before dense layer, as in most other tsc models 110 | last_hidden_states_aggregated_per_target = self.dropout( 111 | last_hidden_states_aggregated_per_target 112 | ) 113 | # dense layer 114 | logits = self.dense(last_hidden_states_aggregated_per_target) 115 | 116 | return logits 117 | -------------------------------------------------------------------------------- /NewsSentiment/models/multitargets/tdbertlikemultitarget_dense.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch.nn as nn 5 | 6 | from NewsSentiment.consts import * 7 | from NewsSentiment.dataset import FXDataset 8 | from NewsSentiment.models.FXBaseModel import FXBaseModel 9 | 10 | 11 | class TDBertLikeMultiTargetDense(FXBaseModel): 12 | """ 13 | This model returns a sequence that only contains the hidden states of those output 14 | nodes that represent a word piece of the target phrase. All other hidden states are 15 | set to 0.
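In contrast to TDBertLikeMultiTarget, this variant does not average the target word pieces' hidden states; instead, a Bilinear layer combines each token's hidden state with the target mask, and a second Linear layer maps the sequence dimension (opt.max_seq_len) to opt.polarities_dim.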
The length of the output sequence is opt.max_seq_len 16 | 17 | From a conceptual perspective, TD-BERT is in some aspects similar (e.g., only the 18 | output of target-phrase-related nodes is used), but in other aspects not similar or 19 | it is not quite clear how the authors of TD-BERT implemented them. An email I sent 20 | to them was not answered yet. 21 | """ 22 | 23 | @staticmethod 24 | def get_language_models(): 25 | return (BERT_BASE_UNCASED,) 26 | 27 | @staticmethod 28 | def get_input_field_ids(): 29 | return [ 30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 31 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 32 | ] 33 | 34 | def __init__(self, transformer_models: Dict, opt: Namespace): 35 | super(TDBertLikeMultiTargetDense, self).__init__() 36 | self.language_model = transformer_models[BERT_BASE_UNCASED] 37 | self.dropout = nn.Dropout(opt.dropout) 38 | self.dense = nn.Bilinear( 39 | self.language_model.config.hidden_size, 40 | FXDataset.NUM_MAX_TARGETS_PER_ITEM, 41 | FXDataset.NUM_MAX_TARGETS_PER_ITEM, 42 | ) 43 | self.dense2 = nn.Linear(opt.max_seq_len, opt.polarities_dim) 44 | 45 | def forward(self, inputs): 46 | # get inputs 47 | text_bert_indices = FXDataset.get_input_by_params( 48 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 49 | ) 50 | text_bert_indices_targets_mask = FXDataset.get_input_by_params( 51 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 52 | ) 53 | # prepare inputs 54 | # for text only, we do not need target specific information, i.e., all text 55 | # vectors are identical. also, bert can 56 | # only process one sequence of size max_seq_len (more specifically, a tensor 57 | # of size batch_size x max_seq_len). thus, we select only the first element from 58 | # the second dimension (the dimensions are: batch, targets, hidden_states) 59 | text_bert_indices = text_bert_indices[:, 0, :] 60 | # apply bert 61 | last_hidden_states, pooler_output, all_hidden_states = self.language_model( 62 | input_ids=text_bert_indices 63 | ) 64 | 65 | # dropout 66 | last_hidden_states = self.dropout(last_hidden_states) 67 | 68 | # shapes: 69 | # last_hidden_states: batch, seqlen, bertdim 70 | # text_bert_indices_targets_mask: batch, target, seqlen 71 | # new text_bert_indices_targets_mask: batch, seqlen, target 72 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.permute( 73 | 0, 2, 1 74 | ).clone() 75 | 76 | logits = self.dense(last_hidden_states, text_bert_indices_targets_mask) 77 | logits = logits.permute(0, 2, 1) 78 | logits = self.dense2(logits) 79 | 80 | return logits 81 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/models/singletarget/__init__.py -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/aen.py: -------------------------------------------------------------------------------- 1 | # adapted from absa-pytorch 2 | from argparse import Namespace 3 | from typing import Dict 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.layers.attention import Attention 10 | from NewsSentiment.layers.point_wise_feed_forward import PositionwiseFeedForward 11 | from
NewsSentiment.layers.squeeze_embedding import SqueezeEmbedding 12 | 13 | from NewsSentiment.consts import * 14 | from NewsSentiment.fxlogger import get_logger 15 | from NewsSentiment.models.FXBaseModel import FXBaseModel 16 | 17 | logger = get_logger() 18 | 19 | 20 | class AEN_Base(FXBaseModel): 21 | @staticmethod 22 | def get_language_models(): 23 | return (BERT_BASE_UNCASED,) 24 | 25 | @staticmethod 26 | def get_input_field_ids(): 27 | return [ 28 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 29 | (BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS), 30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 31 | ] 32 | 33 | def __init__(self, transformer_models: Dict, opt: Namespace): 34 | super(AEN_Base, self).__init__() 35 | logger.info("creating AEN_Base") 36 | self.device = opt.device 37 | 38 | self.language_model = transformer_models[BERT_BASE_UNCASED] 39 | self.name = "aen_bert" 40 | self.lm_representation = "last" 41 | embed_dim = self.language_model.config.hidden_size 42 | 43 | self.squeeze_embedding = SqueezeEmbedding() 44 | self.dropout = nn.Dropout(opt.dropout) 45 | hidden_dim = embed_dim # or should this be 300, as mentioned in the paper 46 | 47 | self.attn_k = Attention( 48 | embed_dim, 49 | out_dim=hidden_dim, 50 | n_head=8, 51 | score_function="mlp", 52 | dropout=opt.dropout, 53 | ) 54 | self.attn_q = Attention( 55 | embed_dim, 56 | out_dim=hidden_dim, 57 | n_head=8, 58 | score_function="mlp", 59 | dropout=opt.dropout, 60 | ) 61 | self.ffn_c = PositionwiseFeedForward(hidden_dim, dropout=opt.dropout) 62 | self.ffn_t = PositionwiseFeedForward(hidden_dim, dropout=opt.dropout) 63 | 64 | self.attn_s1 = Attention( 65 | hidden_dim, n_head=8, score_function="mlp", dropout=opt.dropout 66 | ) 67 | 68 | self.dense = nn.Linear(hidden_dim * 3, opt.polarities_dim) 69 | 70 | def apply_lm(self, _input, _input_attention=None): 71 | if self.name in ["aen_bert", "aen_roberta"]: 72 | last_hidden, _, all_hidden = self.language_model( 73 | input_ids=_input, attention_mask=_input_attention 74 | ) 75 | elif self.name == "aen_distilbert": 76 | last_hidden, all_hidden = self.language_model( 77 | input_ids=_input, attention_mask=_input_attention 78 | ) 79 | else: 80 | raise Exception("unknown model name") 81 | 82 | if self.lm_representation == "last": 83 | return last_hidden 84 | elif self.lm_representation == "sum_last_four": 85 | last_four = all_hidden[-4:] # list of four, each has shape: 16, 80, 768 86 | last_four_stacked = torch.stack(last_four) # shape: 4, 16, 80, 768 87 | sum_last_four = torch.sum(last_four_stacked, dim=0) 88 | return sum_last_four 89 | elif self.lm_representation == "mean_last_four": 90 | last_four = all_hidden[-4:] # list of four, each has shape: 16, 80, 768 91 | last_four_stacked = torch.stack(last_four) # shape: 4, 16, 80, 768 92 | mean_last_four = torch.mean(last_four_stacked, dim=0) 93 | return mean_last_four 94 | elif self.lm_representation == "sum_last_two": 95 | last_two = all_hidden[-2:] 96 | last_two_stacked = torch.stack(last_two) 97 | sum_last_two = torch.sum(last_two_stacked, dim=0) 98 | return sum_last_two 99 | elif self.lm_representation == "mean_last_two": 100 | last_two = all_hidden[-2:] 101 | last_two_stacked = torch.stack(last_two) 102 | mean_last_two = torch.mean(last_two_stacked, dim=0) 103 | return mean_last_two 104 | elif self.lm_representation == "sum_all": 105 | all_stacked = torch.stack(all_hidden) 106 | sum_all = torch.sum(all_stacked, dim=0) 107 | return sum_all 108 | elif self.lm_representation == "mean_all": 109 
| all_stacked = torch.stack(all_hidden) 110 | mean_all = torch.mean(all_stacked, dim=0) 111 | return mean_all 112 | 113 | def forward(self, inputs): 114 | context = FXDataset.get_input_by_params( 115 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 116 | ) 117 | target = FXDataset.get_input_by_params( 118 | inputs, BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS 119 | ) 120 | context_len = torch.sum(context != 0, dim=-1) 121 | target_len = torch.sum(target != 0, dim=-1) 122 | 123 | context = self.squeeze_embedding(context, context_len) 124 | # context_attention = self.squeeze_embedding(context_attention, context_len) 125 | context = self.apply_lm(context) 126 | context = self.dropout(context) 127 | 128 | target = self.squeeze_embedding(target, target_len) 129 | # target_attention = self.squeeze_embedding(target_attention, target_len) 130 | target = self.apply_lm(target) 131 | target = self.dropout(target) 132 | 133 | hc, _ = self.attn_k(context, context) 134 | hc = self.ffn_c(hc) 135 | 136 | ht, _ = self.attn_q(context, target) 137 | ht = self.ffn_t(ht) 138 | 139 | s1, _ = self.attn_s1(hc, ht) 140 | 141 | context_len = torch.tensor(context_len, dtype=torch.float).to(self.device) 142 | target_len = torch.tensor(target_len, dtype=torch.float).to(self.device) 143 | 144 | hc_mean = torch.div( 145 | torch.sum(hc, dim=1), context_len.view(context_len.size(0), 1) 146 | ) 147 | ht_mean = torch.div( 148 | torch.sum(ht, dim=1), target_len.view(target_len.size(0), 1) 149 | ) 150 | s1_mean = torch.div( 151 | torch.sum(s1, dim=1), context_len.view(context_len.size(0), 1) 152 | ) 153 | 154 | x = torch.cat((hc_mean, s1_mean, ht_mean), dim=-1) 155 | out = self.dense(x) 156 | 157 | return out 158 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/grutscsingle.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch 5 | import torch.nn as nn 6 | from transformers import PretrainedConfig 7 | 8 | from NewsSentiment.consts import * 9 | from NewsSentiment.dataset import FXDataset, FXEasyTokenizer 10 | from NewsSentiment.models.FXBaseModel import ( 11 | FXBaseModel, 12 | provide_pretrained, 13 | default_pretrained, 14 | ) 15 | 16 | 17 | @default_pretrained("v1.0.0") 18 | @provide_pretrained( 19 | "v1.0.0", "https://github.com/fhamborg/NewsMTSC/releases/download/v1.0.0/grutsc" 20 | ) 21 | class GRUTSCSingle(FXBaseModel): 22 | """ 23 | Inspired from https://arxiv.org/pdf/2006.00052.pdf 24 | Differences: 25 | - instead of question ("Is the ACLU good for USA?") then text (1 or more sentences), 26 | we use text then target (and no question, similar to BERT-SPC) 27 | - no vader 28 | - additionally we can flexibly use any knowledge source as well as multiple 29 | - we have one large matrix for all concatenated knowledge source embeddings, whereas 30 | in the original paper they use individual, smaller matrices for each knowledge 31 | source embedding 32 | - target mask (mostly useful for BERT) 33 | - fine-tuning LM enabled 34 | """ 35 | 36 | @staticmethod 37 | def get_language_models(): 38 | return (get_default_lm(),) 39 | 40 | @staticmethod 41 | def get_input_field_ids(): 42 | return [ 43 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS), 44 | ( 45 | get_default_lm(), 46 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 47 | ), 48 | # while we use text-then-target as bert input, we can use 
text targetmask 49 | # and text knowledge source mask because it is identical to a hypothetical 50 | # text-then-target target mask or text-then-target knowledge source mask 51 | # (we would not highlight the target in the 2nd component in the 52 | # corresponding mask) 53 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 54 | ( 55 | get_default_lm(), 56 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES, 57 | ), 58 | ] 59 | 60 | def __init__( 61 | self, transformer_models: Dict, opt: Namespace, config: PretrainedConfig 62 | ): 63 | super().__init__(config) 64 | self.language_model = transformer_models[get_default_lm()] 65 | self.ks_embeddings_dense = nn.Linear( 66 | FXEasyTokenizer.NUM_CATEGORIES_OF_SELECTED_KNOWLEDGE_SOURCES, 67 | self.language_model.config.hidden_size, 68 | ) 69 | if get_default_lm() == BERT_BASE_UNCASED: 70 | self.is_use_targetmask = True 71 | else: 72 | self.is_use_targetmask = False 73 | 74 | num_input_embeddings = 2 75 | if self.is_use_targetmask: 76 | num_input_embeddings = 3 77 | 78 | self.gru = nn.GRU( 79 | self.language_model.config.hidden_size * num_input_embeddings, 80 | self.language_model.config.hidden_size * num_input_embeddings, 81 | bidirectional=True, 82 | batch_first=True, 83 | ) 84 | self.dropout = nn.Dropout(opt.dropout) 85 | num_output_dim = opt.polarities_dim 86 | if opt.is_return_confidence: 87 | num_output_dim += 1 88 | 89 | self.dense = nn.Linear( 90 | # 3 inputs (original last gru out, mean, max), 2 inputs to gru (bert and 91 | # knowledge embedding), 2 (because bidirectional gru) 92 | self.language_model.config.hidden_size * 3 * num_input_embeddings * 2, 93 | num_output_dim, 94 | ) 95 | 96 | def forward(self, inputs, is_return_ensemble_values: bool = False): 97 | # get inputs 98 | text_target_bert_indices = FXDataset.get_input_by_params( 99 | inputs, get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS 100 | ) 101 | text_target_bert_segments_ids = FXDataset.get_input_by_params( 102 | inputs, 103 | get_default_lm(), 104 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 105 | ) 106 | text_bert_indices_target_mask = FXDataset.get_input_by_params( 107 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 108 | ) 109 | text_bert_indices_selected_knowledge_sources = FXDataset.get_input_by_params( 110 | inputs, 111 | get_default_lm(), 112 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES, 113 | ) 114 | 115 | # apply bert 116 | last_hidden_states = self.invoke_language_model( 117 | lm=self.language_model, 118 | input_ids=text_target_bert_indices, 119 | token_type_ids=text_target_bert_segments_ids, 120 | ) 121 | # shape: batch, seqlen, hiddendim 122 | 123 | # apply knowledge embedding 124 | knowledge_embedded = self.ks_embeddings_dense( 125 | text_bert_indices_selected_knowledge_sources.float() 126 | ) 127 | # shape: batch, seqlen, hiddendim 128 | 129 | if self.is_use_targetmask: 130 | # repeat 131 | target_mask = text_bert_indices_target_mask.unsqueeze(dim=2).repeat( 132 | 1, 1, knowledge_embedded.shape[2] 133 | ) 134 | # shape: batch, seqlen, hiddendim 135 | 136 | # concat (called x_t in paper) 137 | bert_and_knowledge = torch.cat( 138 | (last_hidden_states, knowledge_embedded, target_mask), dim=2 139 | ) 140 | # batch x seq x bert+knowledge+targetmask 141 | else: 142 | # concat (called x_t in paper) 143 | bert_and_knowledge = torch.cat( 144 | (last_hidden_states, knowledge_embedded), dim=2 145 | ) 146 | # batch x seq x bert+knowledge 147 | 148 | # apply gru 
(result called z_t in paper) 149 | gru_all_hidden, gru_last_hidden = self.gru( 150 | bert_and_knowledge, 151 | torch.zeros( 152 | 2, 153 | bert_and_knowledge.shape[0], 154 | self.language_model.config.hidden_size * 2, 155 | ).to(self.device), 156 | ) 157 | # all hidden shape: batch x seq x 4*hidden (contains hidden states for each 158 | # part of the input seq) 159 | # last hidden shap: numdir x batch x 2*hidden (contains hidden states for last 160 | # part of input seq) 161 | 162 | # gru_last_hidden_own = gru_all_hidden[:,-1:,] 163 | # get both directions 164 | gru_last_hidden_dir0 = gru_last_hidden[0, :, :] 165 | gru_last_hidden_dir1 = gru_last_hidden[1, :, :] 166 | # shape each: batch x 2*hidden 167 | gru_last_hidden_stacked = torch.cat( 168 | (gru_last_hidden_dir0, gru_last_hidden_dir1), dim=1 169 | ) 170 | # batch x 4*hidden 171 | 172 | # pooling 173 | # according to original paper: "max-pooling returns a vector with maximum 174 | # weights across all hidden states of input tokens for each dimension. in this 175 | # way, the input tokens with higher weights will be engaged for stance 176 | # prediction." 177 | gru_avg = torch.mean(gru_all_hidden, dim=1) 178 | gru_max, _ = torch.max(gru_all_hidden, dim=1) 179 | 180 | # concat (called "u" in original paper) 181 | gru_complete_concatted = torch.cat( 182 | (gru_last_hidden_stacked, gru_avg, gru_max), dim=1 183 | ) 184 | 185 | if is_return_ensemble_values: 186 | return gru_complete_concatted 187 | else: 188 | # dense 189 | logits = self.dense(gru_complete_concatted) 190 | 191 | return logits 192 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/lcf.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/yangheng95/LCF-ABSA 2 | from argparse import Namespace 3 | from typing import Dict 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | # from transformers.modeling_bert import BertPooler, BertSelfAttention 9 | 10 | from NewsSentiment.consts import * 11 | from NewsSentiment.dataset import FXDataset 12 | from NewsSentiment.models.FXBaseModel import FXBaseModel 13 | 14 | 15 | class GlobalContext(nn.Module): 16 | def __init__(self, global_context_seqs_per_doc): 17 | super(GlobalContext, self).__init__() 18 | self.global_context_seqs_per_doc = global_context_seqs_per_doc 19 | 20 | def forward(self, inputs): 21 | pass 22 | 23 | 24 | class SelfAttention(nn.Module): 25 | def __init__(self, config, opt): 26 | super(SelfAttention, self).__init__() 27 | self.opt = opt 28 | self.config = config 29 | self.SA = None # BertSelfAttention(config) 30 | self.tanh = torch.nn.Tanh() 31 | 32 | def forward(self, inputs): 33 | zero_tensor = torch.tensor( 34 | np.zeros((inputs.size(0), 1, 1, self.opt.max_seq_len), dtype=np.float32), 35 | dtype=torch.float32, 36 | ).to(self.opt.device) 37 | SA_out = self.SA(inputs, zero_tensor) 38 | return self.tanh(SA_out[0]) 39 | 40 | 41 | class LCF_BERT(FXBaseModel): 42 | @staticmethod 43 | def get_language_models(): 44 | return (BERT_BASE_UNCASED,) 45 | 46 | @staticmethod 47 | def get_input_field_ids(): 48 | return [ 49 | (BERT_BASE_UNCASED, FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS), 50 | ( 51 | BERT_BASE_UNCASED, 52 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 53 | ), 54 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 55 | (BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS), 56 | (BERT_BASE_UNCASED, 
FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 57 | ] 58 | 59 | def __init__(self, transformer_models: Dict, opt: Namespace): 60 | super(LCF_BERT, self).__init__() 61 | 62 | bert = transformer_models[BERT_BASE_UNCASED] 63 | self.bert_spc = bert 64 | self.opt = opt 65 | 66 | # self.bert_local = copy.deepcopy(bert) # Uncomment the line to use dual Bert 67 | self.bert_local = ( 68 | bert # Default to use single Bert and reduce memory requirements 69 | ) 70 | self.dropout = nn.Dropout(self.opt.dropout) 71 | # while the paper describes 3 self attentions, the original implementation by the authors 72 | # uses only one. we stick with the original implementation. 73 | # answer by the author: the version found PyTorch-ABSA repository and below is better than what was 74 | # described in the paper (cf. https://github.com/yangheng95/LC-ABSA/issues/10#issuecomment-670301603) 75 | # self.bert_local_SA = SelfAttention(bert.config, self.opt) 76 | # self.bert_global_SA = SelfAttention(bert.config, self.opt) 77 | self.linear_double = nn.Linear( 78 | bert.config.hidden_size * 2, bert.config.hidden_size 79 | ) 80 | self.bert_SA = SelfAttention(bert.config, self.opt) 81 | self.linear_single = nn.Linear(bert.config.hidden_size, bert.config.hidden_size) 82 | self.bert_pooler = None # BertPooler(bert.config) 83 | 84 | self.dense = nn.Linear(bert.config.hidden_size, self.opt.polarities_dim) 85 | 86 | def feature_dynamic_mask(self, text_local_indices, aspect_indices): 87 | texts = text_local_indices.cpu().numpy() 88 | asps = aspect_indices.cpu().numpy() 89 | mask_len = self.opt.SRD 90 | masked_text_raw_indices = np.ones( 91 | ( 92 | text_local_indices.size(0), 93 | self.opt.max_seq_len, 94 | self.bert_local.config.hidden_size, 95 | ), 96 | dtype=np.float32, 97 | ) 98 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))): 99 | asp_len = np.count_nonzero(asps[asp_i]) - 2 100 | try: 101 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0] 102 | except: 103 | continue 104 | if asp_begin >= mask_len: 105 | mask_begin = asp_begin - mask_len 106 | else: 107 | mask_begin = 0 108 | for i in range(mask_begin): 109 | masked_text_raw_indices[text_i][i] = np.zeros( 110 | (self.bert_local.config.hidden_size), dtype=np.float 111 | ) 112 | for j in range(asp_begin + asp_len + mask_len, self.opt.max_seq_len): 113 | masked_text_raw_indices[text_i][j] = np.zeros( 114 | (self.bert_local.config.hidden_size), dtype=np.float 115 | ) 116 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices) 117 | return masked_text_raw_indices.to(self.opt.device) 118 | 119 | def feature_dynamic_weighted(self, text_local_indices, aspect_indices): 120 | texts = text_local_indices.cpu().numpy() 121 | asps = aspect_indices.cpu().numpy() 122 | masked_text_raw_indices = np.ones( 123 | ( 124 | text_local_indices.size(0), 125 | self.opt.max_seq_len, 126 | self.bert_local.config.hidden_size, 127 | ), 128 | dtype=np.float32, 129 | ) 130 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))): 131 | asp_len = np.count_nonzero(asps[asp_i]) - 2 132 | try: 133 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0] 134 | asp_avg_index = (asp_begin * 2 + asp_len) / 2 135 | except: 136 | continue 137 | distances = np.zeros(np.count_nonzero(texts[text_i]), dtype=np.float32) 138 | for i in range(1, np.count_nonzero(texts[text_i]) - 1): 139 | if abs(i - asp_avg_index) + asp_len / 2 > self.opt.SRD: 140 | distances[i] = 1 - ( 141 | abs(i - asp_avg_index) + asp_len / 2 - self.opt.SRD 142 | ) / 
np.count_nonzero(texts[text_i]) 143 | else: 144 | distances[i] = 1 145 | for i in range(len(distances)): 146 | masked_text_raw_indices[text_i][i] = ( 147 | masked_text_raw_indices[text_i][i] * distances[i] 148 | ) 149 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices) 150 | return masked_text_raw_indices.to(self.opt.device) 151 | 152 | def forward(self, inputs): 153 | text_target_bert_indices = FXDataset.get_input_by_params( 154 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS, 155 | ) 156 | 157 | text_target_bert_segments_ids = FXDataset.get_input_by_params( 158 | inputs, 159 | BERT_BASE_UNCASED, 160 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 161 | ) 162 | text_local_indices = FXDataset.get_input_by_params( 163 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 164 | ) 165 | aspect_indices = FXDataset.get_input_by_params( 166 | inputs, BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS 167 | ) 168 | 169 | # apply bert and dropout 170 | bert_spc_out, _, _ = self.bert_spc( 171 | text_target_bert_indices, text_target_bert_segments_ids 172 | ) 173 | bert_spc_out = self.dropout(bert_spc_out) 174 | 175 | bert_local_out, _, _ = self.bert_local(text_local_indices) 176 | bert_local_out = self.dropout(bert_local_out) 177 | 178 | if self.opt.local_context_focus == "cdm": 179 | masked_local_text_vec = self.feature_dynamic_mask( 180 | text_local_indices, aspect_indices 181 | ) 182 | bert_local_out = torch.mul(bert_local_out, masked_local_text_vec) 183 | elif self.opt.local_context_focus == "cdw": 184 | weighted_text_local_features = self.feature_dynamic_weighted( 185 | text_local_indices, aspect_indices 186 | ) 187 | bert_local_out = torch.mul(bert_local_out, weighted_text_local_features) 188 | 189 | # attention 190 | # bert_local_out = self.bert_local_SA(bert_local_out) 191 | # bert_spc_out = self.bert_global_SA(bert_spc_out) 192 | 193 | # cat 194 | out_cat = torch.cat((bert_local_out, bert_spc_out), dim=-1) 195 | 196 | # "interactive learning layer" 197 | mean_pool = self.linear_double(out_cat) 198 | self_attention_out = self.bert_SA(mean_pool) 199 | pooled_out = self.bert_pooler(self_attention_out) 200 | 201 | dense_out = self.dense(pooled_out) 202 | return dense_out 203 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/lcf2.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/yangheng95/LC-ABSA/blob/c945a94e0f86116c5578245aa9ad36c46c7b9c4a/models/lc_apc/lcf_bert.py 2 | # according to 3 | import copy 4 | from argparse import Namespace 5 | from typing import Dict 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | # from transformers.modeling_bert import BertPooler, BertSelfAttention 11 | 12 | from NewsSentiment.consts import * 13 | from NewsSentiment.dataset import FXDataset 14 | from NewsSentiment.layers.attention import FXBertSelfAttention 15 | from NewsSentiment.models.FXBaseModel import FXBaseModel 16 | 17 | 18 | class GlobalContext(nn.Module): 19 | def __init__(self, global_context_seqs_per_doc): 20 | super(GlobalContext, self).__init__() 21 | self.global_context_seqs_per_doc = global_context_seqs_per_doc 22 | 23 | def forward(self, inputs): 24 | pass 25 | 26 | 27 | class SelfAttention(nn.Module): 28 | def __init__(self, config, opt): 29 | super(SelfAttention, self).__init__() 30 | self.opt = opt 31 | self.config = config 32 | self.SA = 
FXBertSelfAttention( 33 | hidden_size=config.hidden_size, 34 | num_attention_heads=config.num_attention_heads, 35 | attention_probs_dropout_prob=0.1, 36 | ) 37 | self.tanh = torch.nn.Tanh() 38 | 39 | def forward(self, inputs): 40 | zero_tensor = torch.tensor( 41 | np.zeros((inputs.size(0), 1, 1, self.opt.max_seq_len), dtype=np.float32), 42 | dtype=torch.float32, 43 | ).to(self.opt.device) 44 | SA_out = self.SA(inputs, zero_tensor) 45 | return self.tanh(SA_out[0]) 46 | 47 | 48 | class LCF_BERT2Dual(FXBaseModel): 49 | """ 50 | While lcf.py:LCF_BERT is the implementation as implemented in PyTorch-ABSA repository, this implementation here 51 | (LCF_BERT2Dual) is following the implementation as in the author's repository, which according to 52 | https://github.com/yangheng95/LC-ABSA/issues/10#issuecomment-670301603 has seen some more improvements compared to 53 | the version from PyTorch-ABSA 54 | """ 55 | 56 | @staticmethod 57 | def get_language_models(): 58 | return (get_default_lm(),) 59 | 60 | @staticmethod 61 | def get_input_field_ids(): 62 | return [ 63 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS), 64 | ( 65 | get_default_lm(), 66 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 67 | ), 68 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 69 | (get_default_lm(), FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS), 70 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 71 | ] 72 | 73 | def __init__(self, transformer_models: Dict, opt: Namespace): 74 | super(LCF_BERT2Dual, self).__init__() 75 | 76 | bert = transformer_models[get_default_lm()] 77 | 78 | self.bert4global = bert 79 | # note that we use a second bert here, which should slightly improve performance 80 | # cf. https://github.com/yangheng95/LC-ABSA/#tips 81 | # self.bert4local = copy.deepcopy(bert) 82 | # we can't do this on scc because even for batch size = only 16 we run out of 83 | # memory. 
because of that, we use the same bert for both local and global 84 | # (just as in lcf.py) 85 | self.bert4local = bert 86 | self.opt = opt 87 | self.dropout = nn.Dropout(self.opt.dropout) 88 | self.bert_SA = SelfAttention(bert.config, self.opt) 89 | self.linear2 = nn.Linear(bert.config.hidden_size * 2, bert.config.hidden_size) 90 | # self.linear3 = nn.Linear(bert.config.hidden_size * 3, bert.config.hidden_size) 91 | self.bert_pooler = None # BertPooler(bert.config) 92 | self.dense = nn.Linear(bert.config.hidden_size, self.opt.polarities_dim) 93 | 94 | def feature_dynamic_mask(self, text_local_indices, aspect_indices): 95 | texts = text_local_indices.cpu().numpy() 96 | asps = aspect_indices.cpu().numpy() 97 | mask_len = self.opt.SRD 98 | masked_text_raw_indices = np.ones( 99 | ( 100 | text_local_indices.size(0), 101 | self.opt.max_seq_len, 102 | self.bert4local.config.hidden_size, 103 | ), 104 | dtype=np.float32, 105 | ) 106 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))): 107 | asp_len = np.count_nonzero(asps[asp_i]) - 2 108 | try: 109 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0] 110 | except: 111 | continue 112 | if asp_begin >= mask_len: 113 | mask_begin = asp_begin - mask_len 114 | else: 115 | mask_begin = 0 116 | for i in range(mask_begin): 117 | masked_text_raw_indices[text_i][i] = np.zeros( 118 | (self.bert4local.config.hidden_size), dtype=np.float 119 | ) 120 | for j in range(asp_begin + asp_len + mask_len, self.opt.max_seq_len): 121 | masked_text_raw_indices[text_i][j] = np.zeros( 122 | (self.bert4local.config.hidden_size), dtype=np.float 123 | ) 124 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices) 125 | return masked_text_raw_indices.to(self.opt.device) 126 | 127 | def feature_dynamic_weighted(self, text_local_indices, aspect_indices): 128 | texts = text_local_indices.cpu().numpy() 129 | asps = aspect_indices.cpu().numpy() 130 | masked_text_raw_indices = np.ones( 131 | ( 132 | text_local_indices.size(0), 133 | self.opt.max_seq_len, 134 | self.bert4local.config.hidden_size, 135 | ), 136 | dtype=np.float32, 137 | ) 138 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))): 139 | asp_len = np.count_nonzero(asps[asp_i]) - 2 140 | try: 141 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0] 142 | asp_avg_index = (asp_begin * 2 + asp_len) / 2 143 | except: 144 | continue 145 | distances = np.zeros(np.count_nonzero(texts[text_i]), dtype=np.float32) 146 | for i in range(1, np.count_nonzero(texts[text_i]) - 1): 147 | if abs(i - asp_avg_index) + asp_len / 2 > self.opt.SRD: 148 | distances[i] = 1 - ( 149 | abs(i - asp_avg_index) + asp_len / 2 - self.opt.SRD 150 | ) / np.count_nonzero(texts[text_i]) 151 | else: 152 | distances[i] = 1 153 | for i in range(len(distances)): 154 | masked_text_raw_indices[text_i][i] = ( 155 | masked_text_raw_indices[text_i][i] * distances[i] 156 | ) 157 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices) 158 | return masked_text_raw_indices.to(self.opt.device) 159 | 160 | def forward(self, inputs): 161 | text_target_bert_indices = FXDataset.get_input_by_params( 162 | inputs, get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS, 163 | ) 164 | text_target_bert_segments_ids = FXDataset.get_input_by_params( 165 | inputs, 166 | get_default_lm(), 167 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 168 | ) 169 | text_local_indices = FXDataset.get_input_by_params( 170 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 171 | ) 172 | 
aspect_indices = FXDataset.get_input_by_params( 173 | inputs, get_default_lm(), FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS 174 | ) 175 | 176 | # bert 177 | global_context_features = self.invoke_language_model( 178 | self.bert4global, 179 | input_ids=text_target_bert_indices, 180 | token_type_ids=text_target_bert_segments_ids, 181 | ) 182 | local_context_features = self.invoke_language_model( 183 | self.bert4local, text_local_indices 184 | ) 185 | 186 | # mask 187 | if self.opt.local_context_focus == "cdm": 188 | lcf_matrix = self.feature_dynamic_mask(text_local_indices, aspect_indices) 189 | elif self.opt.local_context_focus == "cdw": 190 | lcf_matrix = self.feature_dynamic_weighted( 191 | text_local_indices, aspect_indices 192 | ) 193 | 194 | # LCF layer 195 | lcf_features = torch.mul(local_context_features, lcf_matrix) 196 | lcf_features = self.bert_SA(lcf_features) 197 | 198 | cat_features = torch.cat((lcf_features, global_context_features), dim=-1) 199 | cat_features = self.linear2(cat_features) 200 | cat_features = self.dropout(cat_features) 201 | 202 | pooled_out = self.bert_pooler(cat_features) 203 | dense_out = self.dense(pooled_out) 204 | 205 | return dense_out 206 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/notargetcls.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch.nn as nn 5 | 6 | from NewsSentiment.consts import * 7 | from NewsSentiment.dataset import FXDataset 8 | from NewsSentiment.layers.AggregatorForBert import AggregatorForBert 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class NoTargetClsBert(FXBaseModel): 13 | @staticmethod 14 | def get_language_models(): 15 | return (BERT_BASE_UNCASED,) 16 | 17 | @staticmethod 18 | def get_input_field_ids(): 19 | return [ 20 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 21 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 22 | ] 23 | 24 | def __init__(self, transformer_models: Dict, opt: Namespace): 25 | super(NoTargetClsBert, self).__init__() 26 | self.language_model = transformer_models[BERT_BASE_UNCASED] 27 | self.aggregator_for_bert = AggregatorForBert(opt.spc_lm_representation) 28 | self.dropout = nn.Dropout(opt.dropout) 29 | self.dense = nn.Linear( 30 | self.language_model.config.hidden_size, opt.polarities_dim 31 | ) 32 | 33 | def forward(self, inputs): 34 | text_bert_indices = FXDataset.get_input_by_params( 35 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 36 | ) 37 | 38 | last_hidden_state, pooler_output, all_hidden_states = self.language_model( 39 | input_ids=text_bert_indices 40 | ) 41 | prepared_output = self.aggregator_for_bert( 42 | last_hidden_state, pooler_output, all_hidden_states 43 | ) 44 | 45 | prepared_output = self.dropout(prepared_output) 46 | logits = self.dense(prepared_output) 47 | 48 | return logits 49 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/random_single.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from NewsSentiment.consts import * 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class RandomSingle(FXBaseModel): 13 | """ 14 | 15 | """ 
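    # note: this model is a trivial baseline; it ignores the language model and the
    # input text and simply returns uniformly random logits in forward() below, which
    # serves as a naive lower bound when comparing TSC models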
16 | 17 | @staticmethod 18 | def get_language_models(): 19 | """ 20 | All architecture assumes that at least one model is used so we just require 21 | bert here for compatibility. 22 | :return: 23 | """ 24 | return (BERT_BASE_UNCASED,) 25 | 26 | @staticmethod 27 | def get_input_field_ids(): 28 | return [ 29 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 31 | ] 32 | 33 | def __init__(self, transformer_models: Dict, opt: Namespace): 34 | super(RandomSingle, self).__init__() 35 | self.num_classes = opt.polarities_dim 36 | 37 | def forward(self, inputs): 38 | text_bert_indices = FXDataset.get_input_by_params( 39 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 40 | ) 41 | batch_size = text_bert_indices.shape[0] 42 | num_targets = text_bert_indices.shape[1] 43 | 44 | # get a random tensor 45 | logits = torch.rand(batch_size, num_targets, self.num_classes) 46 | 47 | return logits 48 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/spc.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch.nn as nn 5 | 6 | from NewsSentiment.consts import * 7 | from NewsSentiment.dataset import FXDataset 8 | from NewsSentiment.layers.AggregatorForBert import AggregatorForBert 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class SPC_Base(FXBaseModel): 13 | @staticmethod 14 | def get_language_models(): 15 | return (get_default_lm(),) 16 | 17 | @staticmethod 18 | def get_input_field_ids(): 19 | return [ 20 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS), 21 | ( 22 | get_default_lm(), 23 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 24 | ), 25 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 26 | ] 27 | 28 | def __init__(self, transformer_models: Dict, opt: Namespace): 29 | super(SPC_Base, self).__init__() 30 | self.language_model = transformer_models[get_default_lm()] 31 | self.aggregator_for_bert = AggregatorForBert(opt.spc_lm_representation) 32 | self.dropout = nn.Dropout(opt.dropout) 33 | self.dense = nn.Linear( 34 | self.language_model.config.hidden_size, opt.polarities_dim 35 | ) 36 | 37 | def forward(self, inputs): 38 | text_target_bert_indices = FXDataset.get_input_by_params( 39 | inputs, get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS 40 | ) 41 | text_target_bert_segments_ids = FXDataset.get_input_by_params( 42 | inputs, 43 | get_default_lm(), 44 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS, 45 | ) 46 | 47 | last_hidden_state = self.invoke_language_model( 48 | lm=self.language_model, 49 | input_ids=text_target_bert_indices, 50 | token_type_ids=text_target_bert_segments_ids, 51 | ) 52 | # the following two variables can only be derived for some model, whereas invoke_language_model currently 53 | # returns only one last_hidden_state. 
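        # consequently, only the "mean_last" aggregation is supported here (asserted just
        # below); other spc_lm_representation settings would require the pooler output or
        # all hidden states, which invoke_language_model does not return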
54 | assert self.aggregator_for_bert.spc_lm_representation == "mean_last" 55 | pooler_output, all_hidden_states = None, None 56 | prepared_output = self.aggregator_for_bert( 57 | last_hidden_state, pooler_output, all_hidden_states 58 | ) 59 | prepared_output = self.dropout(prepared_output) 60 | logits = self.dense(prepared_output) 61 | 62 | return logits 63 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/td_bert.py: -------------------------------------------------------------------------------- 1 | # this file re-implements TD-BERT by Gao Zhengjie et al. 2 | # while this file aims to be conceptually identical to TD-BERT, one technical difference is that we do not calculate 3 | # the target mask within the model (here) but do this step as part of the dataset processing. in case there are strong 4 | # performance differences between original TD-BERT and this implementation, this technical difference might be worth 5 | # exploring whether it actually yields an identical implementation. 6 | from argparse import Namespace 7 | from typing import Dict 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | from NewsSentiment.consts import * 14 | from NewsSentiment.dataset import FXDataset 15 | from NewsSentiment.models.FXBaseModel import FXBaseModel 16 | 17 | 18 | class TD_BERT(FXBaseModel): 19 | @staticmethod 20 | def get_language_models(): 21 | return (get_default_lm(),) 22 | 23 | @staticmethod 24 | def get_input_field_ids(): 25 | return [ 26 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 27 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 28 | ] 29 | 30 | def __init__(self, transformer_models: Dict, opt: Namespace): 31 | super(TD_BERT, self).__init__() 32 | self.opt = opt 33 | self.language_model = transformer_models[get_default_lm()] 34 | self.dropout = nn.Dropout(opt.dropout) 35 | self.fc = nn.Linear(self.language_model.config.hidden_size, opt.polarities_dim) 36 | 37 | def forward(self, inputs, is_return_ensemble_values: bool = False): 38 | # get inputs 39 | text_bert_indices = FXDataset.get_input_by_params( 40 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 41 | ) 42 | text_bert_indices_target_mask = FXDataset.get_input_by_params( 43 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 44 | ) 45 | 46 | # apply bert 47 | last_hidden_states = self.invoke_language_model( 48 | lm=self.language_model, 49 | input_ids=text_bert_indices, 50 | ) 51 | 52 | # element-wise multiplication with target mask 53 | # unsqueeze, cf. https://stackoverflow.com/q/62559382 54 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze( 55 | -1 56 | ) 57 | last_hidden_states_only_target = ( 58 | last_hidden_states * text_bert_indices_target_mask_unsqueezed 59 | ) 60 | 61 | # as in TD-BERT, perform max pooling 62 | last_hidden_states_only_target_aggregated, _ = last_hidden_states_only_target.max( 63 | dim=1 64 | ) 65 | 66 | # dropout before dense layer, as in most other tsc models 67 | last_hidden_states_only_target_aggregated = self.dropout( 68 | last_hidden_states_only_target_aggregated 69 | ) 70 | 71 | if is_return_ensemble_values: 72 | return last_hidden_states_only_target_aggregated 73 | else: 74 | # dense layer 75 | logits = self.fc(last_hidden_states_only_target_aggregated) 76 | # removed tanh, which was invoked in original tdbert. for training, we dont 77 | # need it to properly compute the loss. 
we would, however, need softmax during 78 | # inferring to have the probabilities of all mutually exclusive classes 79 | # to sum up to 1 80 | return logits 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/td_bert_qa.py: -------------------------------------------------------------------------------- 1 | # this file re-implements TD-BERT-QA by Gao Zhengjie et al. 2 | from argparse import Namespace 3 | from typing import Dict 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from NewsSentiment.consts import * 10 | from NewsSentiment.dataset import FXDataset 11 | from NewsSentiment.models.FXBaseModel import FXBaseModel 12 | 13 | 14 | class TD_BERT_QA_MUL(FXBaseModel): 15 | @staticmethod 16 | def get_language_models(): 17 | return (BERT_BASE_UNCASED,) 18 | 19 | @staticmethod 20 | def get_input_field_ids(): 21 | return [ 22 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 23 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 24 | ] 25 | 26 | def __init__(self, transformer_models: Dict, opt: Namespace): 27 | super(TD_BERT_QA_MUL, self).__init__() 28 | self.opt = opt 29 | self.language_model = transformer_models[BERT_BASE_UNCASED] 30 | self.dropout = nn.Dropout(opt.dropout) 31 | self.fc = nn.Linear(self.language_model.config.hidden_size, opt.polarities_dim) # 全连接层 bbfc 32 | self.bn = nn.BatchNorm1d(self.language_model.config.hidden_size) 33 | 34 | def forward(self, inputs): 35 | # get inputs 36 | text_bert_indices = FXDataset.get_input_by_params( 37 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 38 | ) 39 | text_bert_indices_target_mask = FXDataset.get_input_by_params( 40 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 41 | ) 42 | 43 | # apply bert 44 | last_hidden_states, pooler_output, all_hidden_states = self.language_model( 45 | input_ids=text_bert_indices 46 | ) 47 | 48 | # element-wise multiplication with target mask 49 | # unsqueeze, cf. https://stackoverflow.com/q/62559382 50 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze( 51 | -1 52 | ) 53 | last_hidden_states_only_target = ( 54 | last_hidden_states * text_bert_indices_target_mask_unsqueezed 55 | ) 56 | 57 | # as in TD-BERT, perform max pooling 58 | last_hidden_states_only_target_aggregated, _ = last_hidden_states_only_target.max( 59 | dim=1 60 | ) 61 | 62 | target_in_sent_embed = self.bn(last_hidden_states_only_target_aggregated) 63 | target_in_sent_embed = target_in_sent_embed.mul(pooler_output) 64 | cat = self.dropout(target_in_sent_embed) 65 | 66 | logits = self.fc(cat) 67 | # removed tanh, which was invoked in original tdbert. for training, we dont 68 | # need it to properly compute the loss. 
we would, however, need softmax during 69 | # inferring to have the probabilities of all mutually exclusive classes 70 | # to sum up to 1 71 | 72 | return logits 73 | 74 | 75 | class TD_BERT_QA_CON(FXBaseModel): 76 | @staticmethod 77 | def get_language_models(): 78 | return (BERT_BASE_UNCASED,) 79 | 80 | @staticmethod 81 | def get_input_field_ids(): 82 | return [ 83 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 84 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 85 | ] 86 | 87 | def __init__(self, transformer_models: Dict, opt: Namespace): 88 | super(TD_BERT_QA_CON, self).__init__() 89 | self.opt = opt 90 | self.language_model = transformer_models[BERT_BASE_UNCASED] 91 | self.dropout = nn.Dropout(opt.dropout) 92 | self.fc = nn.Linear(self.language_model.config.hidden_size*2, opt.polarities_dim) # 全连接层 bbfc 93 | self.bn = nn.BatchNorm1d(self.language_model.config.hidden_size) 94 | 95 | def forward(self, inputs): 96 | # get inputs 97 | text_bert_indices = FXDataset.get_input_by_params( 98 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 99 | ) 100 | text_bert_indices_target_mask = FXDataset.get_input_by_params( 101 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 102 | ) 103 | 104 | # apply bert 105 | last_hidden_states, pooler_output, all_hidden_states = self.language_model( 106 | input_ids=text_bert_indices 107 | ) 108 | 109 | # element-wise multiplication with target mask 110 | # unsqueeze, cf. https://stackoverflow.com/q/62559382 111 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze( 112 | -1 113 | ) 114 | last_hidden_states_only_target = ( 115 | last_hidden_states * text_bert_indices_target_mask_unsqueezed 116 | ) 117 | 118 | # as in TD-BERT, perform max pooling 119 | last_hidden_states_only_target_aggregated, _ = last_hidden_states_only_target.max( 120 | dim=1 121 | ) 122 | 123 | # not entirely sure whether this is as in original tdbertqa-con, because the code does not exist in the repo 124 | # (seems to be part of the commented lines in there) 125 | pooler_output = self.bn(pooler_output) 126 | 127 | cat = torch.cat([pooler_output, last_hidden_states_only_target_aggregated], dim=1) 128 | cat = self.dropout(cat) 129 | logits = self.fc(cat) 130 | # removed tanh, which was invoked in original tdbert. for training, we dont 131 | # need it to properly compute the loss. we would, however, need softmax during 132 | # inferring to have the probabilities of all mutually exclusive classes 133 | # to sum up to 1 134 | 135 | return logits 136 | -------------------------------------------------------------------------------- /NewsSentiment/models/singletarget/tdbertlikesingle.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from typing import Dict 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from NewsSentiment.consts import * 8 | from NewsSentiment.dataset import FXDataset 9 | from NewsSentiment.models.FXBaseModel import FXBaseModel 10 | 11 | 12 | class TDBertLikeSingle(FXBaseModel): 13 | """ 14 | This model returns uses a target mask for a single target to obtain only the hidden 15 | states of those last layer nodes that correspond to a wordpiece of the target 16 | phrase. Then aggregation, dropout and a dense layer is applied to retrieve the 17 | 3-class logits. 
18 | 19 | From a conceptual perspective, TD-BERT is in some aspects similar (e.g., only the 20 | output of target-phrase-related nodes is used), but in other aspects not similar, 21 | e.g., we don't use max pooling but a mean on all non-0 target nodes. In other cases, 22 | it is not quite clear how the authors of TD-BERT implemented them. An email I sent 23 | to them was not answered yet. 24 | """ 25 | 26 | @staticmethod 27 | def get_language_models(): 28 | return (BERT_BASE_UNCASED,) 29 | 30 | @staticmethod 31 | def get_input_field_ids(): 32 | return [ 33 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS), 34 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK), 35 | ] 36 | 37 | def __init__(self, transformer_models: Dict, opt: Namespace): 38 | super(TDBertLikeSingle, self).__init__() 39 | self.language_model = transformer_models[BERT_BASE_UNCASED] 40 | self.dropout = nn.Dropout(opt.dropout) 41 | self.dense = nn.Linear( 42 | self.language_model.config.hidden_size, opt.polarities_dim 43 | ) 44 | 45 | def forward(self, inputs): 46 | # get inputs 47 | text_bert_indices = FXDataset.get_input_by_params( 48 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS 49 | ) 50 | text_bert_indices_target_mask = FXDataset.get_input_by_params( 51 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK 52 | ) 53 | 54 | # apply bert 55 | last_hidden_states, pooler_output, all_hidden_states = self.language_model( 56 | input_ids=text_bert_indices 57 | ) 58 | 59 | # element-wise multiplication with target mask 60 | # unsqueeze, cf. https://stackoverflow.com/q/62559382 61 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze( 62 | -1 63 | ) 64 | last_hidden_states_only_target = ( 65 | last_hidden_states * text_bert_indices_target_mask_unsqueezed 66 | ) 67 | 68 | # similar to TD-BERT, perform max pooling TODO not implemented yet, instead: 69 | # for now, retrieve only the values of the target's output tokens and then 70 | # calculate the mean: (batchsize, 150, 768) -> (batchsize, 768) 71 | # get the positions of target nodes. 
note that we cannot simply take the mean 72 | # as it would divide by the number of the sequence length, whereas the effective 73 | # length is only of size k, where k is the number of non-zero scalars in the 74 | # input mask (since we are only interested in those values) 75 | last_hidden_states_only_target_aggregated = last_hidden_states_only_target.sum( 76 | dim=1 77 | ) 78 | # get the sum for each (batch, hidden states) (sum over the sequence length 79 | # dim) 80 | denominator_for_mean = text_bert_indices_target_mask_unsqueezed.sum(dim=1) 81 | # divide for each (batch, hidden states) by the denominator to get the mean 82 | last_hidden_states_only_target_aggregated = ( 83 | last_hidden_states_only_target_aggregated / denominator_for_mean 84 | ) 85 | # dropout before dense layer, as in most other tsc models 86 | last_hidden_states_only_target_aggregated = self.dropout( 87 | last_hidden_states_only_target_aggregated 88 | ) 89 | # dense layer 90 | logits = self.dense(last_hidden_states_only_target_aggregated) 91 | 92 | return logits 93 | -------------------------------------------------------------------------------- /NewsSentiment/plotter_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from sklearn.metrics import confusion_matrix 6 | 7 | from NewsSentiment.fxlogger import get_logger 8 | 9 | logger = get_logger() 10 | 11 | 12 | def create_save_plotted_confusion_matrix(conf_matrix, expected_labels, basepath): 13 | ax, title = plot_confusion_matrix(conf_matrix, expected_labels, normalize=False) 14 | filepath = os.path.join(basepath, 'stats.png') 15 | plt.savefig(filepath, bbox_inches='tight') 16 | logger.debug("created confusion matrices in path: {}".format(filepath)) 17 | 18 | 19 | def plot_confusion_matrix(cm, classes, normalize=False, title=None, cmap=plt.cm.Blues): 20 | """ 21 | This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`. 22 | based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py 23 | """ 24 | if not title: 25 | if normalize: 26 | title = 'Normalized confusion matrix' 27 | else: 28 | title = 'Confusion matrix, without normalization' 29 | 30 | if normalize: 31 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 32 | logger.debug("Normalized confusion matrix") 33 | else: 34 | logger.debug('Confusion matrix, without normalization') 35 | 36 | logger.debug(cm) 37 | 38 | fig, ax = plt.subplots() 39 | im = ax.imshow(cm, interpolation='nearest', cmap=cmap) 40 | ax.figure.colorbar(im, ax=ax) 41 | # We want to show all ticks... 42 | ax.set(xticks=np.arange(cm.shape[1]), 43 | yticks=np.arange(cm.shape[0]), 44 | # ... and label them with the respective list entries 45 | xticklabels=classes, yticklabels=classes, 46 | title=title, 47 | ylabel='True label', 48 | xlabel='Predicted label') 49 | 50 | # Rotate the tick labels and set their alignment. 51 | plt.setp(ax.get_xticklabels(), rotation=45, ha="right", 52 | rotation_mode="anchor") 53 | 54 | # Loop over data dimensions and create text annotations. 55 | fmt = '.2f' if normalize else 'd' 56 | thresh = cm.max() / 2. 
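    # use white text on cells darker than half of the maximum value so that the
    # annotations written below remain readable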
57 | for i in range(cm.shape[0]): 58 | for j in range(cm.shape[1]): 59 | ax.text(j, i, format(cm[i, j], fmt), 60 | ha="center", va="center", 61 | color="white" if cm[i, j] > thresh else "black") 62 | fig.tight_layout() 63 | 64 | return ax, title 65 | 66 | 67 | if __name__ == '__main__': 68 | y_true = ["cat", "ant", "cat", "cat", "ant", "bird"] 69 | y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"] 70 | confmat = confusion_matrix(y_true, y_pred, labels=["ant", "bird", "cat"]) 71 | 72 | create_save_plotted_confusion_matrix(confmat, ["ant", "bird", "cat"], '.') 73 | -------------------------------------------------------------------------------- /NewsSentiment/pretrained_models/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | !state_dicts 6 | -------------------------------------------------------------------------------- /NewsSentiment/pretrained_models/state_dicts/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /NewsSentiment/pretrained_models/state_dicts/grutsc_v1-0-0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/pretrained_models/state_dicts/grutsc_v1-0-0 -------------------------------------------------------------------------------- /NewsSentiment/results/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles 2 | NewsMTSC is a dataset for target-dependent sentiment classification (TSC) on 3 | news articles reporting on policy issues. The dataset consists of more than 11k labeled 4 | sentences, which we sampled from news articles from online US news outlets. More 5 | information can be found in our paper published at the EACL 2021. 6 | 7 | This repository contains the **dataset** for target-dependent 8 | sentiment classification in news articles reporting on policy issues. Additionally, 9 | the repository contains our **model** named GRU-TSC, which achieves state-of-the-art 10 | TSC classification performance on NewsMTSC. Check it out - it **works out of the box** :-) 11 | 12 | # Quick start 13 | 14 | * **I want to classify sentiment**: check out our easy-to-use, high-quality sentiment classifier on [PyPI](https://pypi.org/project/NewsSentiment/) 15 | * **I need the dataset**: you can [download it here](https://github.com/fhamborg/NewsMTSC/raw/main/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-dataset.zip) or [view it here](https://github.com/fhamborg/NewsMTSC/tree/main/NewsSentiment/controller_data/datasets/NewsMTSC-dataset). We also offer NewsMTSC as a dataset on [Huggingface Hub](https://huggingface.co/datasets/fhamborg/news_sentiment_newsmtsc) and on [Kaggle](https://www.kaggle.com/fhamborg/news-articles-sentiment). 16 | * **I want to train my own models**: read the remainder of this file. 
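
For the first use case, classifying the sentiment of a target takes only a few lines with the [NewsSentiment](https://pypi.org/project/NewsSentiment/) package. The snippet below mirrors the usage described in the package readme (included further down in this repository); note that the first call downloads the fine-tuned model:

```python
from NewsSentiment import TargetSentimentClassifier

tsc = TargetSentimentClassifier()

# classify the sentiment expressed towards "Peter" (left context, target, right context)
sentiment = tsc.infer_from_text("I like ", "Peter", " but I don't like Robert.")
print(sentiment[0])
```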
17 | 18 | Reminder: the following description is only relevant if you in fact want to train your own models. If that's not the case, please check above for links to the dataset and our easy-to-use Python package. 19 | 20 | 21 | # Installation 22 | It's super easy, we promise! Note that following these instructions is only necessary if you're planning to train a model using our tool. If you only want to predict the sentiment of sentences, please use our [Python package](https://pypi.org/project/NewsSentiment/), which is even easier to install and use :-) 23 | 24 | NewsMTSC was tested on macOS and Ubuntu; other OS may work, too. Let us know :-) 25 | 26 | **1. Set up the environment:** 27 | 28 | This step is optional if you have Python 3.8 installed already (`python --version`). If you don't have Python 3.8, we recommend using Anaconda for setting up requirements. If you do not have it yet, follow Anaconda's 29 | [installation instructions](https://docs.anaconda.com/anaconda/install/). 30 | 31 | To set up a Python 3.8 environment (in case you don't have one yet) you may use, for example: 32 | ```bash 33 | conda create --yes -n newsmtsc python=3.8 34 | conda activate newsmtsc 35 | ``` 36 | 37 | FYI, for users of virtualenv, the equivalent commands would be: 38 | ```bash 39 | virtualenv -ppython3.8 --setuptools 45 venv 40 | source venv/bin/activate 41 | ``` 42 | 43 | Note: We recommend Python 3.8; however, we have successfully tested NewsMTSC with Python versions >=3.8, <3.12. 44 | 45 | **2. Set up NewsMTSC:** 46 | ```bash 47 | git clone git@github.com:fhamborg/NewsMTSC.git 48 | ``` 49 | 50 | Afterward, for example, open the project in your IDE and follow the instructions described in the section "Training". 51 | 52 | Note that if you only want to classify sentiment using our model, we recommend that you use our PyPI package [NewsSentiment](https://pypi.org/project/NewsSentiment/). Getting it is as simple as `pip install NewsSentiment` and using it is four lines of code :-) 53 | 54 | 55 | # Training 56 | If you want to train one of our models or your own model, please clone the repository first. 57 | 58 | 59 | There are two entry points to the system. `train.py` is used to train and evaluate a specific model on a specific dataset using 60 | specific hyperparameters. We call a single run an _experiment_. `controller.py` is used to run multiple experiments 61 | automatically. This is, for example, useful for model selection and for evaluating hundreds or thousands of combinations of 62 | models, hyperparameters, and datasets. 63 | 64 | ## Running a single experiment 65 | Goal: training a model with a user-defined (hyper)parameter combination. 66 | 67 | `train.py` allows fine-grained control over the training and evaluation process, yet for most command line arguments 68 | we provide useful defaults. Two arguments are required: 69 | 70 | * `--own_model_name` (which model is used, e.g., `grutsc`), 71 | * `--dataset_name` (which dataset is used, e.g., `newsmtsc-rw`). 72 | 73 | For more information refer to `train.py` and 74 | `combinations_absadata_0.py`. If you just want to get started quickly, the command below should work out of the box. 75 | 76 | ``` 77 | python train.py --own_model_name grutsc --dataset_name newsmtsc-rw 78 | ``` 79 | 80 | ## Running multiple experiments 81 | Goal: finding the (hyper)parameter combination to train a model that achieves the best performance. 
82 | 83 | `controller.py` takes a set of values for each argument, creates combinations of arguments, applies conditions to remove 84 | unnecessary combinations (e.g., some arguments may only be used for a specific model), and creates a multiprocessing 85 | pool to run experiments of these argument combinations in parallel. After completion, `controller.py` creates a summary, 86 | which contains detailed results, including evaluation performance, of all experiments. By using `createoverview.py`, you 87 | can export this summary into an Excel spreadsheet. 88 | 89 | # Support 90 | If you have questions on how to use NewsMTSC or its library, please create a new [issue](https://github.com/fhamborg/NewsMTSC/issues) on GitHub. Please understand that we are not able to provide individual support via email. We think that help is more valuable if it is shared publicly so that more people can benefit from it. 91 | 92 | # Acknowledgements 93 | This repository is in part based on [ABSA-PyTorch](https://github.com/songyouwei/ABSA-PyTorch). 94 | We thank Song et al. for making their excellent repository open source. 95 | 96 | # How to cite 97 | If you use the dataset or model, please cite our [paper](https://www.aclweb.org/anthology/2021.eacl-main.142/) ([PDF](https://www.aclweb.org/anthology/2021.eacl-main.142.pdf)): 98 | 99 | ``` 100 | @InProceedings{Hamborg2021b, 101 | author = {Hamborg, Felix and Donnay, Karsten}, 102 | title = {NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles}, 103 | booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)}, 104 | year = {2021}, 105 | month = {Apr.}, 106 | location = {Virtual Event}, 107 | } 108 | ``` 109 | -------------------------------------------------------------------------------- /READMEpypi.md: -------------------------------------------------------------------------------- 1 | # NewsSentiment: easy-to-use, high-quality target-dependent sentiment classification for news articles 2 | NewsSentiment is an easy-to-use Python library that achieves state-of-the-art performance 3 | for target-dependent sentiment classification on news articles. 4 | NewsSentiment uses the currently [best performing](https://aclanthology.org/2021.eacl-main.142.pdf) 5 | targeted sentiment classifier for news articles. In contrast to regular sentiment 6 | classification, targeted sentiment classification allows you to provide a target in a sentence. 7 | Only for this target, the sentiment is then predicted. This is more reliable in many 8 | cases, as demonstrated by the following simplistic example: "I like Bert, but I hate Robert." 9 | 10 | We designed NewsSentiment to serve as an easy-to-use wrapper around the sophisticated 11 | GRU-TSC model, which was trained on the NewsMTSC dataset consisting of more than 10k 12 | labeled sentences sampled from political news articles. More information on the dataset 13 | and the model can be found [here](https://aclanthology.org/2021.eacl-main.142.pdf). The 14 | dataset, the model, and its source code can be viewed in our [GitHub repository](https://github.com/fhamborg/NewsMTSC). 15 | 16 | # Installation 17 | It's super easy, we promise! 18 | 19 | You just need a Python 3.8 environment. See [here](https://raw.githubusercontent.com/fhamborg/NewsMTSC/main/pythoninfo.md) if you 20 | don't have Python or a different version (run `python --version` in a terminal to see 21 | your version). 
Then run: 22 | 23 | ```bash 24 | pip3 install NewsSentiment # without cuda support (choose this if you don't know what cuda is) 25 | pip3 install NewsSentiment[cuda] # with cuda support 26 | ``` 27 | 28 | You're all set now :-) 29 | 30 | # Target-dependent Sentiment Classification 31 | 32 | Note that using NewsSentiment the first time will take *a few minutes* because it needs 33 | to download the fine-tuned language model. Please do not abort this initial download. 34 | Since this is a one-time process, future use of NewsSentiment will be much faster. 35 | 36 | ```python 37 | from NewsSentiment import TargetSentimentClassifier 38 | tsc = TargetSentimentClassifier() 39 | 40 | data = [ 41 | ("I like ", "Peter", " but I don't like Robert."), 42 | ("", "Mark Meadows", "'s coverup of Trump’s coup attempt is falling apart."), 43 | ] 44 | 45 | sentiments = tsc.infer(targets=data) 46 | 47 | for i, result in enumerate(sentiments): 48 | print("Sentiment: ", i, result[0]) 49 | ``` 50 | 51 | This method will internally split the data into batches of size 16 for increased speed. You can adjust the 52 | batch size using the `batch_size` parameter, e.g., `batch_size=32`. 53 | 54 | Alternatively, you can also use the `infer_from_text` method to infer sentiment for a single target: 55 | 56 | ```python 57 | sentiment = tsc.infer_from_text("I like " ,"Peter", " but I don't like Robert.") 58 | print(sentiment[0]) 59 | ``` 60 | 61 | # How to identify a person in a sentence? 62 | 63 | In case your data is not separated as shown in the examples above, i.e., in three segments, you will need to identify one (or more) targets first. 64 | How this is done best depends on your project and analysis task but you may, for example, use NER. This [example](https://github.com/fhamborg/NewsMTSC/issues/30#issuecomment-1700645679) shows a simple way of doing so. 65 | 66 | # Acknowledgements 67 | 68 | Thanks to [Tilman Hornung](https://github.com/t1h0) for adding the batching functionality and various other improvements. 
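
# Example: identifying targets with NER

As mentioned in the section "How to identify a person in a sentence?" above, one common option is to run named entity recognition first and then classify each detected person. The following is only a minimal sketch of this idea: it assumes that spaCy's small English model is available (`python -m spacy download en_core_web_sm`), and how you segment text and pick targets will depend on your own project.

```python
import spacy

from NewsSentiment import TargetSentimentClassifier

nlp = spacy.load("en_core_web_sm")
tsc = TargetSentimentClassifier()

sentence = "I like Peter but I don't like Robert."

# build one (left context, target, right context) triple per detected person
targets = [
    (sentence[: ent.start_char], ent.text, sentence[ent.end_char :])
    for ent in nlp(sentence).ents
    if ent.label_ == "PERSON"
]

for (left, target, right), result in zip(targets, tsc.infer(targets=targets)):
    print(target, "->", result[0])
```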
69 | 70 | # How to cite 71 | If you use the dataset or model, please cite our [paper](https://www.aclweb.org/anthology/2021.eacl-main.142/) ([PDF](https://www.aclweb.org/anthology/2021.eacl-main.142.pdf)): 72 | 73 | ``` 74 | @InProceedings{Hamborg2021b, 75 | author = {Hamborg, Felix and Donnay, Karsten}, 76 | title = {NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles}, 77 | booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)}, 78 | year = {2021}, 79 | month = {Apr.}, 80 | location = {Virtual Event}, 81 | } 82 | ``` 83 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | from NewsSentiment.models.singletarget.grutscsingle import GRUTSCSingle 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=45", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /pythoninfo.md: -------------------------------------------------------------------------------- 1 | This step is optional if you have Python >=3.8, <3.12 installed (run `python --version` 2 | in a terminal and check the version that is printed; we recommend 3.8). If you don't 3 | have Python (in the correct version), we recommend using Anaconda for setting up 4 | requirements because it is very easy (but any way of installing is fine). 5 | If you do not have Anaconda yet, follow their 6 | [installation instructions](https://docs.anaconda.com/anaconda/install/). 7 | 8 | After installing Anaconda, to set up a Python 3.8 environment (in case you don't have one 9 | yet) execute: 10 | 11 | ```bash 12 | conda create --yes -n newsmtsc python=3.8 13 | conda activate newsmtsc 14 | ``` 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = NewsSentiment 3 | version = 1.2.28 4 | author = Felix Hamborg 5 | author_email = felix.hamborg@uni-konstanz.de 6 | description = Easy-to-use, high-quality target-dependent sentiment classification for English news articles 7 | long_description = file: READMEpypi.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/fhamborg/NewsMTSC 10 | project_urls = 11 | Bug Tracker = https://github.com/fhamborg/NewsMTSC/issues 12 | classifiers = 13 | Development Status :: 5 - Production/Stable 14 | Environment :: Console 15 | License :: OSI Approved :: MIT License 16 | Operating System :: OS Independent 17 | Programming Language :: Python :: 3 18 | Programming Language :: Python :: 3.8 19 | Programming Language :: Python :: 3.9 20 | Programming Language :: Python :: 3.10 21 | Programming Language :: Python :: 3.11 22 | Intended Audience :: Developers 23 | Intended Audience :: Science/Research 24 | Topic :: Scientific/Engineering 25 | Topic :: Scientific/Engineering :: Information Analysis 26 | Topic :: Scientific/Engineering :: Artificial Intelligence 27 | Topic :: Text Processing :: Linguistic 28 | 29 | [options] 30 | package_dir = 31 | = . 
32 | packages = find_namespace: 33 | python_requires = >=3.8, <3.12 34 | # include_package_data = true 35 | install_requires = 36 | boto3>=1.19.7 37 | gensim>=4.0.1 38 | imbalanced-learn>=0.8.1 39 | jsonlines>=2.0.0 40 | matplotlib>=3.4.3 41 | networkx>=2.6.3 42 | openpyxl>=3.0.5 43 | pandas>=1.3.3 44 | regex>=2021.10.23 45 | requests>=2.26.0 46 | sacremoses>=0.0.46 47 | scikit-learn>=1.0.1 48 | spacy>=3.2 49 | tabulate>=0.8.9 50 | tqdm>=4.62.3 51 | transformers>=4.17,<=4.24 52 | torch>=1.12,<2.1 53 | 54 | [options.packages.find] 55 | where = . 56 | 57 | [options.package_data] 58 | * = *.txt, *.ddict, *.tff, *.gitignore, *.gitkeep 59 | 60 | [options.data_files] 61 | git = 62 | *.gitignore 63 | 64 | [options.extras_require] 65 | cuda = 66 | cudatoolkit==10.1 67 | --------------------------------------------------------------------------------