├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── NewsMTSC.iml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── MANIFEST.in
├── NewsSentiment
│   ├── .gitignore
│   ├── DatasetPreparer.py
│   ├── SentimentClasses.py
│   ├── __init__.py
│   ├── __main__.py
│   ├── combinations_default.py
│   ├── consts.py
│   ├── controller.py
│   ├── controller_data
│   │   └── datasets
│   │       ├── NewsMTSC-dataset
│   │       │   ├── NewsMTSC-dataset.zip
│   │       │   ├── NewsMTSC-preprint.pdf
│   │       │   ├── devtest_mt.jsonl
│   │       │   ├── devtest_rw.jsonl
│   │       │   ├── readme.md
│   │       │   └── train.jsonl
│   │       ├── acl14twitter
│   │       │   ├── test.raw.jsonl
│   │       │   └── train.raw.jsonl
│   │       ├── newsmtsc-train-and-test-mt
│   │       │   ├── devtest_mtsc_only.jsonl
│   │       │   └── train.jsonl
│   │       ├── newsmtsc-train-and-test-rw
│   │       │   ├── devtest_mtsc_and_single_primaries.jsonl
│   │       │   └── train.jsonl
│   │       ├── semeval14laptops
│   │       │   ├── Laptops_Test_Gold.xml.seg.jsonl
│   │       │   └── Laptops_Train.xml.seg.jsonl
│   │       └── semeval14restaurants
│   │           ├── Restaurants_Test_Gold.xml.seg.jsonl
│   │           └── Restaurants_Train.xml.seg.jsonl
│   ├── converter_huggingface.py
│   ├── createoverview.py
│   ├── customexceptions.py
│   ├── dataset.py
│   ├── diskdict.py
│   ├── download.py
│   ├── earlystopping.py
│   ├── evaluator.py
│   ├── experiments
│   │   └── default
│   │       └── datasets
│   │           ├── newsmtsc-mt-hf
│   │           │   ├── dev.jsonl
│   │           │   ├── test.jsonl
│   │           │   └── train.jsonl
│   │           ├── newsmtsc-mt
│   │           │   ├── dev.jsonl
│   │           │   ├── test.jsonl
│   │           │   └── train.jsonl
│   │           ├── newsmtsc-rw-hf
│   │           │   ├── dev.jsonl
│   │           │   ├── test.jsonl
│   │           │   └── train.jsonl
│   │           └── newsmtsc-rw
│   │               ├── dev.jsonl
│   │               ├── test.jsonl
│   │               └── train.jsonl
│   ├── fxlogger.py
│   ├── infer.py
│   ├── inferrest.py
│   ├── knowledge
│   │   ├── __init__.py
│   │   ├── bingliuopinion
│   │   │   ├── bingliuopinion.py
│   │   │   ├── converter.py
│   │   │   ├── license.txt
│   │   │   ├── negative-words.txt
│   │   │   ├── opinion_polarity.ddict
│   │   │   └── positive-words.txt
│   │   ├── knowledgeutils.py
│   │   ├── liwc
│   │   │   ├── data
│   │   │   │   ├── .gitignore
│   │   │   │   └── readme.txt
│   │   │   ├── dic.py
│   │   │   ├── liwc.py
│   │   │   ├── liwchelper.py
│   │   │   └── trie.py
│   │   ├── mpqasubjectivity
│   │   │   ├── converter.py
│   │   │   ├── mpqasubjectivity.py
│   │   │   ├── subjclueslen1-HLTEMNLP05.tff
│   │   │   └── subjclueslen1-HLTEMNLP05.tff.ddict
│   │   ├── nrcemolex
│   │   │   ├── NRC-Emotion-Lexicon-Wordlevel-v0.92.txt
│   │   │   ├── NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict
│   │   │   ├── converter.py
│   │   │   └── nrcemolex.py
│   │   └── zeros
│   │       └── zerosknowledge.py
│   ├── layers
│   │   ├── AggregatorForBert.py
│   │   ├── __init__.py
│   │   ├── attention.py
│   │   ├── dynamic_rnn.py
│   │   ├── point_wise_feed_forward.py
│   │   ├── pytorchnlpattention.py
│   │   └── squeeze_embedding.py
│   ├── losses
│   │   ├── __init__.py
│   │   ├── crossentropycrossweight.py
│   │   ├── crossentropylosslsr.py
│   │   ├── crossentropylosswithconfidence.py
│   │   └── seq2seqloss.py
│   ├── models
│   │   ├── FXBaseModel.py
│   │   ├── FXEnsemble.py
│   │   ├── __init__.py
│   │   ├── ensemble.py
│   │   ├── ensembleb.py
│   │   ├── multitargets
│   │   │   ├── __init__.py
│   │   │   ├── contrasting.py
│   │   │   ├── random_multi.py
│   │   │   ├── seq2seq.py
│   │   │   ├── seq2seq_without_targetmask.py
│   │   │   ├── tdbertlikemultitarget.py
│   │   │   └── tdbertlikemultitarget_dense.py
│   │   └── singletarget
│   │       ├── __init__.py
│   │       ├── aen.py
│   │       ├── grutscsingle.py
│   │       ├── lcf.py
│   │       ├── lcf2.py
│   │       ├── lcfs.py
│   │       ├── lcfst.py
│   │       ├── lcft.py
│   │       ├── notargetcls.py
│   │       ├── random_single.py
│   │       ├── spc.py
│   │       ├── td_bert.py
│   │       ├── td_bert_qa.py
│   │       └── tdbertlikesingle.py
│   ├── plotter_utils.py
│   ├── pretrained_models
│   │   ├── .gitignore
│   │   └── state_dicts
│   │       ├── .gitignore
│   │       └── grutsc_v1-0-0
│   ├── results
│   │   └── .gitignore
│   └── train.py
├── README.md
├── READMEpypi.md
├── hubconf.py
├── pyproject.toml
├── pythoninfo.md
└── setup.cfg
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Python template
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # celery beat schedule file
96 | celerybeat-schedule
97 |
98 | # SageMath parsed files
99 | *.sage.py
100 |
101 | # Environments
102 | .env
103 | .venv
104 | env/
105 | venv/
106 | ENV/
107 | env.bak/
108 | venv.bak/
109 |
110 | # Spyder project settings
111 | .spyderproject
112 | .spyproject
113 |
114 | # Rope project settings
115 | .ropeproject
116 |
117 | # mkdocs documentation
118 | /site
119 |
120 | # mypy
121 | .mypy_cache/
122 | .dmypy.json
123 | dmypy.json
124 |
125 | # Pyre type checker
126 | .pyre/
127 |
128 | ### JetBrains template
129 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
130 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
131 |
132 | # User-specific stuff
133 | .idea/**/workspace.xml
134 | .idea/**/tasks.xml
135 | .idea/**/usage.statistics.xml
136 | .idea/**/dictionaries
137 | .idea/**/shelf
138 |
139 | # Generated files
140 | .idea/**/contentModel.xml
141 |
142 | # Sensitive or high-churn files
143 | .idea/**/dataSources/
144 | .idea/**/dataSources.ids
145 | .idea/**/dataSources.local.xml
146 | .idea/**/sqlDataSources.xml
147 | .idea/**/dynamic.xml
148 | .idea/**/uiDesigner.xml
149 | .idea/**/dbnavigator.xml
150 |
151 | # Gradle
152 | .idea/**/gradle.xml
153 | .idea/**/libraries
154 |
155 | # Gradle and Maven with auto-import
156 | # When using Gradle or Maven with auto-import, you should exclude module files,
157 | # since they will be recreated, and may cause churn. Uncomment if using
158 | # auto-import.
159 | # .idea/modules.xml
160 | # .idea/*.iml
161 | # .idea/modules
162 | # *.iml
163 | # *.ipr
164 |
165 | # CMake
166 | cmake-build-*/
167 |
168 | # Mongo Explorer plugin
169 | .idea/**/mongoSettings.xml
170 |
171 | # File-based project format
172 | *.iws
173 |
174 | # IntelliJ
175 | out/
176 |
177 | # mpeltonen/sbt-idea plugin
178 | .idea_modules/
179 |
180 | # JIRA plugin
181 | atlassian-ide-plugin.xml
182 |
183 | # Cursive Clojure plugin
184 | .idea/replstate.xml
185 |
186 | # Crashlytics plugin (for Android Studio and IntelliJ)
187 | com_crashlytics_export_strings.xml
188 | crashlytics.properties
189 | crashlytics-build.properties
190 | fabric.properties
191 |
192 | # Editor-based Rest Client
193 | .idea/httpRequests
194 |
195 | # Android studio 3.1+ serialized cache file
196 | .idea/caches/build_file_checksums.ser
197 |
198 | # OSX
199 | #
200 | .DS_Store
201 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/NewsMTSC.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at felix.hamborg@uni-konstanz.de. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This project uses code from various projects, which may have their own license or copyright.
2 | - ABSA-PyTorch by Youwei Song et al., available at https://github.com/songyouwei/ABSA-PyTorch
3 | - early-stopping-pytorch by Bjarten, available at https://github.com/Bjarten/early-stopping-pytorch
4 |
5 | For newly developed files, the license information is:
6 |
7 | Copyright 2020 Felix Hamborg
8 |
9 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
14 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include READMEpypi.md
2 | include pythoninfo.md
3 |
--------------------------------------------------------------------------------
/NewsSentiment/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/.gitignore
--------------------------------------------------------------------------------
/NewsSentiment/SentimentClasses.py:
--------------------------------------------------------------------------------
1 | from typing import Iterable
2 |
3 |
4 | class SentimentClasses:
5 | FILLUP_POLARITY_VALUE = -100
6 | FILLUP_POLARITY_LABEL = "fillup"
7 | SENTIMENT_CLASSES = None
8 |
9 | @staticmethod
10 | def initialize(sentiment_classes: dict):
11 | SentimentClasses.SENTIMENT_CLASSES = sentiment_classes
12 |
13 | @staticmethod
14 | def get_num_classes():
15 | return len(SentimentClasses.SENTIMENT_CLASSES)
16 |
17 | @staticmethod
18 | def __evaluate_boundary(given_value: float, boundary: tuple):
19 | operator = boundary[0]
20 | value = boundary[1]
21 | if operator == "<=":
22 | return given_value <= value
23 | elif operator == "<":
24 | return given_value < value
25 | elif operator == ">=":
26 | return given_value >= value
27 | elif operator == ">":
28 | return given_value > value
29 | elif operator == "==":
30 | return given_value == value
31 | else:
32 | raise ValueError
33 |
34 | @staticmethod
35 | def __evaluate_boundaries_of_class(
36 | given_value: float, sentiment_boundaries: Iterable[tuple]
37 | ):
38 | assert len(sentiment_boundaries) >= 1
39 | for boundary in sentiment_boundaries:
40 | is_valid = SentimentClasses.__evaluate_boundary(given_value, boundary)
41 | if not is_valid:
42 | return False
43 | return True
44 |
45 | @staticmethod
46 | def __get_legacy_information():
47 | # self.polarity_associations = {"positive": 2, "neutral": 1, "negative": 0}
48 | # self.polarity_associations_inv = {2: "positive", 1: "neutral", 0: "negative"}
49 | # self.sorted_expected_label_values = [0, 1, 2]
50 | # self.sorted_expected_label_names = ["negative", "neutral", "positive"]
51 |
52 | sentiment_labels = list(SentimentClasses.SENTIMENT_CLASSES.keys())
53 | sentiment_normalized_values = []
54 | for label in sentiment_labels:
55 | sentiment_normalized_values.append(
56 | SentimentClasses.SENTIMENT_CLASSES[label]["normalized_polarity"]
57 | )
58 |
59 | polarity_associations = {}
60 | polarity_associations_inv = {}
61 | for label, value in zip(sentiment_labels, sentiment_normalized_values):
62 | polarity_associations[label] = value
63 | polarity_associations_inv[value] = label
64 |
65 | return {
66 | "polarity_associations": polarity_associations,
67 | "polarity_associations_inv": polarity_associations_inv,
68 | "sorted_expected_label_values": sentiment_normalized_values,
69 | "sorted_expected_label_names": sentiment_labels,
70 | }
71 |
72 | @staticmethod
73 | def get_sorted_expected_label_names():
74 | return SentimentClasses.__get_legacy_information()[
75 | "sorted_expected_label_names"
76 | ]
77 |
78 | @staticmethod
79 | def get_sorted_expected_label_values():
80 | return SentimentClasses.__get_legacy_information()[
81 | "sorted_expected_label_values"
82 | ]
83 |
84 | @staticmethod
85 | def get_polarity_associations():
86 | return SentimentClasses.__get_legacy_information()["polarity_associations"]
87 |
88 | @staticmethod
89 | def get_polarity_associations_inverse():
90 | return SentimentClasses.__get_legacy_information()["polarity_associations_inv"]
91 |
92 | @staticmethod
93 | def __find_sentiment_class(polarity: float):
94 | resulting_class = None
95 | for sentiment_label, info in SentimentClasses.SENTIMENT_CLASSES.items():
96 | sentiment_boundaries = info["boundaries"]
97 | sentiment_normalized_polarity = info["normalized_polarity"]
98 | is_in_class_boundaries = SentimentClasses.__evaluate_boundaries_of_class(
99 | polarity, sentiment_boundaries
100 | )
101 | if is_in_class_boundaries:
102 | # check polarity is not in another class, too
103 | assert (
104 | resulting_class is None
105 | ), f"overlapping sentiment classes; previous class: {resulting_class}"
106 | resulting_class = (sentiment_label, sentiment_normalized_polarity)
107 |
108 | # check that a class was found
109 | assert resulting_class, f"result is not defined for polarity: {polarity}"
110 |
111 | return resulting_class
112 |
113 | @staticmethod
114 | def polarity2label(polarity: float) -> str:
115 | if polarity == SentimentClasses.FILLUP_POLARITY_VALUE:
116 | return SentimentClasses.FILLUP_POLARITY_LABEL
117 |
118 | sentiment_class = SentimentClasses.__find_sentiment_class(polarity)
119 | label = sentiment_class[0]
120 | return label
121 |
122 | @staticmethod
123 | def polarity2normalized_polarity(polarity: float) -> int:
124 | if polarity == SentimentClasses.FILLUP_POLARITY_VALUE:
125 | return int(SentimentClasses.FILLUP_POLARITY_VALUE)
126 |
127 | sentiment_class = SentimentClasses.__find_sentiment_class(polarity)
128 | normalized_polarity = sentiment_class[1]
129 | return normalized_polarity
130 |
131 | @staticmethod
132 | def Sentiment3ForNewsMtsc():
133 | sentiment_classes = {
134 | "positive": {
135 | "boundaries": [(">=", 5), ("<=", 7)],
136 | "normalized_polarity": 2,
137 | },
138 | "neutral": {"boundaries": [(">", 3), ("<", 5)], "normalized_polarity": 1},
139 | "negative": {
140 | "boundaries": [(">=", 1), ("<=", 3)],
141 | "normalized_polarity": 0,
142 | },
143 | }
144 | SentimentClasses.initialize(sentiment_classes)
145 |
146 | @staticmethod
147 | def SentimentStrong3ForNewsMtsc():
148 | sentiment_classes = {
149 | "positive": {
150 | "boundaries": [(">=", 6), ("<=", 7)],
151 | "normalized_polarity": 2,
152 | },
153 | "neutral": {"boundaries": [(">", 2), ("<", 6)], "normalized_polarity": 1},
154 | "negative": {
155 | "boundaries": [(">=", 1), ("<=", 2)],
156 | "normalized_polarity": 0,
157 | },
158 | }
159 | SentimentClasses.initialize(sentiment_classes)
160 |
161 | @staticmethod
162 | def SentimentWeak3ForNewsMtsc():
163 | sentiment_classes = {
164 | "positive": {
165 | "boundaries": [(">=", 4.5), ("<=", 7)],
166 | "normalized_polarity": 2,
167 | },
168 | "neutral": {
169 | "boundaries": [(">", 3.5), ("<", 4.5)],
170 | "normalized_polarity": 1,
171 | },
172 | "negative": {
173 | "boundaries": [(">=", 1), ("<=", 3.5)],
174 | "normalized_polarity": 0,
175 | },
176 | }
177 | SentimentClasses.initialize(sentiment_classes)
178 |
--------------------------------------------------------------------------------
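
A minimal usage sketch of the class-boundary logic above, assuming the default three-class setup (`Sentiment3ForNewsMtsc`) is initialized first:

```
from NewsSentiment.SentimentClasses import SentimentClasses

# default three-class boundaries: positive >= 5, neutral in (3, 5), negative <= 3
SentimentClasses.Sentiment3ForNewsMtsc()

print(SentimentClasses.polarity2label(6.0))                # "positive"
print(SentimentClasses.polarity2label(4.0))                # "neutral"
print(SentimentClasses.polarity2normalized_polarity(2.0))  # 0, i.e., "negative"
print(SentimentClasses.get_sorted_expected_label_names())  # ['positive', 'neutral', 'negative']
```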
/NewsSentiment/__init__.py:
--------------------------------------------------------------------------------
1 | from NewsSentiment.infer import TargetSentimentClassifier
2 |
--------------------------------------------------------------------------------
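
`__init__.py` only re-exports `TargetSentimentClassifier`, so library usage reduces to the sketch below; the keyword arguments mirror the call in `inferrest.py` further down, and a downloaded pretrained model (see `download.py`) is assumed:

```
from NewsSentiment import TargetSentimentClassifier

tsc = TargetSentimentClassifier()
# the target mention is passed separately from its left and right context
result = tsc.infer(
    text_left="I like ",
    target_mention="Peter",
    text_right=" because he is clever.",
)
print(result)
```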
/NewsSentiment/__main__.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from NewsSentiment.download import Download
4 |
5 | if __name__ == '__main__':
6 | parser = argparse.ArgumentParser(prog='NewsSentiment')
7 | subparsers = parser.add_subparsers(dest='action')
8 |
9 | subparser_download = subparsers.add_parser('download', help=Download.add_subparser.__doc__)
10 | Download.add_subparser(subparser_download)
11 |
12 | args = parser.parse_args()
13 | action = args.action
14 | del args.action
15 |
16 | if action == 'download':
17 | Download.run_from_parser(args)
18 |
--------------------------------------------------------------------------------
/NewsSentiment/combinations_default.py:
--------------------------------------------------------------------------------
1 | from NewsSentiment.consts import BERT_BASE_UNCASED
2 |
3 | combinations_default_0 = {
4 | "own_model_name": [
5 | # baselines: single
6 | "notargetclsbert",
7 | "lcf_bert",
8 | "lcf_bert2",
9 | "lcfs_bert",
10 | "lcft_bert",
11 | "aen_bert",
12 | "spc_bert",
13 | "tdbert",
14 | "tdbert-qa-mul",
15 | "tdbert-qa-con",
16 | # own models: single
17 | "tdbertlikesingle",
18 | "lcfst_bert",
19 | "grutsc",
20 | # own models: multi
21 | "tdbertlikemulti",
22 | # "tdbertlikemulti_dense",
23 | "seq2seq",
24 | "seq2seq_withouttargetmask",
25 | "contrasting",
26 | # baselines
27 | # "random_single",
28 | # "random_multi",
29 | ],
30 | "optimizer": ["adam"],
31 | "initializer": ["xavier_uniform_"],
32 | "learning_rate": ["2e-5", "3e-5", "5e-5"],
33 | "batch_size": [
34 | "16",
35 | # "32",
36 | ], # ['16', '32'],
37 | "balancing": ["None"], # ['None', 'lossweighting', 'oversampling'],
38 | "devmode": ["False"],
39 | "num_epoch": ["2", "3", "4"],
40 | "loss": [
41 | "crossentropy",
42 | "crossentropy_lsr",
43 | "sequence",
44 | "crossentropy_crossweight",
45 | ],
46 | # "spc_lm_representation_distilbert": ["mean_last"],
47 | # ['sum_last', 'sum_last_four', 'sum_last_two', 'sum_all', 'mean_last', 'mean_last_four', 'mean_last_two', 'mean_all'],
48 | # "spc_lm_representation": ["pooler_output"],
49 | # ['pooler_output', 'sum_last', 'sum_last_four', 'sum_last_two', 'sum_all', 'mean_last', 'mean_last_four', 'mean_last_two', 'mean_all'],
50 | # "spc_input_order": ["text_target"], # 'target_text',
51 | # "aen_lm_representation": ["last"],
52 | # ['last', 'sum_last_four', 'sum_last_two', 'sum_all', 'mean_last_four'], # 'mean_last_two', 'mean_all'],
53 | "eval_only_after_last_epoch": ["True"],
54 | "local_context_focus": ["cdm", "cdw"],
55 | "SRD": ["3", "4", "5"],
56 | "pretrained_model_name": ["default"],
57 | # ['default', 'bert_news_ccnc_10mio_3ep', 'laptops_and_restaurants_2mio_ep15', 'laptops_1mio_ep30', 'restaurants_10mio_ep3'],
58 | "state_dict": ["None"],
59 | # ['None', 'lcf_bert_acl14twitter_val_recall_avg_0.7349_epoch3', 'lcf_bert_semeval14laptops_val_recall_avg_0.7853_epoch3', 'lcf_bert_semeval14restaurants_val_recall_avg_0.7672_epoch2', 'lcf_bert_newstsc_val_recall_avg_0.5954_epoch3'],
60 | "single_targets": [
61 | "True"
62 | ], # using conditions in controller.py, we have single_targets only for single target models
63 | "multi_targets": [
64 | "True"
65 | ], # using conditions in controller.py, we have multi_targets only for multi target models
66 | "targetclasses": [
67 | "newsmtsc3",
68 | #"newsmtsc3strong",
69 | #"newsmtsc3weak",
70 | ],
71 | "knowledgesources": [
72 | "nrc_emotions", "mpqa_subjectivity", "bingliu_opinion", "liwc",
73 | "nrc_emotions mpqa_subjectivity", "nrc_emotions liwc",
74 | "nrc_emotions bingliu_opinion", "mpqa_subjectivity bingliu_opinion",
75 | "mpqa_subjectivity liwc", "bingliu_opinion liwc",
76 | "nrc_emotions mpqa_subjectivity bingliu_opinion",
77 | "nrc_emotions mpqa_subjectivity liwc",
78 | "nrc_emotions liwc bingliu_opinion",
79 | "liwc mpqa_subjectivity bingliu_opinion",
80 | "nrc_emotions mpqa_subjectivity bingliu_opinion liwc",
81 | "zeros",
82 | ],
83 | "is_use_natural_target_phrase_for_spc": [
84 | "True",
85 | "False"
86 | ],
87 | "default_lm": [
88 | BERT_BASE_UNCASED,
89 | ],
90 | "coref_mode_in_training": [
91 | "ignore",
92 | "in_targetmask",
93 | "additional_examples"
94 | ],
95 | }
96 |
--------------------------------------------------------------------------------
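
`combinations_default_0` is a grid specification: each key lists the candidate values of one hyperparameter. How `controller.py` enumerates it is not shown in this excerpt, but a hypothetical expansion into single experiment configurations could look like this:

```
from itertools import product

from NewsSentiment.combinations_default import combinations_default_0

keys = list(combinations_default_0.keys())
grid = product(*combinations_default_0.values())  # lazy iterator over all value combinations
first_config = dict(zip(keys, next(grid)))
print(first_config["own_model_name"], first_config["learning_rate"], first_config["loss"])
```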
/NewsSentiment/consts.py:
--------------------------------------------------------------------------------
1 | BERT_BASE_UNCASED = "bert-base-uncased"
2 | ROBERTA_BASE = "roberta-base"
3 | XLNET_BASE_CASED = "xlnet-base-cased"
4 | ALBERT_BASE = "albert-base-v2"
5 | ALBERT_LARGE = "albert-large-v2"
6 | ALBERT_XLARGE = "albert-xlarge-v2"
7 | ALBERT_XXLARGE = "albert-xxlarge-v2"
8 | __DEFAULT_LM = None
9 |
10 |
11 | def set_default_lm(new_name: str):
12 | global __DEFAULT_LM
13 | __DEFAULT_LM = new_name
14 |
15 |
16 | def get_default_lm():
17 | return __DEFAULT_LM
18 |
19 |
20 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS = "text_ids_with_special_tokens"
21 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK = (
22 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS + "_target_mask"
23 | )
24 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES = (
25 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS + "_selectedknowledgesources"
26 | )
27 | FIELD_IS_OVERFLOW = "is_overflow"
28 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS = (
29 | "text_then_target_ids_with_special_tokens"
30 | )
31 | # we used to have text-then-target target mask here, but won't use it,
32 | # since it would be identical to the text target mask (since we only
33 | # want to mark the target within the text, but not in the 2nd target
34 | # component)
35 | # FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK = (
36 | # FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS + "_target_mask"
37 | # )
38 | # same for knowledge sources
39 | # FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_KNOWLEDGE_SOURCES = (
40 | # FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS + "_selectedknowledgesources"
41 | # )
42 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS = (
43 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS + "_segment_ids"
44 | )
45 | FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS = "target_ids_with_special_tokens"
46 | FIELD_SYNTAX_HOP_DISTANCE_TO_TARGET = "syntax_hop_distance_to_target"
47 | FIELD_SYNTAX_DEPENDENCY_MATRIX = "syntax_dependency_matrix"
48 |
--------------------------------------------------------------------------------
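
The default language model is a module-level setting; a short sketch of the setter/getter pair defined above:

```
from NewsSentiment import consts

consts.set_default_lm(consts.ROBERTA_BASE)
assert consts.get_default_lm() == "roberta-base"
```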
/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-dataset.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-dataset.zip
--------------------------------------------------------------------------------
/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-preprint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-preprint.pdf
--------------------------------------------------------------------------------
/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/readme.md:
--------------------------------------------------------------------------------
1 | # Welcome
2 |
3 | The files contained in this archive are part of the dataset "NewsMTSC" described in our paper "NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles" published at the EACL 2021.
4 |
5 | ## Dataset
6 |
7 | ### Files
8 | The dataset consists of three splits. In practical terms, we suggest using the files as follows (more detailed information can be found in the paper):
9 |
10 | * `train.jsonl` - For **training**.
11 | * `devtest_mt.jsonl` - To evaluate a model's classification performance only on sentences that contain **at least two target mentions**. Note that the mentions were extracted to refer to different persons but in a few cases might indeed refer to the same person since we extracted them automatically.
12 | * `devtest_rw.jsonl` - To evaluate a model's classification performance on a "**real-world**" set of sentences, i.e., the set was created with the objective of resembling the real-world distribution of sentiment and other factors mentioned in the paper.
13 |
14 |
15 | ### Format
16 | Each split is stored in a JSONL file. In JSONL, each line represents one JSON object. In our dataset, each JSON object consists of:
17 |
18 | 1. `sentence_normalized`: a single sentence
19 | 2. `primary_gid`: an identifier that is unique within NewsMTSC
20 | 3. `targets`: one or more targets
21 |
22 | Each target in `targets` consists of:
23 |
24 | 1. `Input.gid`: an identifier that is unique within NewsMTSC
25 | 2. `from`: the character-based, 0-indexed position of the first character of the target's mention within `sentence_normalized`
26 | 3. `to`: the character-based, 0-indexed position directly after the last character of the target's mention (an exclusive end index; cf. the example below)
27 | 4. `mention`: the text of the mention
28 | 5. `polarity`: the sentiment of the sentence concerning the target's mention (2.0 = negative, 4.0 = neutral, 6.0 = positive)
29 | 6. `further_mentions` (optional): one or more coreferential mentions of the target within the sentence. Note that these were extracted automatically and thus might be incorrect or incomplete. Further, our annotators labeled the sentiment concerning the main mention, which - depending on the sentence - might not be identical to the sentiment of the coreferences.
30 |
31 | ```
32 | {
33 | "primary_gid":"allsides_1192_476_17_— Judge Neil M. Gorsuch_126_139",
34 | "sentence_normalized":"But neither side harbored any doubts, based on the judge’s opinions, other writings and the president who nominated him, that Judge Gorsuch would be a reliable conservative committed to following the original understanding of those who drafted and ratified the Constitution.",
35 | "targets":[
36 | {
37 | "Input.gid":"allsides_1192_476_17_— Judge Neil M. Gorsuch_126_139",
38 | "from":126,
39 | "to":139,
40 | "mention":"Judge Gorsuch",
41 | "polarity":6.0,
42 | "further_mentions":[
43 | {
44 | "from":116,
45 | "to":119,
46 | "mention":"him"
47 | }
48 | ]
49 | }
50 | ]
51 | }
52 | ```
53 |
54 | ## Contact
55 |
56 | If you want to get in touch, feel free to contact Felix Hamborg. If you find an issue with the dataset or model or have a question concerning either, please open an issue in the repository.
57 |
58 | * Web: [https://felix.hamborg.eu/](https://felix.hamborg.eu/)
59 | * Mail: [felix.hamborg@uni-konstanz.de](mailto:felix.hamborg@uni-konstanz.de)
60 | * Repository: [https://github.com/fhamborg/NewsMTSC](https://github.com/fhamborg/NewsMTSC)
61 |
62 |
63 | ## How to cite
64 |
65 | If you use the dataset or parts of it, please cite our paper:
66 |
67 | ```
68 | @InProceedings{Hamborg2021b,
69 | author = {Hamborg, Felix and Donnay, Karsten},
70 | title = {NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles},
71 | booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)},
72 | year = {2021},
73 | month = {Apr.},
74 | location = {Virtual Event},
75 | }
76 | ```
77 |
--------------------------------------------------------------------------------
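
A minimal sketch of reading one split with the `jsonlines` package (already used throughout the repository), flattening each sentence into one row per target:

```
import jsonlines

with jsonlines.open("train.jsonl") as reader:
    for obj in reader:
        sentence = obj["sentence_normalized"]
        for target in obj["targets"]:
            # consistent with the worked example above (from=126, to=139),
            # slicing with [from:to] yields the mention text
            mention = sentence[target["from"]:target["to"]]
            polarity = target["polarity"]  # 2.0 = negative, 4.0 = neutral, 6.0 = positive
            print(mention, polarity)
```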
/NewsSentiment/converter_huggingface.py:
--------------------------------------------------------------------------------
1 | """
2 | This file converts the dataset files (3 splits) into the format we'll use on
3 | Huggingface Hub, i.e., where examples with k targets are expanded to k examples, each
4 | having 1 target.
5 | """
6 |
7 | import pathlib
8 | import jsonlines
9 | from loguru import logger
10 |
11 |
12 | def convert_polarity(polarity):
13 | if polarity == 2.0:
14 | return -1
15 | elif polarity == 4.0:
16 | return 0
17 | elif polarity == 6.0:
18 | return 1
19 | else:
20 | raise ValueError
21 |
22 |
23 | def convert_target(obj, target):
24 | converted_obj = {
25 | "mention": target["mention"],
26 | "polarity": convert_polarity(target["polarity"]),
27 | "from": target["from"],
28 | "to": target["to"],
29 | "sentence": obj["sentence_normalized"],
30 | "id": target["Input.gid"],
31 | }
32 |
33 | return converted_obj
34 |
35 |
36 | def convert_obj(obj):
37 | targets = obj["targets"]
38 | converted_objs = []
39 |
40 | for target in targets:
41 | converted_objs.append(convert_target(obj, target))
42 |
43 | return converted_objs
44 |
45 |
46 | def convert(path):
47 | files = [p for p in pathlib.Path(path).iterdir() if p.is_file()]
48 |
49 | for file in files:
50 | converted_lines = []
51 | counter = 0
52 | with jsonlines.open(file) as reader:
53 | for obj in reader:
54 | converted_lines.extend(convert_obj(obj))
55 | counter += 1
56 | logger.info(
57 | "converted {} lines to {} examples in {}",
58 | counter,
59 | len(converted_lines),
60 | file,
61 | )
62 |
63 | with jsonlines.open(str(file) + "converted", mode="w") as writer:
64 | writer.write_all(converted_lines)
65 |
66 |
67 | if __name__ == "__main__":
68 | convert("experiments/default/datasets/newsmtsc-mt")
69 | convert("experiments/default/datasets/newsmtsc-rw")
70 |
--------------------------------------------------------------------------------
/NewsSentiment/createoverview.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | from collections import defaultdict
4 |
5 | from diskdict import DiskDict
6 |
7 | import jsonlines
8 | import pandas as pd
9 |
10 | from NewsSentiment.fxlogger import get_logger
11 |
12 | logger = get_logger()
13 |
14 |
15 | def rename_flatten(dictionary, key_prefix):
16 | new_dict = {}
17 |
18 | for k, v in dictionary.items():
19 | new_k = key_prefix + "-" + k
20 | new_dict[new_k] = v
21 |
22 | return new_dict
23 |
24 |
25 | def without_keys(d, keys):
26 | return {x: d[x] for x in d if x not in keys}
27 |
28 |
29 | def non_scalar_to_str(d):
30 | new_d = {}
31 | for k, v in d.items():
32 | if type(v) in [list, dict]:
33 | new_v = json.dumps(v)
34 | else:
35 | new_v = v
36 | new_d[k] = new_v
37 | return new_d
38 |
39 |
40 | COL_NAME_TMP_GROUP = "_tmp_named_id_"
41 |
42 |
43 | def _find_run_ids(completed_tasks: dict):
44 | """
45 | First checks the maximum number of run ids, then ensures that each experiment has
46 | that many.
47 | :param completed_tasks:
48 | :return:
49 | """
50 | run_ids = set()
51 | for named_id, result in completed_tasks.items():
52 | vals = without_keys(result, ["details"])
53 | run_id = vals["run_id"]
54 | run_ids.add(run_id)
55 | del vals["run_id"]
56 | del vals["experiment_named_id"]
57 | del vals["experiment_id"]
58 | named_experiment_id_wo_run_id = json.dumps(vals)
59 | result[COL_NAME_TMP_GROUP] = named_experiment_id_wo_run_id
60 | num_runs_per_experiment = len(run_ids)
61 | logger.info("found %s run_ids: %s", num_runs_per_experiment, run_ids)
62 |
63 | # check that each experiment has as many run ids
64 | named_id2run_ids = defaultdict(list)
65 | for named_id, result in completed_tasks.items():
66 | vals = without_keys(result, ["details"])
67 | named_experiment_id_wo_run_id = result["_tmp_named_id_"]
68 | run_id = vals["run_id"]
69 | named_id2run_ids[named_experiment_id_wo_run_id].append(run_id)
70 |
71 | count_too_few_runs = 0
72 | for named_id, run_ids in named_id2run_ids.items():
73 | if len(run_ids) != num_runs_per_experiment:
74 | logger.debug("%s runs for %s", len(run_ids), named_id)
75 | count_too_few_runs += 1
76 | if count_too_few_runs == 0:
77 | logger.info(
78 | "GOOD: num experiments with too few runs: %s of %s",
79 | count_too_few_runs,
80 | len(named_id2run_ids),
81 | )
82 | else:
83 | logger.warning(
84 | "num experiments with too few runs: %s of %s",
85 | count_too_few_runs,
86 | len(named_id2run_ids),
87 | )
88 |
89 | return num_runs_per_experiment, completed_tasks
90 |
91 |
92 | def _aggregate_and_mean(df: pd.DataFrame):
93 | df = df.copy(deep=True)
94 | col_names_original_order = list(df.columns)
95 | df_aggr = df.groupby(COL_NAME_TMP_GROUP).mean()
96 |
97 | # from https://stackoverflow.com/a/35401886
98 | # this creates a df that contains aggregated values (from df_aggr) and also
99 | # all other columns (non-aggregated)
100 | aggr_col_names = list(df_aggr.columns)
101 | df.drop(aggr_col_names, axis=1, inplace=True)
102 | df.drop_duplicates(subset=COL_NAME_TMP_GROUP, keep="last", inplace=True)
103 | df = df.merge(
104 | right=df_aggr, right_index=True, left_on=COL_NAME_TMP_GROUP, how="right"
105 | )
106 |
107 | # reorder the dataframe to have the established order of columns
108 | # taken from: https://stackoverflow.com/a/13148611
109 | df = df[col_names_original_order]
110 |
111 | # delete temp col
112 | del df[COL_NAME_TMP_GROUP]
113 |
114 | return df
115 |
116 |
117 | def _dfs_to_excel(pathname, name2df):
118 | writer = pd.ExcelWriter(pathname, engine="xlsxwriter")
119 | for name, df in name2df.items():
120 | if df is None:
121 | logger.info("skipping df because empty: %s", name)
122 | continue
123 |
124 | df.to_excel(writer, sheet_name=name, startrow=0, startcol=0)
125 | writer.save()
126 |
127 |
128 | def shelve2xlsx(opt, ignore_graceful_exit_experiments):
129 | completed_tasks = DiskDict(opt.results_path)
130 | logger.info(
131 | "found {} results in file {}".format(len(completed_tasks), opt.results_path)
132 | )
133 | # get max run id
134 | num_runs_per_experiment, completed_tasks = _find_run_ids(completed_tasks)
135 |
136 | flattened_results = {}
137 |
138 | for named_id, result in completed_tasks.items():
139 | if result["rc"] == 99 and ignore_graceful_exit_experiments:
140 | logger.info("found graceful exit (99), not adding to file: %s", named_id)
141 | continue
142 | elif result["rc"] == 0:
143 | test_stats = rename_flatten(result["details"]["test_stats"], "test_stats")
144 | dev_stats = rename_flatten(result["details"]["dev_stats"], "dev_stats")
145 |
146 | flattened_result = {
147 | **without_keys(result, ["details"]),
148 | **dev_stats,
149 | **test_stats,
150 | }
151 | else:
152 | flattened_result = {**without_keys(result, ["details"])}
153 |
154 | scalared_flattened_result = non_scalar_to_str(flattened_result)
155 | flattened_results[named_id] = scalared_flattened_result
156 |
157 | df = pd.DataFrame(data=flattened_results.values())
158 |
159 | if num_runs_per_experiment >= 2:
160 | df_aggr = _aggregate_and_mean(df)
161 | else:
162 | df_aggr = None
163 | del df[COL_NAME_TMP_GROUP]
164 |
165 | _dfs_to_excel(opt.results_path + ".xlsx", {"raw": df, "aggr": df_aggr})
166 |
167 |
168 | def jsonl2xlsx(opt):
169 | labels = {2: "positive", 1: "neutral", 0: "negative"}
170 |
171 | with jsonlines.open(opt.results_path, "r") as reader:
172 | lines = []
173 | for line in reader:
174 | if line["true_label"] != line["pred_label"]:
175 | line["true_label"] = labels[line["true_label"]]
176 | line["pred_label"] = labels[line["pred_label"]]
177 |
178 | lines.append(line)
179 |
180 | df = pd.DataFrame(data=lines)
181 | df.to_excel(opt.results_path + ".xlsx")
182 |
183 |
184 | if __name__ == "__main__":
185 | parser = argparse.ArgumentParser()
186 | parser.add_argument(
187 | "--results_path",
188 | type=str,
189 | default="results/mtscall_stance0",
190 | )
191 | parser.add_argument("--mode", type=str, default="shelve")
192 | opt = parser.parse_args()
193 |
194 | if opt.mode == "shelve":
195 | shelve2xlsx(opt, ignore_graceful_exit_experiments=False)
196 | elif opt.mode == "jsonl":
197 | jsonl2xlsx(opt)
198 |
--------------------------------------------------------------------------------
/NewsSentiment/customexceptions.py:
--------------------------------------------------------------------------------
1 | class TooLongTextException(Exception):
2 | pass
3 |
4 |
5 | class TargetNotFoundException(Exception):
6 | pass
7 |
--------------------------------------------------------------------------------
/NewsSentiment/diskdict.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import pickle
3 | from typing import Union
4 |
5 | from NewsSentiment.fxlogger import get_logger
6 |
7 | logger = get_logger()
8 |
9 |
10 | class DiskDict(dict):
11 | def __init__(self, sync_path: Union[Path, str]):
12 | if type(sync_path) == str:
13 | sync_path = Path(sync_path)
14 | self.path = sync_path
15 |
16 | if self.path.exists():
17 | with open(self.path, "rb") as file:
18 | tmp_dct = pickle.load(file)
19 | super().update(tmp_dct)
20 | logger.info(
21 | "loaded DiskDict with %s items from %s", len(tmp_dct), self.path
22 | )
23 |
24 | def sync_to_disk(self):
25 | with open(self.path, "wb") as file:
26 | tmp_dct = super().copy()
27 | pickle.dump(tmp_dct, file)
28 | # print(f"saved DiskDict with {len(tmp_dct)} items to {self.path}")
29 |
--------------------------------------------------------------------------------
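
`DiskDict` is a plain `dict` that loads itself from a pickle file at construction (if that file exists) and writes itself back only on request; a small sketch:

```
from NewsSentiment.diskdict import DiskDict

cache = DiskDict("term_cache.ddict")  # hypothetical path; loads existing entries if present
cache["some term"] = [1, 0, 0]
cache.sync_to_disk()                  # persistence is explicit, not automatic
```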
/NewsSentiment/download.py:
--------------------------------------------------------------------------------
1 | """
2 | Download a specific version of a finetuned model and place it in pretrained_models.
3 | """
4 | import os
5 | import string
6 |
7 | import torch
8 |
9 | from NewsSentiment.fxlogger import get_logger
10 |
11 |
12 | class Download:
13 |
14 | def __init__(
15 | self, own_model_name, version="default", force=False, list_versions=False
16 | ):
17 | from NewsSentiment.models.FXBaseModel import model_includes_pretrained
18 | from NewsSentiment.train import OWN_MODELNAME2CLASS
19 |
20 | logger = get_logger()
21 | own_model_name = own_model_name
22 | if version == "default":
23 | version = None
24 | try:
25 | model_cls = OWN_MODELNAME2CLASS[own_model_name]
26 | except KeyError:
27 | logger.error(f'Model "{own_model_name}" is unknown.')
28 | exit(2)
29 | if not model_includes_pretrained(model_cls):
30 | logger.error(
31 | f'Model "{own_model_name}" does not ship any pretrained models for download.'
32 | )
33 | exit(2)
34 | if list_versions:
35 | self.list_versions(model_cls, own_model_name)
36 | else:
37 | self.download(model_cls, version, force)
38 |
39 | @staticmethod
40 | def list_versions(model_cls, own_model_name=""):
41 | default = model_cls.get_pretrained_default_version()
42 | versions = model_cls.get_pretrained_versions()
43 | if own_model_name:
44 | own_model_name = f' "{own_model_name}"'
45 | print(f"Model{own_model_name} provides following pretrained versions:")
46 | for version, source in versions.items():
47 | default_str = ""
48 | if version == default:
49 | default_str = " (default)"
50 | print(f'"{version}"{default_str}: {source}')
51 |
52 | @classmethod
53 | def download(cls, model_cls, version=None, force=False, skip_if_exists=True):
54 | source = model_cls.get_pretrained_source(version)
55 | path = cls.model_path(model_cls, version)
56 | if not force and os.path.isfile(path):
57 | if skip_if_exists:
58 | return
59 | print("Model file already exists. Use --force to overwrite.")
60 | exit(2)
61 | print(f"Downloading to {path}:")
62 | os.makedirs(os.path.dirname(path), exist_ok=True)
63 | torch.hub.download_url_to_file(source, path)
64 | return path
65 |
66 | @staticmethod
67 | def model_filename(model_cls, version=None):
68 | version = version or model_cls.get_pretrained_default_version()
69 | source_filename = os.path.basename(model_cls.get_pretrained_source(version))
70 | name = f"{source_filename}_{version}"
71 | allowed = set(f".-_ {string.ascii_letters}{string.digits}")
72 | filename = "".join(char for char in name if char in allowed)
73 | return filename.replace(".", "-").replace(" ", "_")
74 |
75 | @classmethod
76 | def model_path(cls, model_cls, version=None):
77 | return os.path.join(torch.hub.get_dir(),
78 | "pretrained_models",
79 | "state_dicts",
80 | cls.model_filename(model_cls, version))
81 |
82 | @staticmethod
83 | def add_subparser(subparser):
84 | """Download models for NewsSentiment"""
85 | subparser.add_argument("--own_model_name", default="grutsc", type=str)
86 | subparser.add_argument(
87 | "--version",
88 | default=None,
89 | type=str,
90 | help="version of the model to download, use --force to overwrite a version which was already downloaded",
91 | )
92 | subparser.add_argument(
93 | "--force",
94 | action="store_true",
95 | help="force the download of a model and overwrite potential previous versions",
96 | )
97 | subparser.add_argument(
98 | "--list_versions",
99 | action="store_true",
100 | help="List all pretrained model versions which a model provides",
101 | )
102 | return subparser
103 |
104 | @classmethod
105 | def run_from_parser(cls, args_namespace):
106 | args = vars(args_namespace)
107 | return cls(**args)
108 |
109 |
--------------------------------------------------------------------------------
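
Instantiating `Download` performs the requested action directly in the constructor (the same path the `download` subcommand in `__main__.py` takes); a sketch, assuming the `grutsc` model:

```
from NewsSentiment.download import Download

# list the pretrained versions this model class advertises
Download(own_model_name="grutsc", list_versions=True)

# fetch the default version into torch.hub's cache directory (skipped if already present)
Download(own_model_name="grutsc")
```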
/NewsSentiment/earlystopping.py:
--------------------------------------------------------------------------------
1 | # taken from https://github.com/Bjarten/early-stopping-pytorch
2 | # Copyright by Bjarten
3 | # License: MIT
4 |
5 | from NewsSentiment.fxlogger import get_logger
6 |
7 |
8 | class EarlyStopping:
9 | """Early stops the training if validation loss doesn't improve after a given patience."""
10 |
11 | def __init__(self, patience=2, delta=0.01):
12 | """
13 | Args:
14 | patience (int): How long to wait after last time validation loss improved.
15 | Default: 2
16 | verbose (bool): If True, prints a message for each validation loss improvement.
17 | Default: False
18 | delta (float): Minimum change in the monitored quantity to qualify as an improvement.
19 | Default: 0.01
20 | """
21 | self.patience = patience
22 | self.counter = 0
23 | self.best_score = None
24 | self.early_stop = False
25 | self.delta = delta
26 | self.logger = get_logger()
27 | self.flag_has_score_increased_since_last_check = False
28 |
29 | def __call__(self, dev_score):
30 | if self.best_score is None:
31 | self.best_score = dev_score
32 | self.logger.debug(f'computed first dev-set score ({dev_score:.6f}).')
33 | self.flag_has_score_increased_since_last_check = True
34 | elif dev_score < self.best_score + self.delta:
35 | self.counter += 1
36 | self.logger.debug(
37 | f'patience counter: {self.counter} out of {self.patience} (cur-score: {dev_score}, best-score:'
38 | f' {self.best_score})')
39 | self.flag_has_score_increased_since_last_check = False
40 | if self.counter >= self.patience:
41 | self.early_stop = True
42 | else:
43 | self.best_score = dev_score
44 | self.counter = 0
45 | self.logger.debug(f'dev-set score increased ({self.best_score:.6f} --> {dev_score:.6f}).')
46 | self.flag_has_score_increased_since_last_check = True
47 |
--------------------------------------------------------------------------------
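
A sketch of the intended training-loop usage: call the instance once per epoch with the dev-set score and stop when `early_stop` flips (the class expects a higher-is-better metric, despite the upstream docstring speaking of validation loss):

```
from NewsSentiment.earlystopping import EarlyStopping

early_stopping = EarlyStopping(patience=2, delta=0.01)
for epoch, dev_score in enumerate([0.50, 0.55, 0.55, 0.551]):
    early_stopping(dev_score)
    if early_stopping.flag_has_score_increased_since_last_check:
        pass  # e.g., checkpoint the current best model here
    if early_stopping.early_stop:
        print(f"stopping after epoch {epoch}")
        break
```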
/NewsSentiment/evaluator.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 | from statistics import mean
3 |
4 | import jsonlines
5 | import numpy as np
6 | from sklearn import metrics
7 |
8 | from NewsSentiment.SentimentClasses import SentimentClasses
9 | from NewsSentiment.dataset import FXDataset
10 | from NewsSentiment.fxlogger import get_logger
11 | import torch
12 |
13 |
14 | class Evaluator:
15 | def __init__(
16 | self,
17 | sorted_expected_label_values,
18 | polarity_associations,
19 | snem_name,
20 | is_return_confidence,
21 | ):
22 | self.logger = get_logger()
23 | self.polarity_associations = polarity_associations
24 | self.pos_label_value = polarity_associations["positive"]
25 | self.neg_label_value = polarity_associations["negative"]
26 | self.sorted_expected_label_values = sorted_expected_label_values
27 | self.pos_label_index = self.sorted_expected_label_values.index(
28 | self.pos_label_value
29 | )
30 | self.neg_label_index = self.sorted_expected_label_values.index(
31 | self.neg_label_value
32 | )
33 | self.snem_name = snem_name
34 | self.is_return_confidence = is_return_confidence
35 |
36 | def mean_from_all_statistics(self, all_test_stats):
37 | # for Counters, we do not take the mean
38 | mean_test_stats = {}
39 | number_stats = len(all_test_stats)
40 |
41 | for key in all_test_stats[0]:
42 | value_type = type(all_test_stats[0][key])
43 |
44 | if value_type in [float, np.float64, np.float32]:
45 | aggr_val = 0.0
46 | for test_stat in all_test_stats:
47 | aggr_val += test_stat[key]
48 |
49 | mean_test_stats[key] = aggr_val / number_stats
50 |
51 | elif value_type == Counter:
52 | aggr_val = Counter()
53 | for test_stat in all_test_stats:
54 | aggr_val += test_stat[key]
55 | mean_test_stats[key] = aggr_val
56 |
57 | return mean_test_stats
58 |
59 | def calc_statistics(self, y_true, y_pred, y_pred_confidence):
60 | """
61 | Calculates performance statistics by comparing k-dimensional Tensors y_true and
62 | y_pred. Both y_true's and y_pred's shapes have to be (batchsize, maxtargetsperexample).
63 | :param y_true:
64 | :param y_pred:
65 | :return:
66 | """
67 | assert y_true.shape == y_pred.shape, "different shapes"
68 | assert y_true.shape[1] == FXDataset.NUM_MAX_TARGETS_PER_ITEM
69 |
70 | # for now, the following doesn't keep track of which prediction or true answer
71 | # belongs to which examples. for other measures, this might be necessary, though
72 | # in that case, the code would have to be changed, e.g., by keeping the original
73 | # dimensions. for now, we just unpack the Tensors to so that the former
74 | # evaluation logic will work on them. practically, this treats each non-fillup
75 | # target as an example
76 | y_true = y_true.view(-1)
77 | y_pred = y_pred.view(-1)
78 | if y_pred_confidence is not None:
79 | y_pred_confidence = y_pred_confidence.view(-1)
80 |
81 | # in both tensors, keep only those scalars that are non-fill in y_true
82 | non_fillup_mask = y_true != SentimentClasses.FILLUP_POLARITY_VALUE
83 | y_true = y_true[non_fillup_mask]
84 | y_pred = y_pred[non_fillup_mask]
85 | if y_pred_confidence is not None:
86 | y_pred_confidence = y_pred_confidence[non_fillup_mask].tolist()
87 |
88 | # this is just the previous, single-target evaluation logic
89 | y_true_list = y_true.tolist()
90 | y_pred_list = y_pred.tolist()
91 | y_true_count = Counter(y_true_list)
92 | y_pred_count = Counter(y_pred_list)
93 | confidence_info = {}
94 |
95 | # perform confidence evaluation if vector was given
96 | if y_pred_confidence is not None:
97 | # compare where equal for confidence evaluation
98 | is_correct_list = []
99 | for index in range(len(y_true_list)):
100 | y_true_item = y_true_list[index]
101 | y_pred_item = y_pred_list[index]
102 | if y_true_item == y_pred_item:
103 | is_correct_list.append(1)
104 | else:
105 | is_correct_list.append(0)
106 | # now we have a list whether the prediction is correct (1) or not (0)
107 | # let's compare it with the confidence
108 |
109 | # regression metrics
110 | mse = metrics.mean_squared_error(is_correct_list, y_pred_confidence)
111 | r2_score = metrics.r2_score(is_correct_list, y_pred_confidence)
112 |
113 | # convert to classification problem (uses 0.5 threshold)
114 | y_pred_confidence_classification = (
115 | torch.FloatTensor(y_pred_confidence) > 0.5
116 | ).tolist()
117 |
118 | f1_macro_conf = metrics.f1_score(
119 | is_correct_list, y_pred_confidence_classification, average="macro"
120 | )
121 | accuracy_conf = metrics.accuracy_score(
122 | is_correct_list, y_pred_confidence_classification
123 | )
124 | f1_of_classes = metrics.f1_score(
125 | is_correct_list, y_pred_confidence_classification, average=None
126 | ).tolist()
127 |
128 | confidence_info = {
129 | "f1m": f1_macro_conf,
130 | "acc": accuracy_conf,
131 | "f1_classes": f1_of_classes,
132 | "mse": mse,
133 | "r2score": r2_score,
134 | "y_pred_confidence_classification": y_pred_confidence_classification,
135 | "y_pred_confidence": y_pred_confidence,
136 | "y_true_confidence": is_correct_list,
137 | }
138 |
139 | f1_macro = metrics.f1_score(
140 | y_true, y_pred, labels=self.sorted_expected_label_values, average="macro"
141 | )
142 | f1_of_classes = metrics.f1_score(
143 | y_true, y_pred, labels=self.sorted_expected_label_values, average=None
144 | )
145 | f1_posneg = (
146 | f1_of_classes[self.pos_label_index] + f1_of_classes[self.neg_label_index]
147 | ) / 2.0
148 | confusion_matrix = metrics.confusion_matrix(
149 | y_true, y_pred, labels=self.sorted_expected_label_values
150 | )
151 | recalls_of_classes = metrics.recall_score(
152 | y_true, y_pred, labels=self.sorted_expected_label_values, average=None
153 | )
154 | recall_avg = mean(recalls_of_classes)
155 | recall_macro = metrics.recall_score(
156 | y_true, y_pred, labels=self.sorted_expected_label_values, average="macro"
157 | )
158 | precision_macro = metrics.precision_score(
159 | y_true, y_pred, labels=self.sorted_expected_label_values, average="macro"
160 | )
161 | accuracy = metrics.accuracy_score(y_true, y_pred)
162 |
163 | results = {
164 | "f1_macro": f1_macro,
165 | "confusion_matrix": confusion_matrix,
166 | "recalls_of_classes": recalls_of_classes,
167 | "recall_avg": recall_avg,
168 | "recall_macro": recall_macro,
169 | "precision_macro": precision_macro,
170 | "accuracy": accuracy,
171 | "f1_posneg": f1_posneg,
172 | "y_true_count": y_true_count,
173 | "y_pred_count": y_pred_count,
174 | }
175 | if y_pred_confidence is not None:
176 | results["confidence_info"] = confidence_info
177 |
178 | return results
179 |
180 | def print_stats(self, stats, description):
181 | self.logger.info(description)
182 | self.logger.info("{}: {}".format(self.snem_name, stats[self.snem_name]))
183 | self.logger.info(
184 | "y_true distribution: {}".format(sorted(stats["y_true_count"].items()))
185 | )
186 | self.logger.info(
187 | "y_pred distribution: {}".format(sorted(stats["y_pred_count"].items()))
188 | )
189 | self.logger.info(
190 | "> recall_avg: {:.4f}, f1_posneg: {:.4f}, acc: {:.4f}, f1_macro: {:.4f}".format(
191 | stats["recall_avg"],
192 | stats["f1_posneg"],
193 | stats["accuracy"],
194 | stats["f1_macro"],
195 | )
196 | )
197 |
198 | def write_error_table(self, y_true, y_pred, texts_list, filepath):
199 | y_true_list = y_true.tolist()
200 | y_pred_list = y_pred.tolist()
201 |
202 | with jsonlines.open(filepath, "w") as writer:
203 | for true_label, pred_label, text in zip(
204 | y_true_list, y_pred_list, texts_list
205 | ):
206 | writer.write(
207 | {"true_label": true_label, "pred_label": pred_label, "text": text}
208 | )
209 |
--------------------------------------------------------------------------------
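
A minimal sketch of wiring `Evaluator` to the `SentimentClasses` getters and scoring one padded batch; it assumes `FXDataset.NUM_MAX_TARGETS_PER_ITEM` (defined in `dataset.py`, not shown in this excerpt) gives the number of target slots per example:

```
import torch

from NewsSentiment.SentimentClasses import SentimentClasses
from NewsSentiment.dataset import FXDataset
from NewsSentiment.evaluator import Evaluator

SentimentClasses.Sentiment3ForNewsMtsc()
evaluator = Evaluator(
    sorted_expected_label_values=SentimentClasses.get_sorted_expected_label_values(),
    polarity_associations=SentimentClasses.get_polarity_associations(),
    snem_name="recall_avg",
    is_return_confidence=False,
)

# one example whose unused target slots carry the fill-up value; they are masked
# out inside calc_statistics
num_slots = FXDataset.NUM_MAX_TARGETS_PER_ITEM
y_true = torch.full((1, num_slots), SentimentClasses.FILLUP_POLARITY_VALUE, dtype=torch.long)
y_pred = torch.full((1, num_slots), SentimentClasses.FILLUP_POLARITY_VALUE, dtype=torch.long)
y_true[0, 0], y_pred[0, 0] = 2, 2  # a single "positive" target, predicted correctly
stats = evaluator.calc_statistics(y_true, y_pred, y_pred_confidence=None)
print(stats["recall_avg"], stats["accuracy"])
```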
/NewsSentiment/fxlogger.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | logger = None
4 |
5 |
6 | def get_logger():
7 | global logger
8 |
9 | if not logger:
10 | formatter = logging.Formatter(fmt='%(asctime)s %(levelname)s %(module)s(%(lineno)d):%(funcName)s|: %(message)s',
11 | datefmt='%H:%M:%S')
12 |
13 | handler = logging.StreamHandler()
14 | handler.setFormatter(formatter)
15 |
16 | logger = logging.getLogger("NewsSentiment")
17 | if logger.level == logging.NOTSET:
18 | logger.setLevel(logging.ERROR)
19 |
20 | if len(logger.handlers) == 0:
21 | logger.addHandler(handler)
22 |
23 | return logger
24 |
--------------------------------------------------------------------------------
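
The logger is created once under the name "NewsSentiment" and defaults to `ERROR` unless a level was set beforehand; raising the verbosity is a one-liner:

```
import logging

from NewsSentiment.fxlogger import get_logger

logger = get_logger()
logger.setLevel(logging.INFO)  # default level is ERROR
logger.info("now visible")
```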
/NewsSentiment/inferrest.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request
2 |
3 | from NewsSentiment import TargetSentimentClassifier
4 |
5 | app = Flask(__name__)
6 |
7 | tsc = TargetSentimentClassifier()
8 |
9 |
10 | @app.route("/infer", methods=["POST"])
11 | def index():
12 | text_left = request.form["left"]
13 | target_mention = request.form["target"]
14 | text_right = request.form["right"]
15 | return {
16 | "result": tsc.infer(
17 | text_left=text_left, target_mention=target_mention, text_right=text_right
18 | )
19 | }
20 |
21 |
22 | def start_rest_server(port=13273):
23 | print("starting server...")
24 | app.run(host="0.0.0.0", port=port)
25 | print("done")
26 |
27 |
28 | if __name__ == "__main__":
29 | start_rest_server()
30 |
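A hypothetical client sketch for the endpoint above, assuming the server runs locally on the default port 13273 and the requests package is available:

import requests

response = requests.post(
    "http://localhost:13273/infer",
    data={
        "left": "Whatever you think of ",
        "target": "Peter",
        "right": ", he did his job well.",
    },
)
print(response.json()["result"])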
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/__init__.py
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/bingliuopinion/bingliuopinion.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from NewsSentiment.diskdict import DiskDict
4 | from NewsSentiment.knowledge.bingliuopinion.converter import PATH_DICT_BING_LIU_OPINION_POLARITY, POLARITY2INDEX
5 | from NewsSentiment.knowledge.knowledgeutils import find_key_original_or_lc
6 |
7 | __ddict_emolex = DiskDict(PATH_DICT_BING_LIU_OPINION_POLARITY)
8 | __ddict_emolex_keys_lower = {k.lower(): v for k, v in __ddict_emolex.items()}
9 | __num_emotions = 2
10 | assert len(__ddict_emolex) == 6726
11 | assert len(POLARITY2INDEX) == __num_emotions
12 |
13 |
14 | def get_num_bingliu_polarities():
15 | return __num_emotions
16 |
17 |
18 | def get_bingliu_polarities_as_tensor(term: str):
19 | emotions = find_key_original_or_lc(__ddict_emolex, __ddict_emolex_keys_lower, term)
20 | tensor_emotions = torch.zeros(__num_emotions, dtype=torch.long)
21 | for emotion_index in emotions:
22 | tensor_emotions[emotion_index] = 1
23 | return tensor_emotions
24 |
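A minimal usage sketch (not part of this file); note that importing the module loads the .ddict shipped with the package and runs the asserts above:

from NewsSentiment.knowledge.bingliuopinion.bingliuopinion import (
    get_bingliu_polarities_as_tensor,
    get_num_bingliu_polarities,
)

print(get_num_bingliu_polarities())                    # 2
# one slot per polarity (index 0 = negative, index 1 = positive); a 1 marks membership
print(get_bingliu_polarities_as_tensor("excellent"))   # e.g. tensor([0, 1])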
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/bingliuopinion/converter.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict, Counter
2 | from pathlib import Path
3 | import csv
4 | import os
5 |
6 | from tqdm import tqdm
7 |
8 | from NewsSentiment.fxlogger import get_logger
9 | from NewsSentiment.diskdict import DiskDict
10 |
11 | POLARITY2INDEX = {
12 | "positive": 1,
13 | "negative": 0,
14 | }
15 | THIS_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
16 | PATH_DICT_BING_LIU_OPINION_POLARITY = THIS_DIR / "opinion_polarity.ddict"
17 |
18 | logger = get_logger()
19 |
20 |
21 | def convert_single_txt_to_dict(path: Path):
22 | terms = set()
23 | with open(path, "r") as file:
24 | line_count = 0
25 | for line in file:
26 | if line_count > 29:
27 | terms.add(line.strip())
28 | line_count += 1
29 | return terms
30 |
31 |
32 | def convert_txt_to_dict():
33 | path_pos = THIS_DIR / "positive-words.txt"
34 | path_neg = THIS_DIR / "negative-words.txt"
35 |
36 | term2polarity = defaultdict(set)
37 | polarity_counter = Counter()
38 |
39 | positive_terms = convert_single_txt_to_dict(path_pos)
40 | negative_terms = convert_single_txt_to_dict(path_neg)
41 | all_terms = positive_terms.union(negative_terms)
42 |
43 | for term in all_terms:
44 | if term in positive_terms:
45 | term2polarity[term].add(POLARITY2INDEX["positive"])
46 | polarity_counter["positive"] += 1
47 | if term in negative_terms:
48 | term2polarity[term].add(POLARITY2INDEX["negative"])
49 | polarity_counter["negative"] += 1
50 |
51 | logger.info("read %s terms", len(term2polarity))
52 | logger.info("polarity count:\n%s", polarity_counter.most_common())
53 |
54 | logger.info("saving to %s...", PATH_DICT_BING_LIU_OPINION_POLARITY)
55 | ddict_emolex = DiskDict(PATH_DICT_BING_LIU_OPINION_POLARITY)
56 | ddict_emolex.update(term2polarity)
57 | ddict_emolex.sync_to_disk()
58 | logger.info("done")
59 |
60 |
61 | if __name__ == "__main__":
62 | convert_txt_to_dict()
63 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/bingliuopinion/license.txt:
--------------------------------------------------------------------------------
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 | ;
3 | ; Both files (negative- and positive-words.txt) and the papers can all be downloaded from
4 | ; http://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
5 | ;
6 | ; If you use this list, please cite the following paper:
7 | ;
8 | ; Minqing Hu and Bing Liu. "Mining and Summarizing Customer Reviews."
9 | ; Proceedings of the ACM SIGKDD International Conference on Knowledge
10 | ; Discovery and Data Mining (KDD-2004), Aug 22-25, 2004, Seattle,
11 | ; Washington, USA,
12 | ; Notes:
13 | ; 1. The appearance of an opinion word in a sentence does not necessarily
14 | ; mean that the sentence expresses a positive or negative opinion.
15 | ; See the paper below:
16 | ;
17 | ; Bing Liu. "Sentiment Analysis and Subjectivity." An chapter in
18 | ; Handbook of Natural Language Processing, Second Edition,
19 | ; (editors: N. Indurkhya and F. J. Damerau), 2010.
20 | ;
21 | ; 2. You will notice many misspelled words in the list. They are not
22 | ; mistakes. They are included as these misspelled words appear
23 | ; frequently in social media content.
24 | ;
25 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/bingliuopinion/opinion_polarity.ddict:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/bingliuopinion/opinion_polarity.ddict
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/knowledgeutils.py:
--------------------------------------------------------------------------------
1 | def find_key_original_or_lc(data: dict, data_keys_lower: dict, key: str):
2 | # try to get the key as it is from the dict
3 | if key in data.keys():
4 | return data[key]
5 |     # if not contained, check whether a case-insensitive lookup finds an entry
6 | if key.lower() in data_keys_lower.keys():
7 | return data_keys_lower[key.lower()]
8 |     # if neither matches, return an empty set
9 | return set()
10 |
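A minimal sketch of the lookup order (exact key first, then lowercase, otherwise an empty set):

from NewsSentiment.knowledge.knowledgeutils import find_key_original_or_lc

data = {"Good": {1}}
data_keys_lower = {k.lower(): v for k, v in data.items()}

print(find_key_original_or_lc(data, data_keys_lower, "Good"))   # {1} (exact match)
print(find_key_original_or_lc(data, data_keys_lower, "GOOD"))   # {1} (lowercase match)
print(find_key_original_or_lc(data, data_keys_lower, "other"))  # set()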
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/liwc/data/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 | !readme.txt
6 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/liwc/data/readme.txt:
--------------------------------------------------------------------------------
1 | In order to use LIWC, you need to acquire a license from them.
2 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/liwc/dic.py:
--------------------------------------------------------------------------------
1 | def _parse_categories(lines):
2 | """
3 | Read (category_id, category_name) pairs from the categories section.
4 |     Each line consists of an integer followed by a tab and then the category name.
5 | This section is separated from the lexicon by a line consisting of a single "%".
6 | """
7 | for line in lines:
8 | line = line.strip()
9 | if line == "%":
10 | return
11 | # ignore non-matching groups of categories
12 | if "\t" in line:
13 | category_id, category_name = line.split("\t", 1)
14 | yield category_id, category_name
15 |
16 |
17 | def _parse_lexicon(lines, category_mapping):
18 | """
19 | Read (match_expression, category_names) pairs from the lexicon section.
20 | Each line consists of a match expression followed by a tab and then one or more
21 | tab-separated integers, which are mapped to category names using `category_mapping`.
22 | """
23 | for line in lines:
24 | line = line.strip()
25 | parts = line.split("\t")
26 | yield parts[0], [category_mapping[category_id] for category_id in parts[1:]]
27 |
28 |
29 | def read_dic(filepath):
30 | """
31 | Reads a LIWC lexicon from a file in the .dic format, returning a tuple of
32 | (lexicon, category_names), where:
33 | * `lexicon` is a dict mapping string patterns to lists of category names
34 | * `category_names` is a list of category names (as strings)
35 | """
36 | with open(filepath) as lines:
37 | # read up to first "%" (should be very first line of file)
38 | for line in lines:
39 | if line.strip() == "%":
40 | break
41 | # read categories (a mapping from integer string to category name)
42 | category_mapping = dict(_parse_categories(lines))
43 | # read lexicon (a mapping from matching string to a list of category names)
44 | lexicon = dict(_parse_lexicon(lines, category_mapping))
45 | return lexicon, list(category_mapping.values())
46 |
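A sketch of the .dic layout that read_dic expects and what it returns (the file name below is hypothetical; LIWC itself requires a license, see data/readme.txt):

from NewsSentiment.knowledge.liwc.dic import read_dic

# Expected file layout (tab-separated):
#   %
#   1<TAB>posemo
#   2<TAB>negemo
#   %
#   happy<TAB>1
#   hate*<TAB>2
lexicon, category_names = read_dic("LIWC2015_English.dic")
# lexicon        -> {"happy": ["posemo"], "hate*": ["negemo"]}
# category_names -> ["posemo", "negemo"]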
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/liwc/liwc.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from NewsSentiment.knowledge.liwc.liwchelper import load_token_parser
4 |
5 | parse, category_names = load_token_parser()
6 |
7 | LIWC_CATEGORY2INDEX = {}
8 | for index, category_name in enumerate(category_names):
9 | LIWC_CATEGORY2INDEX[category_name] = index
10 |
11 |
12 | def get_num_liwc_categories():
13 | return len(category_names)
14 |
15 |
16 | def get_liwc_categories_as_tensor(term: str):
17 | categories = parse(term)
18 | categories_of_lowercased = parse(term.lower())
19 |
20 | if len(categories) == 0 and len(categories_of_lowercased) > 0:
21 | # if we do not have categories of original term, but have them for lowercased term, use the latter
22 | categories = categories_of_lowercased
23 |
24 | tensor_emotions = torch.zeros(get_num_liwc_categories(), dtype=torch.long)
25 | for category in categories:
26 | index = LIWC_CATEGORY2INDEX[category]
27 | assert index < get_num_liwc_categories()
28 | tensor_emotions[index] = 1
29 | return tensor_emotions
30 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/liwc/liwchelper.py:
--------------------------------------------------------------------------------
1 | from .dic import read_dic
2 | from .trie import build_trie, search_trie
3 |
4 | try:
5 | import pkg_resources
6 |
7 | __version__ = pkg_resources.get_distribution("liwc").version
8 | except Exception:
9 | __version__ = None
10 |
11 |
12 | def load_token_parser(filepath="knowledge/liwc/data/LIWC2015_English.dic"):
13 | """
14 | Reads a LIWC lexicon from a file in the .dic format, returning a tuple of
15 | (parse, category_names), where:
16 | * `parse` is a function from a token to a list of strings (potentially
17 | empty) of matching categories
18 | * `category_names` is a list of strings representing all LIWC categories in
19 | the lexicon
20 | """
21 | lexicon, category_names = read_dic(filepath)
22 | trie = build_trie(lexicon)
23 |
24 | def parse_token(token):
25 | return search_trie(trie, token)
26 |
27 | return parse_token, category_names
28 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/liwc/trie.py:
--------------------------------------------------------------------------------
1 | def build_trie(lexicon):
2 | """
3 | Build a character-trie from the plain pattern_string -> categories_list
4 | mapping provided by `lexicon`.
5 |
6 | Some LIWC patterns end with a `*` to indicate a wildcard match.
7 | """
8 | trie = {}
9 | for pattern, category_names in lexicon.items():
10 | cursor = trie
11 | for char in pattern:
12 | if char == "*":
13 | cursor["*"] = category_names
14 | break
15 | if char not in cursor:
16 | cursor[char] = {}
17 | cursor = cursor[char]
18 | cursor["$"] = category_names
19 | return trie
20 |
21 |
22 | def search_trie(trie, token, token_i=0):
23 | """
24 | Search the given character-trie for paths that match the `token` string.
25 | """
26 | if "*" in trie:
27 | return trie["*"]
28 | if "$" in trie and token_i == len(token):
29 | return trie["$"]
30 | if token_i < len(token):
31 | char = token[token_i]
32 | if char in trie:
33 | return search_trie(trie[char], token, token_i + 1)
34 | return []
35 |
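A minimal sketch of the wildcard behaviour: a trailing * in a pattern matches any completion of that prefix, while plain patterns only match whole tokens.

from NewsSentiment.knowledge.liwc.trie import build_trie, search_trie

lexicon = {"happy": ["posemo"], "hate*": ["negemo"]}
trie = build_trie(lexicon)

print(search_trie(trie, "happy"))    # ['posemo']  (exact match via '$')
print(search_trie(trie, "hateful"))  # ['negemo']  (prefix match via 'hate*')
print(search_trie(trie, "happier"))  # []          (no wildcard for 'happy')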
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/mpqasubjectivity/converter.py:
--------------------------------------------------------------------------------
1 | import os
2 | from collections import defaultdict, Counter
3 | from pathlib import Path
4 | import csv
5 |
6 | from tqdm import tqdm
7 |
8 | from NewsSentiment.fxlogger import get_logger
9 | from NewsSentiment.diskdict import DiskDict
10 |
11 | POLARITY2INDEX = {
12 | "positive": 2,
13 | "neutral": 1,
14 | "negative": 0,
15 | }
16 | THIS_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
17 | PATH_DICT_MPQA_SUBJECTIVITY = THIS_DIR / "subjclueslen1-HLTEMNLP05.tff.ddict"
18 |
19 | logger = get_logger()
20 |
21 |
22 | def get_value(entry_key_value: str):
23 | return entry_key_value.split("=")[1]
24 |
25 |
26 | def convert_txt_to_dict():
27 | path_dict = THIS_DIR / "subjclueslen1-HLTEMNLP05.tff"
28 |
29 | term2polarity = defaultdict(set)
30 | polarity_counter = Counter()
31 | with open(path_dict, "r") as csv_file:
32 | csv_reader = csv.reader(csv_file, delimiter=" ")
33 | for line in csv_reader:
34 | entry_type, entry_length, entry_word, entry_pos, entry_is_stemmed, entry_polarity = line
35 | entry_type = get_value(entry_type)
36 | entry_length = get_value(entry_length)
37 | entry_word = get_value(entry_word)
38 | entry_pos = get_value(entry_pos)
39 | entry_is_stemmed = get_value(entry_is_stemmed)
40 | entry_polarity = get_value(entry_polarity)
41 |
42 |             assert " " not in entry_word, f"cannot handle spaces in word: {entry_word}"
43 | assert entry_polarity in POLARITY2INDEX.keys() or entry_polarity == "both", f"polarity label not known: {entry_polarity} for {entry_word}"
44 |
45 | if entry_polarity == "both":
46 | polarities = [POLARITY2INDEX["positive"], POLARITY2INDEX["negative"]]
47 | else:
48 | polarities = (POLARITY2INDEX[entry_polarity],)
49 |
50 | for polarity in polarities:
51 | term2polarity[entry_word].add(polarity)
52 | polarity_counter[polarity] += 1
53 |
54 | logger.info("read %s terms", len(term2polarity))
55 | logger.info("polarity count:\n%s", polarity_counter.most_common())
56 |
57 | logger.info("saving to %s...", PATH_DICT_MPQA_SUBJECTIVITY)
58 | ddict_emolex = DiskDict(PATH_DICT_MPQA_SUBJECTIVITY)
59 | ddict_emolex.update(term2polarity)
60 | ddict_emolex.sync_to_disk()
61 | logger.info("done")
62 |
63 |
64 | if __name__ == "__main__":
65 | convert_txt_to_dict()
66 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/mpqasubjectivity/mpqasubjectivity.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from NewsSentiment.diskdict import DiskDict
4 | from NewsSentiment.knowledge.knowledgeutils import find_key_original_or_lc
5 | from NewsSentiment.knowledge.mpqasubjectivity.converter import POLARITY2INDEX, PATH_DICT_MPQA_SUBJECTIVITY
6 |
7 | __ddict_emolex = DiskDict(PATH_DICT_MPQA_SUBJECTIVITY)
8 | __ddict_emolex_keys_lower = {k.lower(): v for k, v in __ddict_emolex.items()}
9 | __num_emotions = 3
10 | assert len(__ddict_emolex) == 6886, len(__ddict_emolex)
11 | assert len(POLARITY2INDEX) == __num_emotions
12 |
13 |
14 | def get_num_mpqa_subjectivity_polarities():
15 | return __num_emotions
16 |
17 |
18 | def get_mpqa_subjectivity_polarities_as_tensor(term: str):
19 | emotions = find_key_original_or_lc(__ddict_emolex, __ddict_emolex_keys_lower, term)
20 | tensor_emotions = torch.zeros(__num_emotions, dtype=torch.long)
21 | for emotion_index in emotions:
22 | tensor_emotions[emotion_index] = 1
23 | return tensor_emotions
24 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/mpqasubjectivity/subjclueslen1-HLTEMNLP05.tff.ddict:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/mpqasubjectivity/subjclueslen1-HLTEMNLP05.tff.ddict
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/nrcemolex/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/knowledge/nrcemolex/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/nrcemolex/converter.py:
--------------------------------------------------------------------------------
1 | import os
2 | from collections import defaultdict, Counter
3 | from pathlib import Path
4 | import csv
5 |
6 | from NewsSentiment.fxlogger import get_logger
7 | from NewsSentiment.diskdict import DiskDict
8 |
9 | EMOTION2INDEX = {
10 | "anger": 0,
11 | "anticipation": 1,
12 | "disgust": 2,
13 | "fear": 3,
14 | "joy": 4,
15 | "negative": 5,
16 | "positive": 6,
17 | "sadness": 7,
18 | "surprise": 8,
19 | "trust": 9,
20 | }
21 | THIS_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
22 | PATH_DICT_NRC_EMOLEX = THIS_DIR / "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt.ddict"
23 |
24 | logger = get_logger()
25 |
26 |
27 | def convert_txt_to_dict():
28 | path_dict = THIS_DIR / "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
29 |
30 | term2emotions = defaultdict(set)
31 | emotion_counter = Counter()
32 | with open(path_dict, "r") as csv_file:
33 | csv_reader = csv.reader(csv_file, delimiter="\t")
34 | for term, emotion, is_present in csv_reader:
35 | if is_present == "1":
36 | emotion_index = EMOTION2INDEX[emotion]
37 | term2emotions[term].add(emotion_index)
38 | emotion_counter[emotion] += 1
39 | elif is_present == "0":
40 | pass
41 | else:
42 | raise ValueError
43 | logger.info("read %s terms", len(term2emotions))
44 | logger.info("emotion count:\n%s", emotion_counter.most_common())
45 |
46 |     logger.info("saving to %s...", PATH_DICT_NRC_EMOLEX)
47 | ddict_emolex = DiskDict(PATH_DICT_NRC_EMOLEX)
48 | ddict_emolex.update(term2emotions)
49 | ddict_emolex.sync_to_disk()
50 | logger.info("done")
51 |
52 |
53 | if __name__ == "__main__":
54 | convert_txt_to_dict()
55 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/nrcemolex/nrcemolex.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from NewsSentiment.knowledge.knowledgeutils import find_key_original_or_lc
4 | from NewsSentiment.knowledge.nrcemolex.converter import PATH_DICT_NRC_EMOLEX, EMOTION2INDEX
5 | from NewsSentiment.diskdict import DiskDict
6 |
7 | __ddict_emolex = DiskDict(PATH_DICT_NRC_EMOLEX)
8 | __ddict_emolex_keys_lower = {k.lower(): v for k, v in __ddict_emolex.items()}
9 | __num_emotions = 10
10 | assert len(__ddict_emolex) == 6468
11 | assert len(EMOTION2INDEX) == __num_emotions
12 |
13 |
14 | def get_num_nrc_emotions():
15 | return __num_emotions
16 |
17 |
18 | def get_nrc_emotions_as_tensor(term: str):
19 | emotions = find_key_original_or_lc(__ddict_emolex, __ddict_emolex_keys_lower, term)
20 | tensor_emotions = torch.zeros(__num_emotions, dtype=torch.long)
21 | for emotion_index in emotions:
22 | tensor_emotions[emotion_index] = 1
23 | return tensor_emotions
24 |
--------------------------------------------------------------------------------
/NewsSentiment/knowledge/zeros/zerosknowledge.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def get_num_zero_dimensions():
5 |     # while one dimension would suffice, using only one can lead to pytorch's batcher
6 |     # removing the singleton dimension (especially when single_targets=True). specifically,
7 |     # with only one target per batch item, the knowledge source has shape (1, 150, 1), and the last
8 |     # dimension is removed during batching by pytorch. so to keep things simple, we just use two
9 |     # dimensions here to avoid having a singleton dimension at the end
10 | return 2
11 |
12 |
13 | def get_zeros_as_tensor(term: str):
14 | tensor_emotions = torch.zeros(get_num_zero_dimensions(), dtype=torch.long)
15 | return tensor_emotions
16 |
--------------------------------------------------------------------------------
/NewsSentiment/layers/AggregatorForBert.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 |
4 |
5 | class AggregatorForBert(nn.Module):
6 | """
7 | According to https://huggingface.co/transformers/model_doc/bert.html#bertmodel we
8 |     should not use the pooler_output (as in the original SPC version) but get the
9 |     last_hidden_state, "averaging or pooling the sequence of hidden-states for the
10 |     whole input sequence"
11 | """
12 | def __init__(self, spc_lm_representation: str):
13 | super(AggregatorForBert, self).__init__()
14 | self.spc_lm_representation = spc_lm_representation
15 |
16 | def forward(self, last_hidden_state, pooler_output, all_hidden_states):
17 | if self.spc_lm_representation == "pooler_output":
18 | prepared_output = pooler_output
19 | elif self.spc_lm_representation == "mean_last":
20 | prepared_output = last_hidden_state.mean(dim=1)
21 | elif self.spc_lm_representation == "mean_last_four":
22 | prepared_output = (
23 | torch.stack(all_hidden_states[-4:]).mean(dim=0).mean(dim=1)
24 | )
25 | elif self.spc_lm_representation == "mean_last_two":
26 | prepared_output = (
27 | torch.stack(all_hidden_states[-2:]).mean(dim=0).mean(dim=1)
28 | )
29 | elif self.spc_lm_representation == "mean_all":
30 | prepared_output = torch.stack(all_hidden_states).mean(dim=0).mean(dim=1)
31 | elif self.spc_lm_representation == "sum_last":
32 | prepared_output = last_hidden_state.sum(dim=1)
33 | elif self.spc_lm_representation == "sum_last_four":
34 | prepared_output = torch.stack(all_hidden_states[-4:]).sum(dim=0).sum(dim=1)
35 | elif self.spc_lm_representation == "sum_last_two":
36 | prepared_output = torch.stack(all_hidden_states[-2:]).sum(dim=0).sum(dim=1)
37 | elif self.spc_lm_representation == "sum_all":
38 | prepared_output = torch.stack(all_hidden_states).sum(dim=0).sum(dim=1)
39 | return prepared_output
40 |
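A minimal sketch with dummy tensors standing in for a BERT output (batch size 2, sequence length 4, hidden size 8, 13 hidden-state layers); the strategy name is one of those handled in forward above:

import torch

from NewsSentiment.layers.AggregatorForBert import AggregatorForBert

aggregator = AggregatorForBert(spc_lm_representation="mean_last_four")

last_hidden_state = torch.randn(2, 4, 8)
pooler_output = torch.randn(2, 8)
all_hidden_states = tuple(torch.randn(2, 4, 8) for _ in range(13))

pooled = aggregator(last_hidden_state, pooler_output, all_hidden_states)
print(pooled.shape)  # torch.Size([2, 8])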
--------------------------------------------------------------------------------
/NewsSentiment/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # file: __init__.py
3 | # author: songyouwei
4 | # Copyright (C) 2018. All Rights Reserved.
--------------------------------------------------------------------------------
/NewsSentiment/layers/attention.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # file: attention.py
3 | # author: songyouwei for top two classes, huggingface for third (slightly
4 | # adapted by FH)
5 | # Copyright (C) 2020. All Rights Reserved.
6 |
7 | import math
8 | import torch
9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 |
12 |
13 | class Attention(nn.Module):
14 | def __init__(
15 | self,
16 | embed_dim,
17 | hidden_dim=None,
18 | out_dim=None,
19 | n_head=1,
20 | score_function="dot_product",
21 | dropout=0,
22 | ):
23 | """ Attention Mechanism
24 | :param embed_dim:
25 | :param hidden_dim:
26 | :param out_dim:
27 | :param n_head: num of head (Multi-Head Attention)
28 |         :param score_function: dot_product / scaled_dot_product / mlp (concat) / bi_linear (general dot)
29 | :return (?, q_len, out_dim,)
30 | """
31 | super(Attention, self).__init__()
32 | if hidden_dim is None:
33 | hidden_dim = embed_dim // n_head
34 | if out_dim is None:
35 | out_dim = embed_dim
36 | self.embed_dim = embed_dim
37 | self.hidden_dim = hidden_dim
38 | self.n_head = n_head
39 | self.score_function = score_function
40 | self.w_k = nn.Linear(embed_dim, n_head * hidden_dim)
41 | self.w_q = nn.Linear(embed_dim, n_head * hidden_dim)
42 | self.proj = nn.Linear(n_head * hidden_dim, out_dim)
43 | self.dropout = nn.Dropout(dropout)
44 | if score_function == "mlp":
45 | self.weight = nn.Parameter(torch.Tensor(hidden_dim * 2))
46 | elif self.score_function == "bi_linear":
47 | self.weight = nn.Parameter(torch.Tensor(hidden_dim, hidden_dim))
48 | else: # dot_product / scaled_dot_product
49 | self.register_parameter("weight", None)
50 | self.reset_parameters()
51 |
52 | def reset_parameters(self):
53 | stdv = 1.0 / math.sqrt(self.hidden_dim)
54 | if self.weight is not None:
55 | self.weight.data.uniform_(-stdv, stdv)
56 |
57 | def forward(self, k, q):
58 | if len(q.shape) == 2: # q_len missing
59 | q = torch.unsqueeze(q, dim=1)
60 | if len(k.shape) == 2: # k_len missing
61 | k = torch.unsqueeze(k, dim=1)
62 | mb_size = k.shape[0] # ?
63 | k_len = k.shape[1]
64 | q_len = q.shape[1]
65 | # k: (?, k_len, embed_dim,)
66 | # q: (?, q_len, embed_dim,)
67 | # kx: (n_head*?, k_len, hidden_dim)
68 | # qx: (n_head*?, q_len, hidden_dim)
69 | # score: (n_head*?, q_len, k_len,)
70 | # output: (?, q_len, out_dim,)
71 | kx = self.w_k(k).view(mb_size, k_len, self.n_head, self.hidden_dim)
72 | kx = kx.permute(2, 0, 1, 3).contiguous().view(-1, k_len, self.hidden_dim)
73 | qx = self.w_q(q).view(mb_size, q_len, self.n_head, self.hidden_dim)
74 | qx = qx.permute(2, 0, 1, 3).contiguous().view(-1, q_len, self.hidden_dim)
75 | if self.score_function == "dot_product":
76 | kt = kx.permute(0, 2, 1)
77 | score = torch.bmm(qx, kt)
78 | elif self.score_function == "scaled_dot_product":
79 | kt = kx.permute(0, 2, 1)
80 | qkt = torch.bmm(qx, kt)
81 | score = torch.div(qkt, math.sqrt(self.hidden_dim))
82 | elif self.score_function == "mlp":
83 | kxx = torch.unsqueeze(kx, dim=1).expand(-1, q_len, -1, -1)
84 | qxx = torch.unsqueeze(qx, dim=2).expand(-1, -1, k_len, -1)
85 | kq = torch.cat((kxx, qxx), dim=-1) # (n_head*?, q_len, k_len, hidden_dim*2)
86 | score = torch.tanh(torch.matmul(kq, self.weight))
87 | elif self.score_function == "bi_linear":
88 | qw = torch.matmul(qx, self.weight)
89 | kt = kx.permute(0, 2, 1)
90 | score = torch.bmm(qw, kt)
91 | else:
92 | raise RuntimeError("invalid score_function")
93 | score = F.softmax(score, dim=-1)
94 | output = torch.bmm(score, kx) # (n_head*?, q_len, hidden_dim)
95 | output = torch.cat(
96 | torch.split(output, mb_size, dim=0), dim=-1
97 | ) # (?, q_len, n_head*hidden_dim)
98 | output = self.proj(output) # (?, q_len, out_dim)
99 | output = self.dropout(output)
100 | return output, score
101 |
102 |
103 | class NoQueryAttention(Attention):
104 | """q is a parameter"""
105 |
106 | def __init__(
107 | self,
108 | embed_dim,
109 | hidden_dim=None,
110 | out_dim=None,
111 | n_head=1,
112 | score_function="dot_product",
113 | q_len=1,
114 | dropout=0,
115 | ):
116 | super(NoQueryAttention, self).__init__(
117 | embed_dim, hidden_dim, out_dim, n_head, score_function, dropout
118 | )
119 | self.q_len = q_len
120 | self.q = nn.Parameter(torch.Tensor(q_len, embed_dim))
121 | self.reset_q()
122 |
123 | def reset_q(self):
124 | stdv = 1.0 / math.sqrt(self.embed_dim)
125 | self.q.data.uniform_(-stdv, stdv)
126 |
127 | def forward(self, k, **kwargs):
128 | mb_size = k.shape[0]
129 | q = self.q.expand(mb_size, -1, -1)
130 | return super(NoQueryAttention, self).forward(k, q)
131 |
132 |
133 | class FXBertSelfAttention(nn.Module):
134 | """
135 | Identical to the class BertSelfAttention by transformers by huggingface, but
136 | can be used without the need to have a config instance of class BertConfig but
137 | instead the required values can be passed on directly
138 | """
139 | def __init__(self, hidden_size, num_attention_heads, attention_probs_dropout_prob=0.1):
140 | super().__init__()
141 | if hidden_size % num_attention_heads != 0:
142 | raise ValueError(
143 | "The hidden size (%d) is not a multiple of the number of attention "
144 | "heads (%d)" % (hidden_size, num_attention_heads)
145 | )
146 |
147 | self.num_attention_heads = num_attention_heads
148 | self.attention_head_size = int(hidden_size / num_attention_heads)
149 | self.all_head_size = self.num_attention_heads * self.attention_head_size
150 |
151 | self.query = nn.Linear(hidden_size, self.all_head_size)
152 | self.key = nn.Linear(hidden_size, self.all_head_size)
153 | self.value = nn.Linear(hidden_size, self.all_head_size)
154 |
155 | self.dropout = nn.Dropout(attention_probs_dropout_prob)
156 |
157 | def transpose_for_scores(self, x):
158 | new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
159 | x = x.view(*new_x_shape)
160 | return x.permute(0, 2, 1, 3)
161 |
162 | def forward(
163 | self,
164 | hidden_states,
165 | attention_mask=None,
166 | head_mask=None,
167 | encoder_hidden_states=None,
168 | encoder_attention_mask=None,
169 | output_attentions=False,
170 | ):
171 | mixed_query_layer = self.query(hidden_states)
172 |
173 | # If this is instantiated as a cross-attention module, the keys
174 | # and values come from an encoder; the attention mask needs to be
175 | # such that the encoder's padding tokens are not attended to.
176 | if encoder_hidden_states is not None:
177 | mixed_key_layer = self.key(encoder_hidden_states)
178 | mixed_value_layer = self.value(encoder_hidden_states)
179 | attention_mask = encoder_attention_mask
180 | else:
181 | mixed_key_layer = self.key(hidden_states)
182 | mixed_value_layer = self.value(hidden_states)
183 |
184 | query_layer = self.transpose_for_scores(mixed_query_layer)
185 | key_layer = self.transpose_for_scores(mixed_key_layer)
186 | value_layer = self.transpose_for_scores(mixed_value_layer)
187 |
188 | # Take the dot product between "query" and "key" to get the raw attention scores.
189 | attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
190 | attention_scores = attention_scores / math.sqrt(self.attention_head_size)
191 | if attention_mask is not None:
192 | # Apply the attention mask is (precomputed for all layers in BertModel forward() function)
193 | attention_scores = attention_scores + attention_mask
194 |
195 | # Normalize the attention scores to probabilities.
196 | attention_probs = nn.Softmax(dim=-1)(attention_scores)
197 |
198 | # This is actually dropping out entire tokens to attend to, which might
199 | # seem a bit unusual, but is taken from the original Transformer paper.
200 | attention_probs = self.dropout(attention_probs)
201 |
202 | # Mask heads if we want to
203 | if head_mask is not None:
204 | attention_probs = attention_probs * head_mask
205 |
206 | context_layer = torch.matmul(attention_probs, value_layer)
207 |
208 | context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
209 | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
210 | context_layer = context_layer.view(*new_context_layer_shape)
211 |
212 | outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)
213 | return outputs
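A minimal sketch for the Attention class defined at the top of this file (dummy shapes; 4 heads over a 64-dimensional embedding):

import torch

from NewsSentiment.layers.attention import Attention

attention = Attention(embed_dim=64, n_head=4, score_function="scaled_dot_product")

k = torch.randn(8, 10, 64)  # (batch, k_len, embed_dim)
q = torch.randn(8, 1, 64)   # (batch, q_len, embed_dim)
output, score = attention(k, q)
print(output.shape)  # torch.Size([8, 1, 64])   -> (batch, q_len, out_dim)
print(score.shape)   # torch.Size([32, 1, 10])  -> (n_head * batch, q_len, k_len)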
--------------------------------------------------------------------------------
/NewsSentiment/layers/dynamic_rnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # file: dynamic_rnn.py
3 | # author: songyouwei
4 | # Copyright (C) 2018. All Rights Reserved.
5 |
6 |
7 | import torch
8 | import torch.nn as nn
9 | import numpy as np
10 |
11 | class DynamicLSTM(nn.Module):
12 | def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=True, dropout=0,
13 | bidirectional=False, only_use_last_hidden_state=False, rnn_type = 'LSTM'):
14 | """
15 |         LSTM that can handle variable-length sequences, used like TensorFlow's RNN(input, length, ...).
16 |
17 | :param input_size:The number of expected features in the input x
18 | :param hidden_size:The number of features in the hidden state h
19 | :param num_layers:Number of recurrent layers.
20 | :param bias:If False, then the layer does not use bias weights b_ih and b_hh. Default: True
21 | :param batch_first:If True, then the input and output tensors are provided as (batch, seq, feature)
22 | :param dropout:If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
23 | :param bidirectional:If True, becomes a bidirectional RNN. Default: False
24 | :param rnn_type: {LSTM, GRU, RNN}
25 | """
26 | super(DynamicLSTM, self).__init__()
27 | self.input_size = input_size
28 | self.hidden_size = hidden_size
29 | self.num_layers = num_layers
30 | self.bias = bias
31 | self.batch_first = batch_first
32 | self.dropout = dropout
33 | self.bidirectional = bidirectional
34 | self.only_use_last_hidden_state = only_use_last_hidden_state
35 | self.rnn_type = rnn_type
36 |
37 | if self.rnn_type == 'LSTM':
38 | self.RNN = nn.LSTM(
39 | input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
40 | bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
41 | elif self.rnn_type == 'GRU':
42 | self.RNN = nn.GRU(
43 | input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
44 | bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
45 | elif self.rnn_type == 'RNN':
46 | self.RNN = nn.RNN(
47 | input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
48 | bias=bias, batch_first=batch_first, dropout=dropout, bidirectional=bidirectional)
49 |
50 |
51 | def forward(self, x, x_len):
52 | """
53 |         sequence -> sort -> pad and pack -> process using RNN -> unpack -> unsort
54 |
55 | :param x: sequence embedding vectors
56 | :param x_len: numpy/tensor list
57 | :return:
58 | """
59 | """sort"""
60 | x_sort_idx = torch.sort(-x_len)[1].long()
61 | x_unsort_idx = torch.sort(x_sort_idx)[1].long()
62 | x_len = x_len[x_sort_idx]
63 | x = x[x_sort_idx]
64 | """pack"""
65 | x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=self.batch_first)
66 |
67 | # process using the selected RNN
68 | if self.rnn_type == 'LSTM':
69 | out_pack, (ht, ct) = self.RNN(x_emb_p, None)
70 | else:
71 | out_pack, ht = self.RNN(x_emb_p, None)
72 | ct = None
73 | """unsort: h"""
74 | ht = torch.transpose(ht, 0, 1)[
75 | x_unsort_idx] # (num_layers * num_directions, batch, hidden_size) -> (batch, ...)
76 | ht = torch.transpose(ht, 0, 1)
77 |
78 | if self.only_use_last_hidden_state:
79 | return ht
80 | else:
81 | """unpack: out"""
82 | out = torch.nn.utils.rnn.pad_packed_sequence(out_pack, batch_first=self.batch_first) # (sequence, lengths)
83 | out = out[0] #
84 | out = out[x_unsort_idx]
85 | """unsort: out c"""
86 | if self.rnn_type =='LSTM':
87 | ct = torch.transpose(ct, 0, 1)[
88 | x_unsort_idx] # (num_layers * num_directions, batch, hidden_size) -> (batch, ...)
89 | ct = torch.transpose(ct, 0, 1)
90 |
91 | return out, (ht, ct)
92 |
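A minimal sketch: a padded batch with per-sequence lengths; the module sorts, packs, runs the RNN, and restores the original order.

import torch

from NewsSentiment.layers.dynamic_rnn import DynamicLSTM

rnn = DynamicLSTM(input_size=16, hidden_size=32, batch_first=True)

x = torch.randn(3, 5, 16)        # 3 sequences, padded to length 5
x_len = torch.tensor([5, 3, 2])  # true lengths
out, (ht, ct) = rnn(x, x_len)
print(out.shape)  # torch.Size([3, 5, 32])
print(ht.shape)   # torch.Size([1, 3, 32])  -> (num_layers * num_directions, batch, hidden)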
--------------------------------------------------------------------------------
/NewsSentiment/layers/point_wise_feed_forward.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # file: point_wise_feed_forward.py
3 | # author: songyouwei
4 | # Copyright (C) 2018. All Rights Reserved.
5 |
6 | import torch.nn as nn
7 |
8 |
9 | class PositionwiseFeedForward(nn.Module):
10 | ''' A two-feed-forward-layer module '''
11 | def __init__(self, d_hid, d_inner_hid=None, dropout=0):
12 | super(PositionwiseFeedForward, self).__init__()
13 | if d_inner_hid is None:
14 | d_inner_hid = d_hid
15 | self.w_1 = nn.Conv1d(d_hid, d_inner_hid, 1) # position-wise
16 | self.w_2 = nn.Conv1d(d_inner_hid, d_hid, 1) # position-wise
17 | self.dropout = nn.Dropout(dropout)
18 | self.relu = nn.ReLU()
19 |
20 | def forward(self, x):
21 | output = self.relu(self.w_1(x.transpose(1, 2)))
22 | output = self.w_2(output).transpose(2, 1)
23 | output = self.dropout(output)
24 | return output
25 |
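A minimal sketch: the block keeps the (batch, seq_len) layout and only transforms the feature dimension.

import torch

from NewsSentiment.layers.point_wise_feed_forward import PositionwiseFeedForward

ffn = PositionwiseFeedForward(d_hid=64, d_inner_hid=128, dropout=0.1)
x = torch.randn(8, 10, 64)  # (batch, seq_len, d_hid)
print(ffn(x).shape)         # torch.Size([8, 10, 64])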
--------------------------------------------------------------------------------
/NewsSentiment/layers/pytorchnlpattention.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | # from https://pytorchnlp.readthedocs.io/en/latest/_modules/torchnlp/nn/attention.html
5 | class ClassicAttention(nn.Module):
6 | """ Applies attention mechanism on the `context` using the `query`.
7 |
8 | **Thank you** to IBM for their initial implementation of :class:`Attention`. Here is
9 |     their License.
10 | 
11 |
12 | Args:
13 | dimensions (int): Dimensionality of the query and context.
14 | attention_type (str, optional): How to compute the attention score:
15 |
16 | * dot: :math:`score(H_j,q) = H_j^T q`
17 | * general: :math:`score(H_j, q) = H_j^T W_a q`
18 |
19 | Example:
20 |
21 | >>> attention = Attention(256)
22 | >>> query = torch.randn(5, 1, 256)
23 | >>> context = torch.randn(5, 5, 256)
24 | >>> output, weights = attention(query, context)
25 | >>> output.size()
26 | torch.Size([5, 1, 256])
27 | >>> weights.size()
28 | torch.Size([5, 1, 5])
29 | """
30 |
31 | def __init__(self, dimensions, attention_type="general"):
32 | super(ClassicAttention, self).__init__()
33 |
34 | if attention_type not in ["dot", "general"]:
35 | raise ValueError("Invalid attention type selected.")
36 |
37 | self.attention_type = attention_type
38 | if self.attention_type == "general":
39 | self.linear_in = nn.Linear(dimensions, dimensions, bias=False)
40 |
41 | self.linear_out = nn.Linear(dimensions * 2, dimensions, bias=False)
42 | self.softmax = nn.Softmax(dim=-1)
43 | self.tanh = nn.Tanh()
44 |
45 | def forward(self, query, context):
46 | """
47 | Args:
48 | query (:class:`torch.FloatTensor` [batch size, output length, dimensions]): Sequence of
49 | queries to query the context.
50 | context (:class:`torch.FloatTensor` [batch size, query length, dimensions]): Data
51 |                 over which to apply the attention mechanism.
52 |
53 | Returns:
54 | :class:`tuple` with `output` and `weights`:
55 | * **output** (:class:`torch.LongTensor` [batch size, output length, dimensions]):
56 | Tensor containing the attended features.
57 | * **weights** (:class:`torch.FloatTensor` [batch size, output length, query length]):
58 | Tensor containing attention weights.
59 | """
60 | batch_size, output_len, dimensions = query.size()
61 | query_len = context.size(1)
62 |
63 | if self.attention_type == "general":
64 | query = query.reshape(batch_size * output_len, dimensions)
65 | query = self.linear_in(query)
66 | query = query.reshape(batch_size, output_len, dimensions)
67 |
68 | # TODO: Include mask on PADDING_INDEX?
69 |
70 | # (batch_size, output_len, dimensions) * (batch_size, query_len, dimensions) ->
71 | # (batch_size, output_len, query_len)
72 | attention_scores = torch.bmm(query, context.transpose(1, 2).contiguous())
73 |
74 | # Compute weights across every context sequence
75 | attention_scores = attention_scores.view(batch_size * output_len, query_len)
76 | attention_weights = self.softmax(attention_scores)
77 | attention_weights = attention_weights.view(batch_size, output_len, query_len)
78 |
79 | # (batch_size, output_len, query_len) * (batch_size, query_len, dimensions) ->
80 | # (batch_size, output_len, dimensions)
81 | mix = torch.bmm(attention_weights, context)
82 |
83 | # concat -> (batch_size * output_len, 2*dimensions)
84 | combined = torch.cat((mix, query), dim=2)
85 | combined = combined.view(batch_size * output_len, 2 * dimensions)
86 |
87 | # Apply linear_out on every 2nd dimension of concat
88 | # output -> (batch_size, output_len, dimensions)
89 | output = self.linear_out(combined).view(batch_size, output_len, dimensions)
90 | output = self.tanh(output)
91 |
92 | return output, attention_weights
93 |
--------------------------------------------------------------------------------
/NewsSentiment/layers/squeeze_embedding.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # file: squeeze_embedding.py
3 | # author: songyouwei
4 | # Copyright (C) 2018. All Rights Reserved.
5 |
6 |
7 | import torch
8 | import torch.nn as nn
9 | import numpy as np
10 |
11 | class SqueezeEmbedding(nn.Module):
12 | """
13 | Squeeze sequence embedding length to the longest one in the batch
14 | """
15 | def __init__(self, batch_first=True):
16 | super(SqueezeEmbedding, self).__init__()
17 | self.batch_first = batch_first
18 |
19 | def forward(self, x, x_len):
20 | """
21 |         sequence -> sort -> pad and pack -> unpack -> unsort
22 | :param x: sequence embedding vectors
23 | :param x_len: numpy/tensor list
24 | :return:
25 | """
26 | """sort"""
27 | x_sort_idx = torch.sort(-x_len)[1].long()
28 | x_unsort_idx = torch.sort(x_sort_idx)[1].long()
29 | x_len = x_len[x_sort_idx]
30 | x = x[x_sort_idx]
31 | """pack"""
32 | x_emb_p = torch.nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=self.batch_first)
33 | """unpack: out"""
34 | out = torch.nn.utils.rnn.pad_packed_sequence(x_emb_p, batch_first=self.batch_first) # (sequence, lengths)
35 | out = out[0] #
36 | """unsort"""
37 | out = out[x_unsort_idx]
38 | return out
39 |
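A minimal sketch: sequences padded to length 10 are squeezed to the longest true length in the batch (here 6).

import torch

from NewsSentiment.layers.squeeze_embedding import SqueezeEmbedding

squeeze = SqueezeEmbedding(batch_first=True)
x = torch.randn(3, 10, 32)       # padded to length 10
x_len = torch.tensor([6, 4, 2])  # true lengths
print(squeeze(x, x_len).shape)   # torch.Size([3, 6, 32])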
--------------------------------------------------------------------------------
/NewsSentiment/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/losses/__init__.py
--------------------------------------------------------------------------------
/NewsSentiment/losses/crossentropycrossweight.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | # CrossEntropyLoss with additional cross weight loss for two targets
5 | from NewsSentiment.SentimentClasses import SentimentClasses
6 |
7 |
8 | class CrossEntropyLoss_CrossWeight(nn.Module):
9 | def __init__(self, device, ignore_index, weight=None, crossloss_weight=0.2):
10 | super(CrossEntropyLoss_CrossWeight, self).__init__()
11 | self.device = device
12 | self.class_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index,)
13 | # cosine similarity by default
14 | self.cross_weight_loss = nn.CosineSimilarity(dim=2)
15 | self.crossloss_weight = crossloss_weight
16 | self.crossentropyloss_weight = 1.0 - crossloss_weight
17 | self.ignore_index = ignore_index
18 |
19 | def forward(
20 | self,
21 | predictions: torch.Tensor,
22 | true_labels: torch.Tensor,
23 | cross_weight: torch.Tensor,
24 | target_mask: torch.Tensor,
25 | ):
26 | """
27 | Calculate k-dimensional cross entropy loss by comparing predictions and
28 |         true_labels. Additionally, calculates a cross-weight loss that encourages the
29 |         attention weights of the two targets to differ from each other.
30 | 
31 |         :param predictions: shape: (batch, polarities e.g. 3, targets i.e. 2)
32 |         :param true_labels: shape: (batch, targets)
33 |         :param cross_weight: shape: (batch, targets, seqlen, bertdim)
34 |         :return:
35 |         (in earlier versions, predictions had shape (batch, polarities))
36 | """
37 | batch_size, num_classes, num_targets = predictions.size()
38 | bert_dim = cross_weight.shape[3]
39 |
40 | # calculate regular cross entropy loss
41 | classification_loss = self.class_loss(predictions, true_labels)
42 |
43 | # calculate cross-weight loss
44 |         # we must not calc the cross weight loss for batch items where one or both
45 |         # of the two targets is a fill-up target, i.e., the true value is
46 | # SentimentClasses.FILLUP_POLARITY_VALUE
47 | # get all batch items where the first target is fill up
48 | is_ignored_a = true_labels[:, 0]
49 | is_ignored_a = is_ignored_a == SentimentClasses.FILLUP_POLARITY_VALUE
50 |         # get all batch items where the second target is a fill-up target
51 | is_ignored_b = true_labels[:, 1]
52 | is_ignored_b = is_ignored_b == SentimentClasses.FILLUP_POLARITY_VALUE
53 | # logical or both lists to one
54 | is_ignored_batch_item = is_ignored_a | is_ignored_b
55 |         # select only those batch items where no target is a fill-up target
56 | is_not_ignored_batch_item = ~is_ignored_batch_item
57 | target_mask = target_mask[is_not_ignored_batch_item, :, :]
58 | count_non_ignored_batch_items = target_mask.shape[0]
59 |
60 | # if we have identical targets (=target masks) set cross weight loss to 0
61 | target_mask_a = target_mask[:, 0, :]
62 | target_mask_b = target_mask[:, 1, :]
63 | diff_target_mask = target_mask_a - target_mask_b
64 | diff_target_mask = diff_target_mask.sum(dim=1)
65 | # shape: batch
66 |         # the summed diff target mask will be 0 if the two targets in one batch item are
67 |         # identical, and non-zero if they are different
68 | is_different_target_per_two_batch_items = diff_target_mask != 0
69 |
70 | # only selecting different targets effectively zeros out values of identical
71 | # targets
72 | cross_weight = cross_weight[is_different_target_per_two_batch_items]
73 |
74 | # if there is not at least a single batch item with different targets, shape[0]
75 | # will be 0. we use this to test for this condition and if all targets over all
76 | # batch items are identical, we skip the cross weight part
77 | count_different_targets = cross_weight.shape[0]
78 | if count_different_targets == 0:
79 | cross_weight_loss = 0
80 | else:
81 | assert count_different_targets >= 1
82 | seq_len = cross_weight.shape[2]
83 | weight_a = cross_weight[:, 0, :, :]
84 | # weight_b = cross_weight[:, 1, :, :]
85 | weight_b = weight_a
86 |
87 |             # the normalized similarity is used directly as the loss term, so minimizing
88 |             # it pushes the two weight vectors apart, i.e., maximizes their distance
89 | cross_weight_similarity = self.cross_weight_loss(weight_a, weight_b)
90 | # cross_weight_similarity will be -1 for absolutely dissimilar values
91 | # 0 for unrelated and +1 for identical values
92 | # normalize between 0 and 1
93 | cross_weight_similarity = (cross_weight_similarity + 1) / 2
94 | # 0 = dissimilar, 1 = identical
95 | # mean over seq len
96 | cross_weight_loss = cross_weight_similarity.mean(dim=1)
97 | # at this point we have for each batch item its loss (0 if dissimilar
98 | # targets, 1 if identical target in the batch item)
99 | # finally, compute the single loss: sum
100 | cross_weight_loss = cross_weight_loss.sum()
101 | # normalize (divide by number of batch items; note that this can be different
102 | # from the number of different targets in the batch)
103 | cross_weight_loss = cross_weight_loss / count_non_ignored_batch_items
104 |
105 | # total_loss
106 | total_loss = (
107 | self.crossentropyloss_weight * classification_loss
108 | + self.crossloss_weight * cross_weight_loss
109 | )
110 |
111 | return total_loss
112 |
--------------------------------------------------------------------------------
/NewsSentiment/losses/crossentropylosslsr.py:
--------------------------------------------------------------------------------
1 | # adapted from ABSA-PyTorch
2 | import torch
3 | import torch.nn as nn
4 |
5 | # CrossEntropyLoss for Label Smoothing Regularization
6 | from NewsSentiment.SentimentClasses import SentimentClasses
7 |
8 |
9 | class CrossEntropyLoss_LSR(nn.Module):
10 | def __init__(self, device, smoothing_value=0.2, weight=None):
11 | super(CrossEntropyLoss_LSR, self).__init__()
12 | self.smoothing_value = smoothing_value
13 | self.device = device
14 | self.logSoftmax = nn.LogSoftmax(dim=1)
15 | self.weight = weight
16 |
17 | def _create_smooth_one_hot_for_true_targets(
18 | self, labels, batch_size, num_classes, num_targets
19 | ):
20 | """
21 | Produces a smooth one hot encoded tensor where all "false" scalars are set to
22 | base_prob, and all "true" scalars set to base_prob + 1 - smoothing_value.
23 |         Effectively, for example for three classes and smoothing_value=0.1, this results in:
24 |         False classes: 0.0333 (0.3333 * 0.1 = base_prob)
25 |         True classes: 0.0333 + 1 - 0.1 = 0.9333
26 | :param labels: the true classes
27 | :param batch_size:
28 | :param num_classes:
29 | :return:
30 | """
31 | # prior label distribution is set uniform (see Sec 3.5,
32 | # https://arxiv.org/pdf/1902.09314.pdf )
33 | prior_distribution = 1.0 / float(num_classes)
34 | # calculate base probability
35 | base_prob = self.smoothing_value * prior_distribution
36 | # initialize 0 tensor
37 | one_hot_label = torch.zeros(
38 | batch_size, num_classes, num_targets, device=self.device
39 | )
40 | # set probability of all classes in all batches and targets to the base prob
41 | # (in normal one hot encoding, this would be 0 instead)
42 | one_hot_label = one_hot_label + base_prob
43 |
44 | if self.weight is not None:
45 | raise NotImplementedError(
46 | "test this first!!! currently untested with multi tsc"
47 | )
48 | one_hot_label = one_hot_label * self.weight
49 |
50 | # iterate over each single item of the batch
51 | for batch_index in range(batch_size):
52 | for target_index in range(num_targets):
53 | # get the class index
54 | class_index = labels[batch_index, target_index].item()
55 | if class_index == SentimentClasses.FILLUP_POLARITY_VALUE:
56 |                     # can't set the one-hot encoding here, since there is no "true" class
57 | # need to ignore this later
58 | pass
59 | else:
60 | # set the class probability
61 | one_hot_label[batch_index, class_index, target_index] += (
62 | 1.0 - self.smoothing_value
63 | )
64 |
65 | return one_hot_label
66 |
67 | def forward(
68 | self, predictions: torch.Tensor, labels: torch.Tensor, size_average=True
69 | ):
70 | """
71 | Calculate k-dimensional cross entropy loss
72 | :param predictions: shape: (batch, polarities e.g. 3, targets e.g. 5)
73 | :param labels: shape: (batch, targets)
74 | :param size_average:
75 | :return:
76 | prediction size earlier was batch polarities
77 | """
78 | batch_size, num_classes, num_targets = predictions.size()
79 | # calculate log of probability of each class (of each batch and target)
80 | pre_logsoftmax = self.logSoftmax(predictions)
81 |
82 | smooth_one_hot_for_true_targets = self._create_smooth_one_hot_for_true_targets(
83 | labels, batch_size, num_classes, num_targets
84 | ).to(self.device)
85 |
86 | # ignore those comparison where labels has a fillup value (=to be ignored)
87 | mask_nonfillup = labels != SentimentClasses.FILLUP_POLARITY_VALUE
88 | # shape is batch, targets
89 | # shape should be batch, classes, targets
90 | mask_nonfillup = mask_nonfillup.unsqueeze(1).repeat(1, num_classes, 1)
91 | # convert to 1 for True and 0 for False
92 | mask_nonfillup = mask_nonfillup.to(torch.int)
93 | # multiply so that scalars to be ignored are set to 0 (resulting in 0 loss for
94 | # those scalars, i.e., targets)
95 |         # notes: categorical cross entropy loss does not directly punish those
96 |         # predictions (scalars) that belong to incorrect classes (as defined by the
97 |         # true labels); it is only computed against the one true class, i.e., where
98 |         # the one-hot encoding has a 1. since the predicted class probabilities sum
99 |         # to 100% over all classes (they are the softmax-normalized output of the
100 |         # neural network), the loss still punishes wrong predictions: if the true
101 |         # class's probability should be 100% but is only 25% or 70%, the loss is non-zero.
102 |         # as a consequence, when there is no true class in "labels", there cannot be a
103 |         # loss. so, the multiplication below, which sets all fill-up targets to 0, has
104 |         # the expected effect (no loss can result from fill-up values, as all their
105 |         # class probabilities are set to 0)
106 | smooth_one_hot_for_true_targets = (
107 | smooth_one_hot_for_true_targets * mask_nonfillup
108 | )
109 |
110 | # multiply
111 | loss = -smooth_one_hot_for_true_targets * pre_logsoftmax
112 |
113 | # aggregate loss to scalar over classes
114 | loss = torch.sum(loss, dim=1)
115 | # aggregate loss to scalar over targets
116 | loss = torch.sum(loss, dim=1)
117 |
118 | if size_average:
119 | return torch.mean(loss)
120 | else:
121 | return torch.sum(loss)
122 |
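A minimal sketch with dummy logits for a batch of 2 items, 2 targets each, and 3 sentiment classes; the second item's second target is a fill-up slot and is ignored by the loss.

import torch

from NewsSentiment.SentimentClasses import SentimentClasses
from NewsSentiment.losses.crossentropylosslsr import CrossEntropyLoss_LSR

loss_fn = CrossEntropyLoss_LSR(device=torch.device("cpu"), smoothing_value=0.2)

predictions = torch.randn(2, 3, 2)  # (batch, classes, targets)
labels = torch.tensor([[0, 2], [1, SentimentClasses.FILLUP_POLARITY_VALUE]])
print(loss_fn(predictions, labels).item())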
--------------------------------------------------------------------------------
/NewsSentiment/losses/crossentropylosswithconfidence.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 |
4 |
5 | class CrossEntropyLossWithConfidence(nn.Module):
6 | def __init__(self, weight, ignore_index):
7 | super(CrossEntropyLossWithConfidence, self).__init__()
8 | self.crossentropyloss = nn.CrossEntropyLoss(
9 | weight=weight, ignore_index=ignore_index
10 | )
11 | self.loss_for_confidence = nn.HuberLoss()
12 | self.w_classes = 0.5
13 |
14 | def forward(self, predictions: torch.Tensor, labels: torch.Tensor):
15 | # get the prediction components
16 | class_preds = predictions[:, 0:3, :]
17 | confidence_preds = predictions[:, 3:4, :].squeeze(dim=1)
18 |
19 | # calc the regular class-based loss
20 | class_loss = self.crossentropyloss(class_preds, labels)
21 |
22 | # now calc the confidence-based loss
23 | confidence_loss = 0
24 |
25 | # get the predicted classes
26 | predicted_classes = class_preds.argmax(dim=1)
27 | # and compare with the correct classes
28 | is_correct = torch.eq(predicted_classes, labels)
29 | is_correct_as_float = is_correct.float()
30 | # calc the confidence loss
31 | confidence_loss = self.loss_for_confidence(
32 | confidence_preds, is_correct_as_float
33 | )
34 |
35 | # calc the total loss
36 | total_loss = (
37 | self.w_classes * class_loss + (1 - self.w_classes) * confidence_loss
38 | )
39 |
40 | return total_loss
41 |
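A minimal sketch: per target the model emits 4 channels, three class logits plus one confidence score, matching the slicing in forward above.

import torch

from NewsSentiment.SentimentClasses import SentimentClasses
from NewsSentiment.losses.crossentropylosswithconfidence import CrossEntropyLossWithConfidence

loss_fn = CrossEntropyLossWithConfidence(
    weight=None, ignore_index=SentimentClasses.FILLUP_POLARITY_VALUE
)

predictions = torch.randn(2, 4, 3)    # (batch, 3 classes + 1 confidence, targets)
labels = torch.randint(0, 3, (2, 3))  # (batch, targets)
print(loss_fn(predictions, labels).item())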
--------------------------------------------------------------------------------
/NewsSentiment/losses/seq2seqloss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from NewsSentiment.SentimentClasses import SentimentClasses
5 |
6 |
7 | class SequenceLoss(nn.Module):
8 | """
9 | Input to this loss are sequences, see models/multitargets/seq2seq.py
10 | """
11 |
12 | def __init__(self, device, weight):
13 | super(SequenceLoss, self).__init__()
14 | self.device = device
15 | self.weight = weight
16 | self.actual_loss = nn.CrossEntropyLoss(
17 | weight=self.weight, ignore_index=SentimentClasses.FILLUP_POLARITY_VALUE,
18 | )
19 |
20 | assert self.weight is None, "not implemented, weight must be None"
21 |
22 | def forward(
23 | self,
24 | predictions: torch.Tensor,
25 | true_classes: torch.Tensor,
26 | true_target_mask: torch.Tensor,
27 | ):
28 | """
29 | :param predictions: shape: batch, numclasses, seqlen
30 | :param true_classes: shape: batch, targets
31 | :param true_target_mask: shape: batch, targets, seqlen
32 | :return:
33 | """
34 | batch_size = predictions.shape[0]
35 | num_classes = predictions.shape[1]
36 | seq_len = predictions.shape[2]
37 |
38 | # create true_sequence, shape: batch, seqlen
39 | true_sequence = (
40 | torch.ones([batch_size, seq_len], dtype=torch.int64, device=self.device)
41 | * SentimentClasses.FILLUP_POLARITY_VALUE
42 | )
43 | # update individual scalars
44 | for batch_index in range(batch_size):
45 | for target_index in range(true_target_mask.shape[1]):
46 | # get the sentiment class of this target in this batch
47 | true_sentiment_class = true_classes[batch_index, target_index].item()
48 | # if the true_sentiment_class is FILLUP_POLARITY_VALUE, ignore (no need
49 | # to update because we initialized the whole true_sequence with
50 |                 # FILLUP_POLARITY_VALUE)
51 | if true_sentiment_class == SentimentClasses.FILLUP_POLARITY_VALUE:
52 | continue
53 |
54 | # if not FILLUP_POLARITY_VALUE, update the true_sequence
55 | # iterate all tokens
56 | for seq_index in range(seq_len):
57 | # determine if at the current token there is a target
58 | is_target = true_target_mask[
59 | batch_index, target_index, seq_index
60 | ].item()
61 |
62 | if is_target == 1:
63 | # there is a target
64 | # we should update the scalar in true_sequence at the
65 | # corresponding part (thereby disregarding the target dimension,
66 |                             # since all targets are merged into one dimension). ensure that
67 | # -100 is still there (if there is another value, this means
68 | # that we have overlapping targets)
69 | prev_value = true_sequence[batch_index, seq_index].item()
70 | if prev_value == SentimentClasses.FILLUP_POLARITY_VALUE:
71 | # the previous value is FILLUP_POLARITY_VALUE, so there is
72 | # no target at this token already. so, we can update
73 | true_sequence[batch_index, seq_index] = true_sentiment_class
74 | else:
75 | # there is already a target class -> overlapping targets
76 | # this can happen for probably two reasons:
77 | # 1) there are actually different targets in the data that
78 | # overlap
79 | # 2) we duplicated a target in FXDataset
80 | # either way, for now, if the value to be set is identical
81 | # to the one already present, we continue, otherwise throw
82 | # an error
83 | if prev_value == true_sentiment_class:
84 | pass
85 | else:
86 | raise ValueError(
87 | f"overlapping targets with different classes: tried to set true_sequence[{batch_index},{seq_index}] to {true_sentiment_class}, but it is already {prev_value}"
88 | )
89 |
90 | elif is_target == 0:
91 | # no target
92 | # since we initialized the true_sequence tensor with -100
93 | # scalars, there's no need to update the value for non-target
94 | # nodes
95 | pass
96 | else:
97 | raise ValueError(
98 | f"true_target_mask must be either 0 or 1, is: {is_target}"
99 | )
100 |
101 | loss = self.actual_loss(predictions, true_sequence)
102 | return loss
103 |
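
A minimal, self-contained sketch (toy shapes and values, not part of the repository) of how the fill-up value interacts with CrossEntropyLoss's ignore_index, which is what the label-sequence construction above relies on:

import torch
import torch.nn as nn

FILLUP = -100  # assumed to equal SentimentClasses.FILLUP_POLARITY_VALUE

# toy shapes: batch=1, 3 classes, seqlen=4; only token 2 belongs to a target
predictions = torch.randn(1, 3, 4)                            # batch, numclasses, seqlen
true_sequence = torch.full((1, 4), FILLUP, dtype=torch.long)  # batch, seqlen
true_sequence[0, 2] = 1                                       # token 2 carries the target's class

loss_fn = nn.CrossEntropyLoss(ignore_index=FILLUP)
loss = loss_fn(predictions, true_sequence)  # only token 2 contributes to the loss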
--------------------------------------------------------------------------------
/NewsSentiment/models/FXBaseModel.py:
--------------------------------------------------------------------------------
1 | import os
2 | from copy import copy
3 |
4 | import torch.nn as nn
5 |
6 | from abc import abstractmethod
7 |
8 | from transformers import (
9 | XLNetModel,
10 | AlbertModel,
11 | BertModel,
12 | RobertaModel,
13 | PreTrainedModel,
14 | )
15 | from functools import wraps
16 |
17 | from NewsSentiment.download import Download
18 |
19 |
20 | class FXBaseModel(PreTrainedModel):
21 | def __init__(self, *args, **kwargs):
22 | super().__init__(*args, **kwargs)
23 |
24 | @staticmethod
25 | @abstractmethod
26 | def get_language_models():
27 | return
28 |
29 | @staticmethod
30 | @abstractmethod
31 | def get_input_field_ids():
32 | return
33 |
34 | def invoke_language_model(self, lm, input_ids, token_type_ids=None):
35 | type_lm = type(lm)
36 | if type_lm == XLNetModel:
37 | last_hidden_state, mems, all_hidden_states = lm(
38 | input_ids=input_ids, token_type_ids=token_type_ids,
39 | )
40 | elif type_lm in [AlbertModel, BertModel, RobertaModel]:
41 | if token_type_ids is None:
42 | last_hidden_state, pooler_output, hidden_states = lm(
43 | input_ids=input_ids,
44 | )
45 | else:
46 | # last_hidden_state, pooler_output, hidden_states = lm(
47 | # input_ids=input_ids, token_type_ids=token_type_ids
48 | # )
49 | model_results = lm(input_ids=input_ids, token_type_ids=token_type_ids)
50 | last_hidden_state = model_results.last_hidden_state
51 | pooler_output = model_results.pooler_output
52 | hidden_states = model_results.hidden_states
53 | else:
54 | raise NotImplementedError
55 |
56 | return last_hidden_state
57 |
58 |
59 | def provide_pretrained(version, pretrained_url):
60 | """
61 | Usage:
62 |
63 | @provide_pretrained("v1.0.0", "https://example.com/link/to/state_dict")
64 | class Example(nn.Module):
65 | pass
66 | """
67 |
68 | def decorator(model_class):
69 | # The actual decorator to use before the class
70 | wraps(model_class)
71 |
72 | wrapper = __get_pretrained_wrapper_class(model_class)
73 | wrapper._provide_pretrained_versions[version] = pretrained_url
74 |
75 | return wrapper
76 |
77 | return decorator
78 |
79 |
80 | def default_pretrained(version):
81 | """
82 | Set the version which should be used as the default version and will be used when running with --pretrained.
83 |
84 | Usage:
85 |
86 | @default_pretrained("v1.0.0")
87 | @provide_pretrained("v1.0.0", "https://example.com/link/to/state_dict")
88 | class Example(nn.Module):
89 | pass
90 | """
91 |
92 | def decorator(model_class):
93 | # The actual decorator to use before the class
94 | wraps(model_class)
95 |
96 | wrapper = __get_pretrained_wrapper_class(model_class)
97 | wrapper._provide_pretrained_default = version
98 |
99 | return wrapper
100 |
101 | return decorator
102 |
103 |
104 | def model_includes_pretrained(model):
105 | """
106 | Checks if a model-class includes the methods to load pretrained models.
107 |
108 | Arguments:
109 | model Model-class to check.
110 |
111 | Returns:
112 | True if it includes the functionality.
113 | """
114 | return hasattr(model, "has_pretrained_state_dict") and hasattr(
115 | model, "get_pretrained_state_dict"
116 | )
117 |
118 |
119 | __pretrained_wrapper_classes = set()
120 |
121 |
122 | def __get_pretrained_wrapper_class(base_class):
123 | if base_class in __pretrained_wrapper_classes:
124 | return base_class
125 |
126 | class PretrainedWrapper(base_class):
127 | _provide_pretrained_default = None
128 | _provide_pretrained_versions = {}
129 |
130 | def __init__(self, *args, **kwargs):
131 | super().__init__(*args, **kwargs)
132 |
133 | @classmethod
134 | def has_pretrained_state_dict(cls, version=None):
135 | version = version or cls._provide_pretrained_default
136 | return version in cls._provide_pretrained_versions
137 |
138 | @classmethod
139 | def get_pretrained_state_dict(
140 | cls, version=None, download_if_not_exists=True, **kwargs
141 | ):
142 | path = Download.model_path(cls, version)
143 | if not os.path.isfile(path):
144 | if download_if_not_exists:
145 | Download.download(cls, version, False)
146 | else:
147 | raise FileNotFoundError("State dict not found")
148 | return cls.load_state_dict(path, **kwargs)
149 |
150 | @classmethod
151 | def get_pretrained_versions(cls):
152 | return copy(cls._provide_pretrained_versions)
153 |
154 | @classmethod
155 | def get_pretrained_source(cls, version=None):
156 | return cls._provide_pretrained_versions[
157 | version or cls._provide_pretrained_default
158 | ]
159 |
160 | @classmethod
161 | def get_pretrained_default_version(cls):
162 | return cls._provide_pretrained_default
163 |
164 | __pretrained_wrapper_classes.add(PretrainedWrapper)
165 | return PretrainedWrapper
166 |
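
A hedged usage sketch of the two decorators defined above; ToyModel and the URL are placeholders, not real release assets:

import torch.nn as nn
from NewsSentiment.models.FXBaseModel import (
    provide_pretrained,
    default_pretrained,
    model_includes_pretrained,
)

@default_pretrained("v1.0.0")
@provide_pretrained("v1.0.0", "https://example.com/placeholder/state_dict")
class ToyModel(nn.Module):
    pass

# the wrapper class produced by the decorators exposes these classmethods
assert model_includes_pretrained(ToyModel)
print(ToyModel.get_pretrained_default_version())  # v1.0.0
print(ToyModel.has_pretrained_state_dict())       # True, the default version is registered
print(ToyModel.get_pretrained_versions())         # {'v1.0.0': 'https://example.com/placeholder/state_dict'}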
--------------------------------------------------------------------------------
/NewsSentiment/models/FXEnsemble.py:
--------------------------------------------------------------------------------
1 | from abc import ABC
2 | from argparse import Namespace
3 | from typing import List, Dict
4 | import torch.nn as nn
5 | import torch
6 |
7 | from NewsSentiment.consts import *
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class FXEnsemble(FXBaseModel):
13 | @staticmethod
14 | def get_language_models():
15 | return (
16 | BERT_BASE_UNCASED,
17 | ROBERTA_BASE,
18 | XLNET_BASE_CASED,
19 | )
20 |
21 | @staticmethod
22 | def get_input_field_ids():
23 | return [
24 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
25 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
26 | (ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
27 | (ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
28 | (XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
29 | (XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
30 | ]
31 |
32 | def __init__(self, transformer_models: Dict, opt: Namespace):
33 | super().__init__()
34 |
35 | # language models
36 | self.bert = transformer_models[BERT_BASE_UNCASED]
37 | self.roberta = transformer_models[ROBERTA_BASE]
38 | self.xlnet = transformer_models[XLNET_BASE_CASED]
39 | self.num_models = 3
40 |
41 | # params
42 | assert (
43 | self.bert.config.hidden_size
44 | == self.roberta.config.hidden_size
45 | == self.xlnet.config.hidden_size
46 | )
47 | self.sequence_length = opt.max_seq_len
48 | self.hidden_size = self.bert.config.hidden_size
49 |
50 | # other neural network components
51 | self.dropout = nn.Dropout(opt.dropout)
52 | self.target_dependent_text_combiner = nn.Linear(
53 | self.hidden_size * self.num_models, opt.polarities_dim
54 | )
55 |
56 | def _combine_text_out_with_target_mask(
57 | self, batch_size, text_last_hidden_state, target_mask
58 | ):
59 | # note: this helper is used for all three language models, not only roberta
60 | reshaped_target_mask = target_mask.reshape((batch_size, 1, self.sequence_length))
61 | target_dependent_text = torch.bmm(
62 | reshaped_target_mask, text_last_hidden_state
63 | )
64 | target_dependent_text = target_dependent_text.reshape((batch_size, self.hidden_size))
65 | # shape: batch_size, hidden_size
66 | return target_dependent_text
67 |
68 | def forward(self, inputs: List):
69 | # alternatively, we could also use this
70 | # FXDataset.get_all_inputs_for_model(input, self)
71 | bert_text_ids = FXDataset.get_input_by_params(
72 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
73 | )
74 | bert_target_mask = FXDataset.get_input_by_params(
75 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
76 | )
77 | roberta_text_ids = FXDataset.get_input_by_params(
78 | inputs, ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
79 | )
80 | roberta_target_mask = FXDataset.get_input_by_params(
81 | inputs, ROBERTA_BASE, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
82 | )
83 | xlnet_text_ids = FXDataset.get_input_by_params(
84 | inputs, XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
85 | )
86 | xlnet_target_mask = FXDataset.get_input_by_params(
87 | inputs, XLNET_BASE_CASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
88 | )
89 |
90 | # get variables
91 | batch_size = bert_text_ids.shape[0]
92 |
93 | # dev notes:
94 | # batch_size = 4, sequence_length = 150, hidden_size = 768
95 |
96 | # bert_text_out returns list with following elements
97 | # 0: last_hidden_state (batch_size, sequence_length, hidden_size)
98 | # 1: pooler_output (batch_size, hidden_size)
99 | # 2: hidden_states (batch_size, sequence_length, hidden_size)
100 | (
101 | bert_text_last_hidden_state,
102 | bert_text_pooler_output,
103 | bert_text_hidden_states,
104 | ) = self.bert(bert_text_ids)
105 | # roberta_text_out returns same output as bert
106 | (
107 | roberta_text_last_hidden_state,
108 | roberta_text_pooler_output,
109 | roberta_text_hidden_states,
110 | ) = self.roberta(roberta_text_ids)
111 | # xlnet_text_out returns list with following elements
112 | # 0: last_hidden_state (batch_size, sequence_length, hidden_size)
113 | # does not exist - (1: mems, a list of length config.n_layers)
114 | # 2: hidden_states (batch_size, sequence_length, hidden_size)
115 | xlnet_text_last_hidden_state, xlnet_text_hidden_states = self.xlnet(
116 | xlnet_text_ids
117 | )
118 |
119 | # incorporate target masks with (for now) last layer's states
120 | # *_target_dependent_text_out will be of shape (batch_size, hidden_size)
121 | bert_target_dependent_text_out = self._combine_text_out_with_target_mask(
122 | batch_size, bert_text_last_hidden_state, bert_target_mask
123 | )
124 | roberta_target_dependent_text_out = self._combine_text_out_with_target_mask(
125 | batch_size, roberta_text_last_hidden_state, roberta_target_mask
126 | )
127 | xlnet_target_dependent_text_out = self._combine_text_out_with_target_mask(
128 | batch_size, xlnet_text_last_hidden_state, xlnet_target_mask
129 | )
130 |
131 | # cat outputs
132 | cat_target_dependent_text_out = torch.cat(
133 | [
134 | bert_target_dependent_text_out,
135 | roberta_target_dependent_text_out,
136 | xlnet_target_dependent_text_out,
137 | ],
138 | dim=1,
139 | )
140 |
141 | # dropout for better learning
142 | cat_target_dependent_text_out = self.dropout(cat_target_dependent_text_out)
143 |
144 | # combine and get 3 dimensions
145 | logits = self.target_dependent_text_combiner(cat_target_dependent_text_out)
146 |
147 | return logits
148 |
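
A small shape sketch (toy dimensions, standalone) of the batched matrix multiplication used in _combine_text_out_with_target_mask above:

import torch

batch_size, seq_len, hidden_size = 4, 150, 768
last_hidden_state = torch.randn(batch_size, seq_len, hidden_size)
target_mask = torch.zeros(batch_size, seq_len)
target_mask[:, 10:13] = 1.0  # pretend the target spans tokens 10-12

# (batch, 1, seq) x (batch, seq, hidden) -> (batch, 1, hidden) -> (batch, hidden)
mask = target_mask.reshape(batch_size, 1, seq_len)
target_dependent = torch.bmm(mask, last_hidden_state).reshape(batch_size, hidden_size)
print(target_dependent.shape)  # torch.Size([4, 768])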
--------------------------------------------------------------------------------
/NewsSentiment/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/models/__init__.py
--------------------------------------------------------------------------------
/NewsSentiment/models/ensemble.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import List, Dict
3 | from NewsSentiment.consts import *
4 | import torch.nn as nn
5 | import torch
6 |
7 | from NewsSentiment.models.FXBaseModel import FXBaseModel
8 | from NewsSentiment.models.singletarget.td_bert import TD_BERT
9 |
10 |
11 | class EnsembleTopA(FXBaseModel):
12 | @staticmethod
13 | def get_language_models():
14 | return (get_default_lm(),)
15 |
16 | @staticmethod
17 | def get_input_field_ids():
18 | return [
19 | # tdbert
20 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
21 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
22 | # hosseinia
23 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS),
24 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS),
25 | # while we use text-then-target as bert input, we can use text targetmask and text knowledge source mask
26 | # because it is identical to a hypothetical text-then-target target mask or text-then-target knowledge
27 | # source mask (we would not highlight the target in the 2nd component in the corresponding mask)
28 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
29 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES),
30 | ]
31 |
32 | def __init__(self, transformer_models: Dict, opt: Namespace):
33 | super().__init__()
34 | from NewsSentiment.models.singletarget.stancedetectionpaper import StanceDetectionHosseinia
35 |
36 | # sub-models
37 | self.td_bert = TD_BERT(transformer_models, opt)
38 | self.hosseinia = StanceDetectionHosseinia(transformer_models, opt)
39 |
40 | # ensemble related
41 | self.hosseinia_dense = nn.Linear(
42 | self.hosseinia.language_model.config.hidden_size * 3 * 2 * 2,
43 | self.hosseinia.language_model.config.hidden_size
44 | )
45 | self.ensemble_combiner = nn.Linear(
46 | self.hosseinia.language_model.config.hidden_size * 2,
47 | opt.polarities_dim
48 | )
49 |
50 | def forward(self, inputs: List):
51 | # shape: batch, bertdim
52 | td_bert_out = self.td_bert(inputs, is_return_ensemble_values=True)
53 | # shape: batch, 3 * 2 * 2 * bertdim
54 | hosseinia_out = self.hosseinia(inputs, is_return_ensemble_values=True)
55 |
56 | # to ensure that both models have more or less similar impact on the result, apply a dense layer to hosseinia
57 | # so that its new shape is: batch, bertdim
58 | hosseinia_out = self.hosseinia_dense(hosseinia_out)
59 |
60 | # combine
61 | combined_out = torch.cat((td_bert_out, hosseinia_out), dim=1)
62 |
63 | logits = self.ensemble_combiner(combined_out)
64 |
65 | return logits
66 |
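
A toy sketch of the shape bookkeeping in the combiner above: the Hosseinia-style sub-model's wide output is first projected down to the hidden size so both sub-models contribute a vector of the same width before concatenation (names and dimensions here are illustrative only):

import torch
import torch.nn as nn

hidden, polarities = 768, 3
td_bert_out = torch.randn(4, hidden)                 # batch, bertdim
hosseinia_out = torch.randn(4, hidden * 3 * 2 * 2)   # batch, 3*2*2*bertdim

hosseinia_dense = nn.Linear(hidden * 3 * 2 * 2, hidden)
combiner = nn.Linear(hidden * 2, polarities)

hosseinia_out = hosseinia_dense(hosseinia_out)       # batch, bertdim
logits = combiner(torch.cat((td_bert_out, hosseinia_out), dim=1))
print(logits.shape)  # torch.Size([4, 3])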
--------------------------------------------------------------------------------
/NewsSentiment/models/ensembleb.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import List, Dict
3 | from NewsSentiment.consts import *
4 | import torch.nn as nn
5 | import torch
6 |
7 | from NewsSentiment.models.FXBaseModel import FXBaseModel
8 | from NewsSentiment.models.singletarget.td_bert import TD_BERT
9 |
10 |
11 | class EnsembleTopB(FXBaseModel):
12 | @staticmethod
13 | def get_language_models():
14 | return (get_default_lm(),)
15 |
16 | @staticmethod
17 | def get_input_field_ids():
18 | return [
19 | # tdbert
20 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
21 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
22 | # hosseinia
23 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS),
24 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS),
25 | # while we use text-then-target as bert input, we can use text targetmask and text knowledge source mask
26 | # because it is identical to a hypothetical text-then-target target mask or text-then-target knowledge
27 | # source mask (we would not highlight the target in the 2nd component in the corresponding mask)
28 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
29 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES),
30 | ]
31 |
32 | def __init__(self, transformer_models: Dict, opt: Namespace):
33 | super().__init__()
34 | from NewsSentiment.models.singletarget.knowledgesourceshosseiniatdbert import KnowledgeSourcesHosseiniaTdBert
35 |
36 | # sub-models
37 | self.td_bert = TD_BERT(transformer_models, opt)
38 | self.hosseiniatdbert = KnowledgeSourcesHosseiniaTdBert(transformer_models, opt)
39 |
40 | # ensemble related
41 | self.ensemble_combiner = nn.Linear(
42 | self.hosseiniatdbert.language_model.config.hidden_size * 2,
43 | opt.polarities_dim
44 | )
45 |
46 | def forward(self, inputs: List):
47 | # shape: batch, bertdim
48 | td_bert_out = self.td_bert(inputs, is_return_ensemble_values=True)
49 | # shape: batch, 3 * 2 * 2 * bertdim
50 | hosseinia_out = self.hosseiniatdbert(inputs, is_return_ensemble_values=True)
51 |
52 | # combine
53 | combined_out = torch.cat((td_bert_out, hosseinia_out), dim=1)
54 |
55 | logits = self.ensemble_combiner(combined_out)
56 |
57 | return logits
58 |
--------------------------------------------------------------------------------
/NewsSentiment/models/multitargets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/models/multitargets/__init__.py
--------------------------------------------------------------------------------
/NewsSentiment/models/multitargets/contrasting.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from NewsSentiment.consts import *
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class Contrasting(FXBaseModel):
13 | """
14 | This model returns a sequence that only contains the hidden states of those output
15 | nodes that represent a word piece of the target phrase. All other hidden states are
16 | set to 0. The length of the output sequence is opt.max_seq_len
17 |
18 | From a conceptual perspective, TD-BERT is similar in some aspects (e.g., only the
19 | output of target-phrase-related nodes is used), but in other aspects it differs, or
20 | it is not quite clear how the authors of TD-BERT implemented them. An email I sent
21 | to them has not been answered yet.
22 | """
23 |
24 | @staticmethod
25 | def get_language_models():
26 | return (BERT_BASE_UNCASED,)
27 |
28 | @staticmethod
29 | def get_input_field_ids():
30 | return [
31 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
32 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
33 | ]
34 |
35 | def __init__(self, transformer_models: Dict, opt: Namespace):
36 | super(Contrasting, self).__init__()
37 | self.language_model = transformer_models[BERT_BASE_UNCASED]
38 | self.dropout = nn.Dropout(opt.dropout)
39 |
40 | self.contrasting_weight_dense = nn.Linear(opt.max_seq_len, opt.max_seq_len,)
41 | self.dense = nn.Linear(
42 | self.language_model.config.hidden_size, opt.polarities_dim
43 | )
44 |
45 | def forward(self, inputs):
46 | # get inputs
47 | text_bert_indices = FXDataset.get_input_by_params(
48 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
49 | )
50 | text_bert_indices_targets_mask = FXDataset.get_input_by_params(
51 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
52 | )
53 | assert text_bert_indices.shape[1] == 2, "contrasting requires two targets"
54 |
55 | # target a
56 | a_text_bert_indices = text_bert_indices[:, 0, :]
57 | a_text_bert_indices_targets_mask = text_bert_indices_targets_mask[:, 0, :]
58 |
59 | # target b
60 | b_text_bert_indices = text_bert_indices[:, 1, :]
61 | b_text_bert_indices_targets_mask = text_bert_indices_targets_mask[:, 1, :]
62 |
63 | # bert
64 | (
65 | a_last_hidden_states,
66 | a_pooler_output,
67 | a_all_hidden_states,
68 | ) = self.language_model(input_ids=a_text_bert_indices)
69 | (
70 | b_last_hidden_states,
71 | b_pooler_output,
72 | b_all_hidden_states,
73 | ) = self.language_model(input_ids=b_text_bert_indices)
74 | stacked_bert_outs_ab = torch.stack(
75 | (a_last_hidden_states, b_last_hidden_states), dim=1
76 | )
77 | # apply dropout to the stacked bert outputs
78 | stacked_bert_outs_ab = self.dropout(stacked_bert_outs_ab)
79 | # shape: batch, 2, seqlen, bertdim
80 |
81 | # create weight
82 | cross_weight = self.contrasting_weight_dense(text_bert_indices_targets_mask)
83 | cross_weight = cross_weight.unsqueeze(3).repeat(
84 | 1, 1, 1, stacked_bert_outs_ab.shape[3]
85 | )
86 | cross_weight = self.dropout(cross_weight)
87 | # shape: batch, 2, seqlen, bertdim
88 |
89 | weighted_stacked_bert_outs_ab = stacked_bert_outs_ab * cross_weight
90 |
91 | # sum
92 | weighted_stacked_bert_outs_ab = weighted_stacked_bert_outs_ab.sum(dim=2)
93 |
94 | # dense
95 | logits = self.dense(weighted_stacked_bert_outs_ab)
96 |
97 | return logits, cross_weight
98 |
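
A shape-only sketch (toy sizes, standalone) of how the cross weight above is broadcast over the BERT dimension and then used for a weighted sum over the sequence:

import torch
import torch.nn as nn

batch, seq_len, bertdim, polarities = 2, 150, 768, 3
stacked_bert_outs_ab = torch.randn(batch, 2, seq_len, bertdim)
targets_mask = torch.zeros(batch, 2, seq_len)

weight_dense = nn.Linear(seq_len, seq_len)
cross_weight = weight_dense(targets_mask)                           # batch, 2, seqlen
cross_weight = cross_weight.unsqueeze(3).repeat(1, 1, 1, bertdim)   # batch, 2, seqlen, bertdim

weighted = (stacked_bert_outs_ab * cross_weight).sum(dim=2)         # batch, 2, bertdim
logits = nn.Linear(bertdim, polarities)(weighted)                   # batch, 2, polarities
print(logits.shape)  # torch.Size([2, 2, 3])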
--------------------------------------------------------------------------------
/NewsSentiment/models/multitargets/random_multi.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from NewsSentiment.consts import *
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class RandomMulti(FXBaseModel):
13 | """
14 | Random baseline: returns uniformly random logits for each target in each item.
15 | """
16 |
17 | @staticmethod
18 | def get_language_models():
19 | """
20 | The overall architecture assumes that at least one language model is used, so
21 | we just require bert here for compatibility.
22 | :return:
23 | """
24 | return (BERT_BASE_UNCASED,)
25 |
26 | @staticmethod
27 | def get_input_field_ids():
28 | return [
29 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
31 | ]
32 |
33 | def __init__(self, transformer_models: Dict, opt: Namespace):
34 | super(RandomMulti, self).__init__()
35 | self.num_classes = opt.polarities_dim
36 |
37 | def forward(self, inputs):
38 | text_bert_indices = FXDataset.get_input_by_params(
39 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
40 | )
41 | batch_size = text_bert_indices.shape[0]
42 | num_targets = text_bert_indices.shape[1]
43 |
44 | # get a random tensor
45 | logits = torch.rand(batch_size, num_targets, self.num_classes, device=text_bert_indices.device)
46 |
47 | return logits
48 |
--------------------------------------------------------------------------------
/NewsSentiment/models/multitargets/seq2seq.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch.nn as nn
5 |
6 | from NewsSentiment.SentimentClasses import SentimentClasses
7 | from NewsSentiment.consts import *
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class SeqTwoSeq(FXBaseModel):
13 | """
14 | Outputs the class probabilities for each token. So, the output will be:
15 | (batch, seqlen (150), classnum (3))
16 | """
17 |
18 | @staticmethod
19 | def get_language_models():
20 | return (get_default_lm(),)
21 |
22 | @staticmethod
23 | def get_input_field_ids():
24 | return [
25 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
26 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
27 | ]
28 |
29 | def __init__(self, transformer_models: Dict, opt: Namespace):
30 | super(SeqTwoSeq, self).__init__()
31 | self.language_model = transformer_models[get_default_lm()]
32 | self.dropout = nn.Dropout(opt.dropout)
33 | # https://pytorch.org/docs/stable/nn.html#linear supports multi-dimensional
34 | # input; only the last dimension has to be specified for Linear creation
35 |
36 | self.attentionlike_dense = nn.Bilinear(
37 | self.language_model.config.hidden_size,
38 | FXDataset.NUM_MAX_TARGETS_PER_ITEM,
39 | SentimentClasses.get_num_classes(),
40 | )
41 |
42 | def forward(self, inputs):
43 | # get inputs
44 | text_bert_indices = FXDataset.get_input_by_params(
45 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
46 | )
47 | text_bert_indices_targets_mask = FXDataset.get_input_by_params(
48 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
49 | )
50 | # prepare inputs
51 | # for text only, we do not need target specific information, i.e., all text
52 | # vectors are identical. also, bert can
53 | # only process one sequence of size max_seq_len (more specifically, a tensor
54 | # of size batch_size x max_seq_len). thus, we select only the first element from
55 | # the second dimension (the dimensions are: batch, targets, hidden_states)
56 | text_bert_indices = text_bert_indices[:, 0, :]
57 | # apply bert
58 | last_hidden_states = self.invoke_language_model(
59 | lm=self.language_model,
60 | input_ids=text_bert_indices,
61 | )
62 | # shape: batch, seqlen, bertdim
63 | last_hidden_states = self.dropout(last_hidden_states)
64 |
65 | # stack hidden states with target mask
66 | # hidden: batch, seqlen, bertdim -> stay
67 | # targetmask: batch, target, seqlen -> batch, seqlen, target
68 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.permute(
69 | 0, 2, 1
70 | ).contiguous()
71 |
72 | sequence_logits = self.attentionlike_dense(
73 | last_hidden_states, text_bert_indices_targets_mask
74 | )
75 |
76 | return sequence_logits
77 |
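
nn.Bilinear applies over the last dimension of both inputs, so with toy sizes the per-token class logits of this model come out as follows (a sketch under assumed dimensions, not part of the model code):

import torch
import torch.nn as nn

batch, seq_len, bertdim, num_targets, num_classes = 2, 150, 768, 5, 3
last_hidden_states = torch.randn(batch, seq_len, bertdim)
targets_mask = torch.rand(batch, seq_len, num_targets)  # already permuted: batch, seqlen, target

bilinear = nn.Bilinear(bertdim, num_targets, num_classes)
sequence_logits = bilinear(last_hidden_states, targets_mask)
print(sequence_logits.shape)  # torch.Size([2, 150, 3])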
--------------------------------------------------------------------------------
/NewsSentiment/models/multitargets/seq2seq_without_targetmask.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch.nn as nn
5 |
6 | from NewsSentiment.SentimentClasses import SentimentClasses
7 | from NewsSentiment.consts import *
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class SeqTwoSeqWithoutTargetMask(FXBaseModel):
13 | """
14 | Outputs the class probabilities for each token. So, the output will be:
15 | (batch, seqlen (150), classnum (3))
16 | """
17 |
18 | @staticmethod
19 | def get_language_models():
20 | return (BERT_BASE_UNCASED,)
21 |
22 | @staticmethod
23 | def get_input_field_ids():
24 | return [
25 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
26 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
27 | ]
28 |
29 | def __init__(self, transformer_models: Dict, opt: Namespace):
30 | super(SeqTwoSeqWithoutTargetMask, self).__init__()
31 | self.language_model = transformer_models[BERT_BASE_UNCASED]
32 | self.dropout = nn.Dropout(opt.dropout)
33 | # https://pytorch.org/docs/stable/nn.html#linear supports multi-dimensional
34 | # input; only the last dimension has to be specified for Linear creation
35 |
36 | self.attentionlike_dense = nn.Linear(
37 | self.language_model.config.hidden_size,
38 | SentimentClasses.get_num_classes(),
39 | )
40 |
41 | def forward(self, inputs):
42 | # get inputs
43 | text_bert_indices = FXDataset.get_input_by_params(
44 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
45 | )
46 |
47 | # prepare inputs
48 | # for text only, we do not need target specific information, i.e., all text
49 | # vectors are identical. also, bert can
50 | # only process one sequence of size max_seq_len (more specifically, a tensor
51 | # of size batch_size x max_seq_len). thus, we select only the first element from
52 | # the second dimension (the dimensions are: batch, targets, hidden_states)
53 | text_bert_indices = text_bert_indices[:, 0, :]
54 | # apply bert
55 | last_hidden_states, pooler_output, all_hidden_states = self.language_model(
56 | input_ids=text_bert_indices
57 | )
58 | # shape: batch, seqlen, bertdim
59 | last_hidden_states = self.dropout(last_hidden_states)
60 |
61 | sequence_logits = self.attentionlike_dense(last_hidden_states)
62 |
63 | return sequence_logits
64 |
--------------------------------------------------------------------------------
/NewsSentiment/models/multitargets/tdbertlikemultitarget.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch.nn as nn
5 |
6 | from NewsSentiment.consts import *
7 | from NewsSentiment.dataset import FXDataset
8 | from NewsSentiment.models.FXBaseModel import FXBaseModel
9 |
10 |
11 | class TDBertLikeMultiTarget(FXBaseModel):
12 | """
13 | This model returns a sequence that only contains the hidden states of those output
14 | nodes that represent a word piece of the target phrase. All other hidden states are
15 | set to 0. The length of the output sequence is opt.max_seq_len
16 |
17 | From a conceptual perspective, TD-BERT is similar in some aspects (e.g., only the
18 | output of target-phrase-related nodes is used), but in other aspects it differs, or
19 | it is not quite clear how the authors of TD-BERT implemented them. An email I sent
20 | to them has not been answered yet.
21 | """
22 |
23 | @staticmethod
24 | def get_language_models():
25 | return (BERT_BASE_UNCASED,)
26 |
27 | @staticmethod
28 | def get_input_field_ids():
29 | return [
30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
31 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
32 | # (
33 | # BERT_BASE_UNCASED,
34 | # FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES,
35 | # ),
36 | ]
37 |
38 | def __init__(self, transformer_models: Dict, opt: Namespace):
39 | super(TDBertLikeMultiTarget, self).__init__()
40 | self.language_model = transformer_models[BERT_BASE_UNCASED]
41 | self.dropout = nn.Dropout(opt.dropout)
42 | self.dense = nn.Linear(
43 | self.language_model.config.hidden_size, opt.polarities_dim
44 | )
45 |
46 | def forward(self, inputs):
47 | # get inputs
48 | text_bert_indices = FXDataset.get_input_by_params(
49 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
50 | )
51 | text_bert_indices_targets_mask = FXDataset.get_input_by_params(
52 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
53 | )
54 | # text_bert_indices_nrc_emolex = FXDataset.get_input_by_params(
55 | # inputs,
56 | # BERT_BASE_UNCASED,
57 | # FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES,
58 | # )
59 | # prepare inputs
60 | # for text only, we do not need target specific information, i.e., all text
61 | # vectors are identical. also, bert can
62 | # only process one sequence of size max_seq_len (more specifically, a tensor
63 | # of size batch_size x max_seq_len). thus, we select only the first element from
64 | # the second dimension (the dimensions are: batch, targets, hidden_states)
65 | text_bert_indices = text_bert_indices[:, 0, :]
66 | # apply bert
67 | last_hidden_states, pooler_output, all_hidden_states = self.language_model(
68 | input_ids=text_bert_indices
69 | )
70 | # element-wise multiplication with target mask
71 | # align the dimensions of the tensors
72 | # last_hidden_states.shape = 4,150,768; should be 4,5,150,768
73 | # insert a new singleton dimension after the first dimension
74 | # new shape: 4,1,150,768
75 | last_hidden_states = last_hidden_states.unsqueeze(1)
76 | # repeat k times along the new 2nd dimension, where k is the target size
77 | last_hidden_states = last_hidden_states.repeat(
78 | 1, text_bert_indices_targets_mask.shape[1], 1, 1
79 | )
80 | # text_bert_indices_targets_mask.shape = 4,5,150; should be 4,5,150,768
81 | # insert a singleton dimension after the three already existing dimensions
82 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.unsqueeze(3)
83 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.repeat(
84 | 1, 1, 1, last_hidden_states.shape[3]
85 | )
86 |
87 | last_hidden_states_only_targets = (
88 | last_hidden_states * text_bert_indices_targets_mask
89 | )
90 |
91 | # similar to TD-BERT, perform max pooling TODO not implemented yet, instead:
92 | # for now, retrieve only the values of the target's output tokens and then
93 | # calculate the mean:
94 | # (batchsize, targetsize, 150, 768) -> (batchsize, targetsize, 768)
95 | # get the positions of target nodes. note that we cannot simply take the mean
96 | # as it would divide by the number of the sequence length, whereas the effective
97 | # length is only of size k, where k is the number of non-zero scalars in the
98 | # input mask (since we are only interested in those values)
99 | last_hidden_states_aggregated_per_target = last_hidden_states_only_targets.sum(
100 | dim=2
101 | )
102 | # get the sum for each (batch, hidden states) (sum over the sequence length
103 | # dim)
104 | denominator_for_mean = text_bert_indices_targets_mask.sum(dim=2)
105 | # divide for each (batch, hidden states) by the denominator to get the mean
106 | last_hidden_states_aggregated_per_target = (
107 | last_hidden_states_aggregated_per_target / denominator_for_mean
108 | )
109 | # dropout before dense layer, as in most other tsc models
110 | last_hidden_states_aggregated_per_target = self.dropout(
111 | last_hidden_states_aggregated_per_target
112 | )
113 | # dense layer
114 | logits = self.dense(last_hidden_states_aggregated_per_target)
115 |
116 | return logits
117 |
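
A minimal sketch (toy tensors, standalone) of the masked mean computed above: sum over the sequence dimension, then divide by the number of target tokens rather than by the full sequence length. The mask here is already broadcast to the bert dimension, as in the code above (unsqueeze + repeat):

import torch

batch, num_targets, seq_len, bertdim = 2, 5, 150, 768
hidden = torch.randn(batch, num_targets, seq_len, bertdim)
mask = torch.zeros(batch, num_targets, seq_len, bertdim)
mask[:, 0, 7:9, :] = 1.0  # first target spans tokens 7-8; other toy targets are empty

masked = hidden * mask
summed = masked.sum(dim=2)              # batch, targets, bertdim
denominator = mask.sum(dim=2)           # number of target tokens per (batch, target)
mean_per_target = summed / denominator  # 0/0 gives NaN for targets with an empty mask
print(mean_per_target.shape)            # torch.Size([2, 5, 768])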
--------------------------------------------------------------------------------
/NewsSentiment/models/multitargets/tdbertlikemultitarget_dense.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch.nn as nn
5 |
6 | from NewsSentiment.consts import *
7 | from NewsSentiment.dataset import FXDataset
8 | from NewsSentiment.models.FXBaseModel import FXBaseModel
9 |
10 |
11 | class TDBertLikeMultiTargetDense(FXBaseModel):
12 | """
13 | This model returns a sequence that only contains the hidden states of those output
14 | nodes that represent a word piece of the target phrase. All other hidden states are
15 | set to 0. The length of the output sequence is opt.max_seq_len
16 |
17 | From a conceptual perspective, TD-BERT is similar in some aspects (e.g., only the
18 | output of target-phrase-related nodes is used), but in other aspects it differs, or
19 | it is not quite clear how the authors of TD-BERT implemented them. An email I sent
20 | to them has not been answered yet.
21 | """
22 |
23 | @staticmethod
24 | def get_language_models():
25 | return (BERT_BASE_UNCASED,)
26 |
27 | @staticmethod
28 | def get_input_field_ids():
29 | return [
30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
31 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
32 | ]
33 |
34 | def __init__(self, transformer_models: Dict, opt: Namespace):
35 | super(TDBertLikeMultiTargetDense, self).__init__()
36 | self.language_model = transformer_models[BERT_BASE_UNCASED]
37 | self.dropout = nn.Dropout(opt.dropout)
38 | self.dense = nn.Bilinear(
39 | self.language_model.config.hidden_size,
40 | FXDataset.NUM_MAX_TARGETS_PER_ITEM,
41 | FXDataset.NUM_MAX_TARGETS_PER_ITEM,
42 | )
43 | self.dense2 = nn.Linear(opt.max_seq_len, opt.polarities_dim)
44 |
45 | def forward(self, inputs):
46 | # get inputs
47 | text_bert_indices = FXDataset.get_input_by_params(
48 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
49 | )
50 | text_bert_indices_targets_mask = FXDataset.get_input_by_params(
51 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
52 | )
53 | # prepare inputs
54 | # for text only, we do not need target specific information, i.e., all text
55 | # vectors are identical. also, bert can
56 | # only process one sequence of size max_seq_len (more specifically, a tensor
57 | # of size batch_size x max_seq_len). thus, we select only the first element from
58 | # the second dimension (the dimensions are: batch, targets, hidden_states)
59 | text_bert_indices = text_bert_indices[:, 0, :]
60 | # apply bert
61 | last_hidden_states, pooler_output, all_hidden_states = self.language_model(
62 | input_ids=text_bert_indices
63 | )
64 |
65 | # dropout
66 | last_hidden_states = self.dropout(last_hidden_states)
67 |
68 | # shapes:
69 | # last_hidden_states: batch, seqlen, bertdim
70 | # text_bert_indices_targets_mask: batch, target, seqlen
71 | # new text_bert_indices_targets_mask: batch, seqlen, target
72 | text_bert_indices_targets_mask = text_bert_indices_targets_mask.permute(
73 | 0, 2, 1
74 | ).clone()
75 |
76 | logits = self.dense(last_hidden_states, text_bert_indices_targets_mask)
77 | logits = logits.permute(0, 2, 1)
78 | logits = self.dense2(logits)
79 |
80 | return logits
81 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/models/singletarget/__init__.py
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/aen.py:
--------------------------------------------------------------------------------
1 | # adapted from absa-pytorch
2 | from argparse import Namespace
3 | from typing import Dict
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.layers.attention import Attention
10 | from NewsSentiment.layers.point_wise_feed_forward import PositionwiseFeedForward
11 | from NewsSentiment.layers.squeeze_embedding import SqueezeEmbedding
12 |
13 | from NewsSentiment.consts import *
14 | from NewsSentiment.fxlogger import get_logger
15 | from NewsSentiment.models.FXBaseModel import FXBaseModel
16 |
17 | logger = get_logger()
18 |
19 |
20 | class AEN_Base(FXBaseModel):
21 | @staticmethod
22 | def get_language_models():
23 | return (BERT_BASE_UNCASED,)
24 |
25 | @staticmethod
26 | def get_input_field_ids():
27 | return [
28 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
29 | (BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS),
30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
31 | ]
32 |
33 | def __init__(self, transformer_models: Dict, opt: Namespace):
34 | super(AEN_Base, self).__init__()
35 | logger.info("creating AEN_Base")
36 | self.device = opt.device
37 |
38 | self.language_model = transformer_models[BERT_BASE_UNCASED]
39 | self.name = "aen_bert"
40 | self.lm_representation = "last"
41 | embed_dim = self.language_model.config.hidden_size
42 |
43 | self.squeeze_embedding = SqueezeEmbedding()
44 | self.dropout = nn.Dropout(opt.dropout)
45 | hidden_dim = embed_dim # or should this be 300, as mentioned in the paper
46 |
47 | self.attn_k = Attention(
48 | embed_dim,
49 | out_dim=hidden_dim,
50 | n_head=8,
51 | score_function="mlp",
52 | dropout=opt.dropout,
53 | )
54 | self.attn_q = Attention(
55 | embed_dim,
56 | out_dim=hidden_dim,
57 | n_head=8,
58 | score_function="mlp",
59 | dropout=opt.dropout,
60 | )
61 | self.ffn_c = PositionwiseFeedForward(hidden_dim, dropout=opt.dropout)
62 | self.ffn_t = PositionwiseFeedForward(hidden_dim, dropout=opt.dropout)
63 |
64 | self.attn_s1 = Attention(
65 | hidden_dim, n_head=8, score_function="mlp", dropout=opt.dropout
66 | )
67 |
68 | self.dense = nn.Linear(hidden_dim * 3, opt.polarities_dim)
69 |
70 | def apply_lm(self, _input, _input_attention=None):
71 | if self.name in ["aen_bert", "aen_roberta"]:
72 | last_hidden, _, all_hidden = self.language_model(
73 | input_ids=_input, attention_mask=_input_attention
74 | )
75 | elif self.name == "aen_distilbert":
76 | last_hidden, all_hidden = self.language_model(
77 | input_ids=_input, attention_mask=_input_attention
78 | )
79 | else:
80 | raise Exception("unknown model name")
81 |
82 | if self.lm_representation == "last":
83 | return last_hidden
84 | elif self.lm_representation == "sum_last_four":
85 | last_four = all_hidden[-4:] # list of four, each has shape: 16, 80, 768
86 | last_four_stacked = torch.stack(last_four) # shape: 4, 16, 80, 768
87 | sum_last_four = torch.sum(last_four_stacked, dim=0)
88 | return sum_last_four
89 | elif self.lm_representation == "mean_last_four":
90 | last_four = all_hidden[-4:] # list of four, each has shape: 16, 80, 768
91 | last_four_stacked = torch.stack(last_four) # shape: 4, 16, 80, 768
92 | mean_last_four = torch.mean(last_four_stacked, dim=0)
93 | return mean_last_four
94 | elif self.lm_representation == "sum_last_two":
95 | last_two = all_hidden[-2:]
96 | last_two_stacked = torch.stack(last_two)
97 | sum_last_two = torch.sum(last_two_stacked, dim=0)
98 | return sum_last_two
99 | elif self.lm_representation == "mean_last_two":
100 | last_two = all_hidden[-2:]
101 | last_two_stacked = torch.stack(last_two)
102 | mean_last_two = torch.mean(last_two_stacked, dim=0)
103 | return mean_last_two
104 | elif self.lm_representation == "sum_all":
105 | all_stacked = torch.stack(all_hidden)
106 | sum_all = torch.sum(all_stacked, dim=0)
107 | return sum_all
108 | elif self.lm_representation == "mean_all":
109 | all_stacked = torch.stack(all_hidden)
110 | mean_all = torch.mean(all_stacked, dim=0)
111 | return mean_all
112 |
113 | def forward(self, inputs):
114 | context = FXDataset.get_input_by_params(
115 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
116 | )
117 | target = FXDataset.get_input_by_params(
118 | inputs, BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS
119 | )
120 | context_len = torch.sum(context != 0, dim=-1)
121 | target_len = torch.sum(target != 0, dim=-1)
122 |
123 | context = self.squeeze_embedding(context, context_len)
124 | # context_attention = self.squeeze_embedding(context_attention, context_len)
125 | context = self.apply_lm(context)
126 | context = self.dropout(context)
127 |
128 | target = self.squeeze_embedding(target, target_len)
129 | # target_attention = self.squeeze_embedding(target_attention, target_len)
130 | target = self.apply_lm(target)
131 | target = self.dropout(target)
132 |
133 | hc, _ = self.attn_k(context, context)
134 | hc = self.ffn_c(hc)
135 |
136 | ht, _ = self.attn_q(context, target)
137 | ht = self.ffn_t(ht)
138 |
139 | s1, _ = self.attn_s1(hc, ht)
140 |
141 | context_len = context_len.to(dtype=torch.float, device=self.device)
142 | target_len = target_len.to(dtype=torch.float, device=self.device)
143 |
144 | hc_mean = torch.div(
145 | torch.sum(hc, dim=1), context_len.view(context_len.size(0), 1)
146 | )
147 | ht_mean = torch.div(
148 | torch.sum(ht, dim=1), target_len.view(target_len.size(0), 1)
149 | )
150 | s1_mean = torch.div(
151 | torch.sum(s1, dim=1), context_len.view(context_len.size(0), 1)
152 | )
153 |
154 | x = torch.cat((hc_mean, s1_mean, ht_mean), dim=-1)
155 | out = self.dense(x)
156 |
157 | return out
158 |
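
The lm_representation options in apply_lm reduce a tuple of per-layer hidden states to a single tensor; a standalone sketch of the "mean_last_four" case with toy tensors (the layer count and shapes are illustrative):

import torch

# pretend hidden states from a 12-layer encoder plus embeddings: 13 tensors
all_hidden = [torch.randn(16, 80, 768) for _ in range(13)]

last_four = all_hidden[-4:]                             # list of four (16, 80, 768) tensors
last_four_stacked = torch.stack(last_four)              # (4, 16, 80, 768)
mean_last_four = torch.mean(last_four_stacked, dim=0)   # (16, 80, 768)
print(mean_last_four.shape)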
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/grutscsingle.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch
5 | import torch.nn as nn
6 | from transformers import PretrainedConfig
7 |
8 | from NewsSentiment.consts import *
9 | from NewsSentiment.dataset import FXDataset, FXEasyTokenizer
10 | from NewsSentiment.models.FXBaseModel import (
11 | FXBaseModel,
12 | provide_pretrained,
13 | default_pretrained,
14 | )
15 |
16 |
17 | @default_pretrained("v1.0.0")
18 | @provide_pretrained(
19 | "v1.0.0", "https://github.com/fhamborg/NewsMTSC/releases/download/v1.0.0/grutsc"
20 | )
21 | class GRUTSCSingle(FXBaseModel):
22 | """
23 | Inspired from https://arxiv.org/pdf/2006.00052.pdf
24 | Differences:
25 | - instead of question ("Is the ACLU good for USA?") then text (1 or more sentences),
26 | we use text then target (and no question, similar to BERT-SPC)
27 | - no vader
28 | - additionally we can flexibly use any knowledge source as well as multiple
29 | - we have one large matrix for all concatenated knowledge source embeddings, whereas
30 | in the original paper they use individual, smaller matrices for each knowledge
31 | source embedding
32 | - target mask (mostly useful for BERT)
33 | - fine-tuning LM enabled
34 | """
35 |
36 | @staticmethod
37 | def get_language_models():
38 | return (get_default_lm(),)
39 |
40 | @staticmethod
41 | def get_input_field_ids():
42 | return [
43 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS),
44 | (
45 | get_default_lm(),
46 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
47 | ),
48 | # while we use text-then-target as bert input, we can use text targetmask
49 | # and text knowledge source mask because it is identical to a hypothetical
50 | # text-then-target target mask or text-then-target knowledge source mask
51 | # (we would not highlight the target in the 2nd component in the
52 | # corresponding mask)
53 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
54 | (
55 | get_default_lm(),
56 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES,
57 | ),
58 | ]
59 |
60 | def __init__(
61 | self, transformer_models: Dict, opt: Namespace, config: PretrainedConfig
62 | ):
63 | super().__init__(config)
64 | self.language_model = transformer_models[get_default_lm()]
65 | self.ks_embeddings_dense = nn.Linear(
66 | FXEasyTokenizer.NUM_CATEGORIES_OF_SELECTED_KNOWLEDGE_SOURCES,
67 | self.language_model.config.hidden_size,
68 | )
69 | if get_default_lm() == BERT_BASE_UNCASED:
70 | self.is_use_targetmask = True
71 | else:
72 | self.is_use_targetmask = False
73 |
74 | num_input_embeddings = 2
75 | if self.is_use_targetmask:
76 | num_input_embeddings = 3
77 |
78 | self.gru = nn.GRU(
79 | self.language_model.config.hidden_size * num_input_embeddings,
80 | self.language_model.config.hidden_size * num_input_embeddings,
81 | bidirectional=True,
82 | batch_first=True,
83 | )
84 | self.dropout = nn.Dropout(opt.dropout)
85 | num_output_dim = opt.polarities_dim
86 | if opt.is_return_confidence:
87 | num_output_dim += 1
88 |
89 | self.dense = nn.Linear(
90 | # 3 inputs (original last gru out, mean, max), 2 inputs to gru (bert and
91 | # knowledge embedding), 2 (because bidirectional gru)
92 | self.language_model.config.hidden_size * 3 * num_input_embeddings * 2,
93 | num_output_dim,
94 | )
95 |
96 | def forward(self, inputs, is_return_ensemble_values: bool = False):
97 | # get inputs
98 | text_target_bert_indices = FXDataset.get_input_by_params(
99 | inputs, get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS
100 | )
101 | text_target_bert_segments_ids = FXDataset.get_input_by_params(
102 | inputs,
103 | get_default_lm(),
104 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
105 | )
106 | text_bert_indices_target_mask = FXDataset.get_input_by_params(
107 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
108 | )
109 | text_bert_indices_selected_knowledge_sources = FXDataset.get_input_by_params(
110 | inputs,
111 | get_default_lm(),
112 | FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_SELECTED_KNOWLEDGE_SOURCES,
113 | )
114 |
115 | # apply bert
116 | last_hidden_states = self.invoke_language_model(
117 | lm=self.language_model,
118 | input_ids=text_target_bert_indices,
119 | token_type_ids=text_target_bert_segments_ids,
120 | )
121 | # shape: batch, seqlen, hiddendim
122 |
123 | # apply knowledge embedding
124 | knowledge_embedded = self.ks_embeddings_dense(
125 | text_bert_indices_selected_knowledge_sources.float()
126 | )
127 | # shape: batch, seqlen, hiddendim
128 |
129 | if self.is_use_targetmask:
130 | # repeat
131 | target_mask = text_bert_indices_target_mask.unsqueeze(dim=2).repeat(
132 | 1, 1, knowledge_embedded.shape[2]
133 | )
134 | # shape: batch, seqlen, hiddendim
135 |
136 | # concat (called x_t in paper)
137 | bert_and_knowledge = torch.cat(
138 | (last_hidden_states, knowledge_embedded, target_mask), dim=2
139 | )
140 | # batch x seq x bert+knowledge+targetmask
141 | else:
142 | # concat (called x_t in paper)
143 | bert_and_knowledge = torch.cat(
144 | (last_hidden_states, knowledge_embedded), dim=2
145 | )
146 | # batch x seq x bert+knowledge
147 |
148 | # apply gru (result called z_t in paper)
149 | gru_all_hidden, gru_last_hidden = self.gru(
150 | bert_and_knowledge,
151 | torch.zeros(
152 | 2,
153 | bert_and_knowledge.shape[0],
154 | self.gru.hidden_size,  # must equal the gru's hidden size (hidden_size * num_input_embeddings)
155 | ).to(self.device),
156 | )
157 | # all hidden shape: batch x seq x 4*hidden (contains hidden states for each
158 | # part of the input seq)
159 | # last hidden shape: numdir x batch x 2*hidden (contains hidden states for last
160 | # part of input seq)
161 |
162 | # gru_last_hidden_own = gru_all_hidden[:,-1:,]
163 | # get both directions
164 | gru_last_hidden_dir0 = gru_last_hidden[0, :, :]
165 | gru_last_hidden_dir1 = gru_last_hidden[1, :, :]
166 | # shape each: batch x 2*hidden
167 | gru_last_hidden_stacked = torch.cat(
168 | (gru_last_hidden_dir0, gru_last_hidden_dir1), dim=1
169 | )
170 | # batch x 4*hidden
171 |
172 | # pooling
173 | # according to original paper: "max-pooling returns a vector with maximum
174 | # weights across all hidden states of input tokens for each dimension. in this
175 | # way, the input tokens with higher weights will be engaged for stance
176 | # prediction."
177 | gru_avg = torch.mean(gru_all_hidden, dim=1)
178 | gru_max, _ = torch.max(gru_all_hidden, dim=1)
179 |
180 | # concat (called "u" in original paper)
181 | gru_complete_concatted = torch.cat(
182 | (gru_last_hidden_stacked, gru_avg, gru_max), dim=1
183 | )
184 |
185 | if is_return_ensemble_values:
186 | return gru_complete_concatted
187 | else:
188 | # dense
189 | logits = self.dense(gru_complete_concatted)
190 |
191 | return logits
192 |
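
A toy sketch of the pooling step above: the bidirectional GRU's final hidden states for both directions are concatenated with mean- and max-pooling over all time steps. Dimensions are illustrative; the GRU input here stands in for the concatenated BERT and knowledge embeddings:

import torch
import torch.nn as nn

batch, seq_len, dim = 2, 10, 16  # dim stands in for hidden_size * num_input_embeddings
gru = nn.GRU(dim, dim, bidirectional=True, batch_first=True)

x = torch.randn(batch, seq_len, dim)
h0 = torch.zeros(2, batch, dim)                    # 2 = num_directions
all_hidden, last_hidden = gru(x, h0)               # (batch, seq, 2*dim), (2, batch, dim)

last_stacked = torch.cat((last_hidden[0], last_hidden[1]), dim=1)  # (batch, 2*dim)
gru_avg = torch.mean(all_hidden, dim=1)            # (batch, 2*dim)
gru_max, _ = torch.max(all_hidden, dim=1)          # (batch, 2*dim)

pooled = torch.cat((last_stacked, gru_avg, gru_max), dim=1)  # (batch, 3 * 2*dim)
print(pooled.shape)  # torch.Size([2, 96])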
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/lcf.py:
--------------------------------------------------------------------------------
1 | # adapted from https://github.com/yangheng95/LCF-ABSA
2 | from argparse import Namespace
3 | from typing import Dict
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | from transformers.models.bert.modeling_bert import BertPooler, BertSelfAttention  # path assumes transformers >= 4
9 |
10 | from NewsSentiment.consts import *
11 | from NewsSentiment.dataset import FXDataset
12 | from NewsSentiment.models.FXBaseModel import FXBaseModel
13 |
14 |
15 | class GlobalContext(nn.Module):
16 | def __init__(self, global_context_seqs_per_doc):
17 | super(GlobalContext, self).__init__()
18 | self.global_context_seqs_per_doc = global_context_seqs_per_doc
19 |
20 | def forward(self, inputs):
21 | pass
22 |
23 |
24 | class SelfAttention(nn.Module):
25 | def __init__(self, config, opt):
26 | super(SelfAttention, self).__init__()
27 | self.opt = opt
28 | self.config = config
29 | self.SA = BertSelfAttention(config)
30 | self.tanh = torch.nn.Tanh()
31 |
32 | def forward(self, inputs):
33 | zero_tensor = torch.tensor(
34 | np.zeros((inputs.size(0), 1, 1, self.opt.max_seq_len), dtype=np.float32),
35 | dtype=torch.float32,
36 | ).to(self.opt.device)
37 | SA_out = self.SA(inputs, zero_tensor)
38 | return self.tanh(SA_out[0])
39 |
40 |
41 | class LCF_BERT(FXBaseModel):
42 | @staticmethod
43 | def get_language_models():
44 | return (BERT_BASE_UNCASED,)
45 |
46 | @staticmethod
47 | def get_input_field_ids():
48 | return [
49 | (BERT_BASE_UNCASED, FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS),
50 | (
51 | BERT_BASE_UNCASED,
52 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
53 | ),
54 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
55 | (BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS),
56 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
57 | ]
58 |
59 | def __init__(self, transformer_models: Dict, opt: Namespace):
60 | super(LCF_BERT, self).__init__()
61 |
62 | bert = transformer_models[BERT_BASE_UNCASED]
63 | self.bert_spc = bert
64 | self.opt = opt
65 |
66 | # self.bert_local = copy.deepcopy(bert) # Uncomment the line to use dual Bert
67 | self.bert_local = (
68 | bert # Default to use single Bert and reduce memory requirements
69 | )
70 | self.dropout = nn.Dropout(self.opt.dropout)
71 | # while the paper describes 3 self attentions, the original implementation by the authors
72 | # uses only one. we stick with the original implementation.
73 | # answer by the author: the version found in the PyTorch-ABSA repository and below is better than what was
74 | # described in the paper (cf. https://github.com/yangheng95/LC-ABSA/issues/10#issuecomment-670301603)
75 | # self.bert_local_SA = SelfAttention(bert.config, self.opt)
76 | # self.bert_global_SA = SelfAttention(bert.config, self.opt)
77 | self.linear_double = nn.Linear(
78 | bert.config.hidden_size * 2, bert.config.hidden_size
79 | )
80 | self.bert_SA = SelfAttention(bert.config, self.opt)
81 | self.linear_single = nn.Linear(bert.config.hidden_size, bert.config.hidden_size)
82 | self.bert_pooler = BertPooler(bert.config)
83 |
84 | self.dense = nn.Linear(bert.config.hidden_size, self.opt.polarities_dim)
85 |
86 | def feature_dynamic_mask(self, text_local_indices, aspect_indices):
87 | texts = text_local_indices.cpu().numpy()
88 | asps = aspect_indices.cpu().numpy()
89 | mask_len = self.opt.SRD
90 | masked_text_raw_indices = np.ones(
91 | (
92 | text_local_indices.size(0),
93 | self.opt.max_seq_len,
94 | self.bert_local.config.hidden_size,
95 | ),
96 | dtype=np.float32,
97 | )
98 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))):
99 | asp_len = np.count_nonzero(asps[asp_i]) - 2
100 | try:
101 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0]
102 | except IndexError:
103 | continue
104 | if asp_begin >= mask_len:
105 | mask_begin = asp_begin - mask_len
106 | else:
107 | mask_begin = 0
108 | for i in range(mask_begin):
109 | masked_text_raw_indices[text_i][i] = np.zeros(
110 | (self.bert_local.config.hidden_size), dtype=np.float32
111 | )
112 | for j in range(asp_begin + asp_len + mask_len, self.opt.max_seq_len):
113 | masked_text_raw_indices[text_i][j] = np.zeros(
114 | (self.bert_local.config.hidden_size), dtype=np.float32
115 | )
116 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices)
117 | return masked_text_raw_indices.to(self.opt.device)
118 |
119 | def feature_dynamic_weighted(self, text_local_indices, aspect_indices):
120 | texts = text_local_indices.cpu().numpy()
121 | asps = aspect_indices.cpu().numpy()
122 | masked_text_raw_indices = np.ones(
123 | (
124 | text_local_indices.size(0),
125 | self.opt.max_seq_len,
126 | self.bert_local.config.hidden_size,
127 | ),
128 | dtype=np.float32,
129 | )
130 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))):
131 | asp_len = np.count_nonzero(asps[asp_i]) - 2
132 | try:
133 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0]
134 | asp_avg_index = (asp_begin * 2 + asp_len) / 2
135 | except IndexError:
136 | continue
137 | distances = np.zeros(np.count_nonzero(texts[text_i]), dtype=np.float32)
138 | for i in range(1, np.count_nonzero(texts[text_i]) - 1):
139 | if abs(i - asp_avg_index) + asp_len / 2 > self.opt.SRD:
140 | distances[i] = 1 - (
141 | abs(i - asp_avg_index) + asp_len / 2 - self.opt.SRD
142 | ) / np.count_nonzero(texts[text_i])
143 | else:
144 | distances[i] = 1
145 | for i in range(len(distances)):
146 | masked_text_raw_indices[text_i][i] = (
147 | masked_text_raw_indices[text_i][i] * distances[i]
148 | )
149 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices)
150 | return masked_text_raw_indices.to(self.opt.device)
151 |
152 | def forward(self, inputs):
153 | text_target_bert_indices = FXDataset.get_input_by_params(
154 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS,
155 | )
156 |
157 | text_target_bert_segments_ids = FXDataset.get_input_by_params(
158 | inputs,
159 | BERT_BASE_UNCASED,
160 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
161 | )
162 | text_local_indices = FXDataset.get_input_by_params(
163 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
164 | )
165 | aspect_indices = FXDataset.get_input_by_params(
166 | inputs, BERT_BASE_UNCASED, FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS
167 | )
168 |
169 | # apply bert and dropout
170 | bert_spc_out, _, _ = self.bert_spc(
171 | text_target_bert_indices, text_target_bert_segments_ids
172 | )
173 | bert_spc_out = self.dropout(bert_spc_out)
174 |
175 | bert_local_out, _, _ = self.bert_local(text_local_indices)
176 | bert_local_out = self.dropout(bert_local_out)
177 |
178 | if self.opt.local_context_focus == "cdm":
179 | masked_local_text_vec = self.feature_dynamic_mask(
180 | text_local_indices, aspect_indices
181 | )
182 | bert_local_out = torch.mul(bert_local_out, masked_local_text_vec)
183 | elif self.opt.local_context_focus == "cdw":
184 | weighted_text_local_features = self.feature_dynamic_weighted(
185 | text_local_indices, aspect_indices
186 | )
187 | bert_local_out = torch.mul(bert_local_out, weighted_text_local_features)
188 |
189 | # attention
190 | # bert_local_out = self.bert_local_SA(bert_local_out)
191 | # bert_spc_out = self.bert_global_SA(bert_spc_out)
192 |
193 | # cat
194 | out_cat = torch.cat((bert_local_out, bert_spc_out), dim=-1)
195 |
196 | # "interactive learning layer"
197 | mean_pool = self.linear_double(out_cat)
198 | self_attention_out = self.bert_SA(mean_pool)
199 | pooled_out = self.bert_pooler(self_attention_out)
200 |
201 | dense_out = self.dense(pooled_out)
202 | return dense_out
203 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/lcf2.py:
--------------------------------------------------------------------------------
1 | # adapted from https://github.com/yangheng95/LC-ABSA/blob/c945a94e0f86116c5578245aa9ad36c46c7b9c4a/models/lc_apc/lcf_bert.py
2 | # according to https://github.com/yangheng95/LC-ABSA/issues/10#issuecomment-670301603
3 | import copy
4 | from argparse import Namespace
5 | from typing import Dict
6 |
7 | import numpy as np
8 | import torch
9 | import torch.nn as nn
10 | from transformers.models.bert.modeling_bert import BertPooler
11 |
12 | from NewsSentiment.consts import *
13 | from NewsSentiment.dataset import FXDataset
14 | from NewsSentiment.layers.attention import FXBertSelfAttention
15 | from NewsSentiment.models.FXBaseModel import FXBaseModel
16 |
17 |
18 | class GlobalContext(nn.Module):
19 | def __init__(self, global_context_seqs_per_doc):
20 | super(GlobalContext, self).__init__()
21 | self.global_context_seqs_per_doc = global_context_seqs_per_doc
22 |
23 | def forward(self, inputs):
24 | pass
25 |
26 |
27 | class SelfAttention(nn.Module):
28 | def __init__(self, config, opt):
29 | super(SelfAttention, self).__init__()
30 | self.opt = opt
31 | self.config = config
32 | self.SA = FXBertSelfAttention(
33 | hidden_size=config.hidden_size,
34 | num_attention_heads=config.num_attention_heads,
35 | attention_probs_dropout_prob=0.1,
36 | )
37 | self.tanh = torch.nn.Tanh()
38 |
39 | def forward(self, inputs):
40 | zero_tensor = torch.tensor(
41 | np.zeros((inputs.size(0), 1, 1, self.opt.max_seq_len), dtype=np.float32),
42 | dtype=torch.float32,
43 | ).to(self.opt.device)
44 | SA_out = self.SA(inputs, zero_tensor)
45 | return self.tanh(SA_out[0])
46 |
47 |
48 | class LCF_BERT2Dual(FXBaseModel):
49 | """
50 | While lcf.py:LCF_BERT follows the implementation in the ABSA-PyTorch repository, this implementation
51 | (LCF_BERT2Dual) follows the implementation in the author's own repository, which, according to
52 | https://github.com/yangheng95/LC-ABSA/issues/10#issuecomment-670301603, has received some further improvements
53 | compared to the ABSA-PyTorch version.
54 | """
55 |
56 | @staticmethod
57 | def get_language_models():
58 | return (get_default_lm(),)
59 |
60 | @staticmethod
61 | def get_input_field_ids():
62 | return [
63 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS),
64 | (
65 | get_default_lm(),
66 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
67 | ),
68 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
69 | (get_default_lm(), FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS),
70 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
71 | ]
72 |
73 | def __init__(self, transformer_models: Dict, opt: Namespace):
74 | super(LCF_BERT2Dual, self).__init__()
75 |
76 | bert = transformer_models[get_default_lm()]
77 |
78 | self.bert4global = bert
79 | # note: using a second, separate bert here should slightly improve performance,
80 | # cf. https://github.com/yangheng95/LC-ABSA/#tips
81 | # self.bert4local = copy.deepcopy(bert)
82 | # however, we can't do this on scc because we run out of memory even with a batch
83 | # size of only 16. because of that, we use the same bert for both local and global
84 | # (just as in lcf.py)
85 | self.bert4local = bert
86 | self.opt = opt
87 | self.dropout = nn.Dropout(self.opt.dropout)
88 | self.bert_SA = SelfAttention(bert.config, self.opt)
89 | self.linear2 = nn.Linear(bert.config.hidden_size * 2, bert.config.hidden_size)
90 | # self.linear3 = nn.Linear(bert.config.hidden_size * 3, bert.config.hidden_size)
91 | self.bert_pooler = BertPooler(bert.config)  # needed in forward()
92 | self.dense = nn.Linear(bert.config.hidden_size, self.opt.polarities_dim)
93 |
94 | def feature_dynamic_mask(self, text_local_indices, aspect_indices):
95 | texts = text_local_indices.cpu().numpy()
96 | asps = aspect_indices.cpu().numpy()
97 | mask_len = self.opt.SRD
98 | masked_text_raw_indices = np.ones(
99 | (
100 | text_local_indices.size(0),
101 | self.opt.max_seq_len,
102 | self.bert4local.config.hidden_size,
103 | ),
104 | dtype=np.float32,
105 | )
106 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))):
107 | asp_len = np.count_nonzero(asps[asp_i]) - 2
108 | try:
109 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0]
110 | except IndexError:  # target not found in text
111 | continue
112 | if asp_begin >= mask_len:
113 | mask_begin = asp_begin - mask_len
114 | else:
115 | mask_begin = 0
116 | for i in range(mask_begin):
117 | masked_text_raw_indices[text_i][i] = np.zeros(
118 | (self.bert4local.config.hidden_size), dtype=np.float64
119 | )
120 | for j in range(asp_begin + asp_len + mask_len, self.opt.max_seq_len):
121 | masked_text_raw_indices[text_i][j] = np.zeros(
122 | (self.bert4local.config.hidden_size), dtype=np.float64
123 | )
124 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices)
125 | return masked_text_raw_indices.to(self.opt.device)
126 |
127 | def feature_dynamic_weighted(self, text_local_indices, aspect_indices):
128 | texts = text_local_indices.cpu().numpy()
129 | asps = aspect_indices.cpu().numpy()
130 | masked_text_raw_indices = np.ones(
131 | (
132 | text_local_indices.size(0),
133 | self.opt.max_seq_len,
134 | self.bert4local.config.hidden_size,
135 | ),
136 | dtype=np.float32,
137 | )
138 | for text_i, asp_i in zip(range(len(texts)), range(len(asps))):
139 | asp_len = np.count_nonzero(asps[asp_i]) - 2
140 | try:
141 | asp_begin = np.argwhere(texts[text_i] == asps[asp_i][1])[0][0]
142 | asp_avg_index = (asp_begin * 2 + asp_len) / 2
143 | except IndexError:  # target not found in text
144 | continue
145 | distances = np.zeros(np.count_nonzero(texts[text_i]), dtype=np.float32)
146 | for i in range(1, np.count_nonzero(texts[text_i]) - 1):
147 | if abs(i - asp_avg_index) + asp_len / 2 > self.opt.SRD:
148 | distances[i] = 1 - (
149 | abs(i - asp_avg_index) + asp_len / 2 - self.opt.SRD
150 | ) / np.count_nonzero(texts[text_i])
151 | else:
152 | distances[i] = 1
153 | for i in range(len(distances)):
154 | masked_text_raw_indices[text_i][i] = (
155 | masked_text_raw_indices[text_i][i] * distances[i]
156 | )
157 | masked_text_raw_indices = torch.from_numpy(masked_text_raw_indices)
158 | return masked_text_raw_indices.to(self.opt.device)
159 |
160 | def forward(self, inputs):
161 | text_target_bert_indices = FXDataset.get_input_by_params(
162 | inputs, get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS,
163 | )
164 | text_target_bert_segments_ids = FXDataset.get_input_by_params(
165 | inputs,
166 | get_default_lm(),
167 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
168 | )
169 | text_local_indices = FXDataset.get_input_by_params(
170 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
171 | )
172 | aspect_indices = FXDataset.get_input_by_params(
173 | inputs, get_default_lm(), FIELD_TARGET_IDS_WITH_SPECIAL_TOKENS
174 | )
175 |
176 | # bert
177 | global_context_features = self.invoke_language_model(
178 | self.bert4global,
179 | input_ids=text_target_bert_indices,
180 | token_type_ids=text_target_bert_segments_ids,
181 | )
182 | local_context_features = self.invoke_language_model(
183 | self.bert4local, text_local_indices
184 | )
185 |
186 | # mask
187 | if self.opt.local_context_focus == "cdm":
188 | lcf_matrix = self.feature_dynamic_mask(text_local_indices, aspect_indices)
189 | elif self.opt.local_context_focus == "cdw":
190 | lcf_matrix = self.feature_dynamic_weighted(
191 | text_local_indices, aspect_indices
192 | )
193 |
194 | # LCF layer
195 | lcf_features = torch.mul(local_context_features, lcf_matrix)
196 | lcf_features = self.bert_SA(lcf_features)
197 |
198 | cat_features = torch.cat((lcf_features, global_context_features), dim=-1)
199 | cat_features = self.linear2(cat_features)
200 | cat_features = self.dropout(cat_features)
201 |
202 | pooled_out = self.bert_pooler(cat_features)
203 | dense_out = self.dense(pooled_out)
204 |
205 | return dense_out
206 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/notargetcls.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch.nn as nn
5 |
6 | from NewsSentiment.consts import *
7 | from NewsSentiment.dataset import FXDataset
8 | from NewsSentiment.layers.AggregatorForBert import AggregatorForBert
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class NoTargetClsBert(FXBaseModel):
13 | @staticmethod
14 | def get_language_models():
15 | return (BERT_BASE_UNCASED,)
16 |
17 | @staticmethod
18 | def get_input_field_ids():
19 | return [
20 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
21 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
22 | ]
23 |
24 | def __init__(self, transformer_models: Dict, opt: Namespace):
25 | super(NoTargetClsBert, self).__init__()
26 | self.language_model = transformer_models[BERT_BASE_UNCASED]
27 | self.aggregator_for_bert = AggregatorForBert(opt.spc_lm_representation)
28 | self.dropout = nn.Dropout(opt.dropout)
29 | self.dense = nn.Linear(
30 | self.language_model.config.hidden_size, opt.polarities_dim
31 | )
32 |
33 | def forward(self, inputs):
34 | text_bert_indices = FXDataset.get_input_by_params(
35 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
36 | )
37 |
38 | last_hidden_state, pooler_output, all_hidden_states = self.language_model(
39 | input_ids=text_bert_indices
40 | )
41 | prepared_output = self.aggregator_for_bert(
42 | last_hidden_state, pooler_output, all_hidden_states
43 | )
44 |
45 | prepared_output = self.dropout(prepared_output)
46 | logits = self.dense(prepared_output)
47 |
48 | return logits
49 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/random_single.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from NewsSentiment.consts import *
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class RandomSingle(FXBaseModel):
13 | """
14 | Random baseline that ignores the input and returns uniformly random logits.
15 | """
16 |
17 | @staticmethod
18 | def get_language_models():
19 | """
20 | The overall architecture assumes that at least one language model is used, so we
21 | require bert here only for compatibility.
22 | :return:
23 | """
24 | return (BERT_BASE_UNCASED,)
25 |
26 | @staticmethod
27 | def get_input_field_ids():
28 | return [
29 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
30 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
31 | ]
32 |
33 | def __init__(self, transformer_models: Dict, opt: Namespace):
34 | super(RandomSingle, self).__init__()
35 | self.num_classes = opt.polarities_dim
36 |
37 | def forward(self, inputs):
38 | text_bert_indices = FXDataset.get_input_by_params(
39 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
40 | )
41 | batch_size = text_bert_indices.shape[0]
42 | num_targets = text_bert_indices.shape[1]
43 |
44 | # get a random tensor
45 | logits = torch.rand(batch_size, num_targets, self.num_classes, device=text_bert_indices.device)
46 |
47 | return logits
48 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/spc.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch.nn as nn
5 |
6 | from NewsSentiment.consts import *
7 | from NewsSentiment.dataset import FXDataset
8 | from NewsSentiment.layers.AggregatorForBert import AggregatorForBert
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class SPC_Base(FXBaseModel):
13 | @staticmethod
14 | def get_language_models():
15 | return (get_default_lm(),)
16 |
17 | @staticmethod
18 | def get_input_field_ids():
19 | return [
20 | (get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS),
21 | (
22 | get_default_lm(),
23 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
24 | ),
25 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
26 | ]
27 |
28 | def __init__(self, transformer_models: Dict, opt: Namespace):
29 | super(SPC_Base, self).__init__()
30 | self.language_model = transformer_models[get_default_lm()]
31 | self.aggregator_for_bert = AggregatorForBert(opt.spc_lm_representation)
32 | self.dropout = nn.Dropout(opt.dropout)
33 | self.dense = nn.Linear(
34 | self.language_model.config.hidden_size, opt.polarities_dim
35 | )
36 |
37 | def forward(self, inputs):
38 | text_target_bert_indices = FXDataset.get_input_by_params(
39 | inputs, get_default_lm(), FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS
40 | )
41 | text_target_bert_segments_ids = FXDataset.get_input_by_params(
42 | inputs,
43 | get_default_lm(),
44 | FIELD_TEXT_THEN_TARGET_IDS_WITH_SPECIAL_TOKENS_SEGMENT_IDS,
45 | )
46 |
47 | last_hidden_state = self.invoke_language_model(
48 | lm=self.language_model,
49 | input_ids=text_target_bert_indices,
50 | token_type_ids=text_target_bert_segments_ids,
51 | )
52 | # the following two variables can only be derived for some models, whereas invoke_language_model currently
53 | # returns only the last_hidden_state.
54 | assert self.aggregator_for_bert.spc_lm_representation == "mean_last"
55 | pooler_output, all_hidden_states = None, None
56 | prepared_output = self.aggregator_for_bert(
57 | last_hidden_state, pooler_output, all_hidden_states
58 | )
59 | prepared_output = self.dropout(prepared_output)
60 | logits = self.dense(prepared_output)
61 |
62 | return logits
63 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/td_bert.py:
--------------------------------------------------------------------------------
1 | # this file re-implements TD-BERT by Gao Zhengjie et al.
2 | # while this file aims to be conceptually identical to TD-BERT, one technical difference is that we do not calculate
3 | # the target mask within the model (here) but do this step as part of the dataset processing. in case there are strong
4 | # performance differences between the original TD-BERT and this implementation, it might be worth exploring whether
5 | # this technical difference actually yields an identical implementation.
6 | from argparse import Namespace
7 | from typing import Dict
8 |
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 |
13 | from NewsSentiment.consts import *
14 | from NewsSentiment.dataset import FXDataset
15 | from NewsSentiment.models.FXBaseModel import FXBaseModel
16 |
17 |
18 | class TD_BERT(FXBaseModel):
19 | @staticmethod
20 | def get_language_models():
21 | return (get_default_lm(),)
22 |
23 | @staticmethod
24 | def get_input_field_ids():
25 | return [
26 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
27 | (get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
28 | ]
29 |
30 | def __init__(self, transformer_models: Dict, opt: Namespace):
31 | super(TD_BERT, self).__init__()
32 | self.opt = opt
33 | self.language_model = transformer_models[get_default_lm()]
34 | self.dropout = nn.Dropout(opt.dropout)
35 | self.fc = nn.Linear(self.language_model.config.hidden_size, opt.polarities_dim)
36 |
37 | def forward(self, inputs, is_return_ensemble_values: bool = False):
38 | # get inputs
39 | text_bert_indices = FXDataset.get_input_by_params(
40 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
41 | )
42 | text_bert_indices_target_mask = FXDataset.get_input_by_params(
43 | inputs, get_default_lm(), FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
44 | )
45 |
46 | # apply bert
47 | last_hidden_states = self.invoke_language_model(
48 | lm=self.language_model,
49 | input_ids=text_bert_indices,
50 | )
51 |
52 | # element-wise multiplication with target mask
53 | # unsqueeze, cf. https://stackoverflow.com/q/62559382
54 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze(
55 | -1
56 | )
57 | last_hidden_states_only_target = (
58 | last_hidden_states * text_bert_indices_target_mask_unsqueezed
59 | )
60 |
61 | # as in TD-BERT, perform max pooling
62 | last_hidden_states_only_target_aggregated, _ = last_hidden_states_only_target.max(
63 | dim=1
64 | )
65 |
66 | # dropout before dense layer, as in most other tsc models
67 | last_hidden_states_only_target_aggregated = self.dropout(
68 | last_hidden_states_only_target_aggregated
69 | )
70 |
71 | if is_return_ensemble_values:
72 | return last_hidden_states_only_target_aggregated
73 | else:
74 | # dense layer
75 | logits = self.fc(last_hidden_states_only_target_aggregated)
76 | # removed tanh, which was invoked in the original tdbert. for training, we don't
77 | # need it to properly compute the loss. we would, however, need softmax during
78 | # inference so that the probabilities of all mutually exclusive classes
79 | # sum up to 1
80 | return logits
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/td_bert_qa.py:
--------------------------------------------------------------------------------
1 | # this file re-implements TD-BERT-QA by Gao Zhengjie et al.
2 | from argparse import Namespace
3 | from typing import Dict
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from NewsSentiment.consts import *
10 | from NewsSentiment.dataset import FXDataset
11 | from NewsSentiment.models.FXBaseModel import FXBaseModel
12 |
13 |
14 | class TD_BERT_QA_MUL(FXBaseModel):
15 | @staticmethod
16 | def get_language_models():
17 | return (BERT_BASE_UNCASED,)
18 |
19 | @staticmethod
20 | def get_input_field_ids():
21 | return [
22 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
23 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
24 | ]
25 |
26 | def __init__(self, transformer_models: Dict, opt: Namespace):
27 | super(TD_BERT_QA_MUL, self).__init__()
28 | self.opt = opt
29 | self.language_model = transformer_models[BERT_BASE_UNCASED]
30 | self.dropout = nn.Dropout(opt.dropout)
31 | self.fc = nn.Linear(self.language_model.config.hidden_size, opt.polarities_dim)  # fully connected layer (bbfc)
32 | self.bn = nn.BatchNorm1d(self.language_model.config.hidden_size)
33 |
34 | def forward(self, inputs):
35 | # get inputs
36 | text_bert_indices = FXDataset.get_input_by_params(
37 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
38 | )
39 | text_bert_indices_target_mask = FXDataset.get_input_by_params(
40 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
41 | )
42 |
43 | # apply bert
44 | last_hidden_states, pooler_output, all_hidden_states = self.language_model(
45 | input_ids=text_bert_indices
46 | )
47 |
48 | # element-wise multiplication with target mask
49 | # unsqueeze, cf. https://stackoverflow.com/q/62559382
50 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze(
51 | -1
52 | )
53 | last_hidden_states_only_target = (
54 | last_hidden_states * text_bert_indices_target_mask_unsqueezed
55 | )
56 |
57 | # as in TD-BERT, perform max pooling
58 | last_hidden_states_only_target_aggregated, _ = last_hidden_states_only_target.max(
59 | dim=1
60 | )
61 |
62 | target_in_sent_embed = self.bn(last_hidden_states_only_target_aggregated)
63 | target_in_sent_embed = target_in_sent_embed.mul(pooler_output)
64 | cat = self.dropout(target_in_sent_embed)
65 |
66 | logits = self.fc(cat)
67 | # removed tanh, which was invoked in the original tdbert. for training, we don't
68 | # need it to properly compute the loss. we would, however, need softmax during
69 | # inference so that the probabilities of all mutually exclusive classes
70 | # sum up to 1
71 |
72 | return logits
73 |
74 |
75 | class TD_BERT_QA_CON(FXBaseModel):
76 | @staticmethod
77 | def get_language_models():
78 | return (BERT_BASE_UNCASED,)
79 |
80 | @staticmethod
81 | def get_input_field_ids():
82 | return [
83 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
84 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
85 | ]
86 |
87 | def __init__(self, transformer_models: Dict, opt: Namespace):
88 | super(TD_BERT_QA_CON, self).__init__()
89 | self.opt = opt
90 | self.language_model = transformer_models[BERT_BASE_UNCASED]
91 | self.dropout = nn.Dropout(opt.dropout)
92 | self.fc = nn.Linear(self.language_model.config.hidden_size * 2, opt.polarities_dim)  # fully connected layer (bbfc)
93 | self.bn = nn.BatchNorm1d(self.language_model.config.hidden_size)
94 |
95 | def forward(self, inputs):
96 | # get inputs
97 | text_bert_indices = FXDataset.get_input_by_params(
98 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
99 | )
100 | text_bert_indices_target_mask = FXDataset.get_input_by_params(
101 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
102 | )
103 |
104 | # apply bert
105 | last_hidden_states, pooler_output, all_hidden_states = self.language_model(
106 | input_ids=text_bert_indices
107 | )
108 |
109 | # element-wise multiplication with target mask
110 | # unsqueeze, cf. https://stackoverflow.com/q/62559382
111 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze(
112 | -1
113 | )
114 | last_hidden_states_only_target = (
115 | last_hidden_states * text_bert_indices_target_mask_unsqueezed
116 | )
117 |
118 | # as in TD-BERT, perform max pooling
119 | last_hidden_states_only_target_aggregated, _ = last_hidden_states_only_target.max(
120 | dim=1
121 | )
122 |
123 | # not entirely sure whether this matches the original tdbertqa-con, because the corresponding code does not exist
124 | # in the repo (it seems to be part of the commented-out lines there)
125 | pooler_output = self.bn(pooler_output)
126 |
127 | cat = torch.cat([pooler_output, last_hidden_states_only_target_aggregated], dim=1)
128 | cat = self.dropout(cat)
129 | logits = self.fc(cat)
130 | # removed tanh, which was invoked in the original tdbert. for training, we don't
131 | # need it to properly compute the loss. we would, however, need softmax during
132 | # inference so that the probabilities of all mutually exclusive classes
133 | # sum up to 1
134 |
135 | return logits
136 |
--------------------------------------------------------------------------------
/NewsSentiment/models/singletarget/tdbertlikesingle.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | from typing import Dict
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from NewsSentiment.consts import *
8 | from NewsSentiment.dataset import FXDataset
9 | from NewsSentiment.models.FXBaseModel import FXBaseModel
10 |
11 |
12 | class TDBertLikeSingle(FXBaseModel):
13 | """
14 | This model uses a target mask for a single target to obtain only the hidden
15 | states of those last-layer nodes that correspond to a wordpiece of the target
16 | phrase. Then aggregation, dropout, and a dense layer are applied to retrieve the
17 | 3-class logits.
18 |
19 | From a conceptual perspective, TD-BERT is similar in some aspects (e.g., only the
20 | output of target-phrase-related nodes is used), but differs in others, e.g., we
21 | don't use max pooling but a mean over all non-zero target nodes. For other aspects,
22 | it is not quite clear how the authors of TD-BERT implemented them; an email we sent
23 | to them has not been answered yet.
24 | """
25 |
26 | @staticmethod
27 | def get_language_models():
28 | return (BERT_BASE_UNCASED,)
29 |
30 | @staticmethod
31 | def get_input_field_ids():
32 | return [
33 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS),
34 | (BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK),
35 | ]
36 |
37 | def __init__(self, transformer_models: Dict, opt: Namespace):
38 | super(TDBertLikeSingle, self).__init__()
39 | self.language_model = transformer_models[BERT_BASE_UNCASED]
40 | self.dropout = nn.Dropout(opt.dropout)
41 | self.dense = nn.Linear(
42 | self.language_model.config.hidden_size, opt.polarities_dim
43 | )
44 |
45 | def forward(self, inputs):
46 | # get inputs
47 | text_bert_indices = FXDataset.get_input_by_params(
48 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS
49 | )
50 | text_bert_indices_target_mask = FXDataset.get_input_by_params(
51 | inputs, BERT_BASE_UNCASED, FIELD_TEXT_IDS_WITH_SPECIAL_TOKENS_TARGET_MASK
52 | )
53 |
54 | # apply bert
55 | last_hidden_states, pooler_output, all_hidden_states = self.language_model(
56 | input_ids=text_bert_indices
57 | )
58 |
59 | # element-wise multiplication with target mask
60 | # unsqueeze, cf. https://stackoverflow.com/q/62559382
61 | text_bert_indices_target_mask_unsqueezed = text_bert_indices_target_mask.unsqueeze(
62 | -1
63 | )
64 | last_hidden_states_only_target = (
65 | last_hidden_states * text_bert_indices_target_mask_unsqueezed
66 | )
67 |
68 | # TODO: perform max pooling similar to TD-BERT (not implemented yet). instead,
69 | # for now, retrieve only the values of the target's output tokens and then
70 | # calculate the mean: (batchsize, 150, 768) -> (batchsize, 768)
71 | # note that we cannot simply take the mean over the sequence dimension, as that
72 | # would divide by the full sequence length, whereas the effective
73 | # length is only of size k, where k is the number of non-zero scalars in the
74 | # input mask (since we are only interested in those values)
75 | last_hidden_states_only_target_aggregated = last_hidden_states_only_target.sum(
76 | dim=1
77 | )
78 | # get the sum for each (batch, hidden states) (sum over the sequence length
79 | # dim)
80 | denominator_for_mean = text_bert_indices_target_mask_unsqueezed.sum(dim=1)
81 | # divide for each (batch, hidden states) by the denominator to get the mean
82 | last_hidden_states_only_target_aggregated = (
83 | last_hidden_states_only_target_aggregated / denominator_for_mean
84 | )
85 | # dropout before dense layer, as in most other tsc models
86 | last_hidden_states_only_target_aggregated = self.dropout(
87 | last_hidden_states_only_target_aggregated
88 | )
89 | # dense layer
90 | logits = self.dense(last_hidden_states_only_target_aggregated)
91 |
92 | return logits
93 |
--------------------------------------------------------------------------------
/NewsSentiment/plotter_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | from sklearn.metrics import confusion_matrix
6 |
7 | from NewsSentiment.fxlogger import get_logger
8 |
9 | logger = get_logger()
10 |
11 |
12 | def create_save_plotted_confusion_matrix(conf_matrix, expected_labels, basepath):
13 | ax, title = plot_confusion_matrix(conf_matrix, expected_labels, normalize=False)
14 | filepath = os.path.join(basepath, 'stats.png')
15 | plt.savefig(filepath, bbox_inches='tight')
16 | logger.debug("created confusion matrices in path: {}".format(filepath))
17 |
18 |
19 | def plot_confusion_matrix(cm, classes, normalize=False, title=None, cmap=plt.cm.Blues):
20 | """
21 | This function prints and plots the confusion matrix. Normalization can be applied by setting `normalize=True`.
22 | based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py
23 | """
24 | if not title:
25 | if normalize:
26 | title = 'Normalized confusion matrix'
27 | else:
28 | title = 'Confusion matrix, without normalization'
29 |
30 | if normalize:
31 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
32 | logger.debug("Normalized confusion matrix")
33 | else:
34 | logger.debug('Confusion matrix, without normalization')
35 |
36 | logger.debug(cm)
37 |
38 | fig, ax = plt.subplots()
39 | im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
40 | ax.figure.colorbar(im, ax=ax)
41 | # We want to show all ticks...
42 | ax.set(xticks=np.arange(cm.shape[1]),
43 | yticks=np.arange(cm.shape[0]),
44 | # ... and label them with the respective list entries
45 | xticklabels=classes, yticklabels=classes,
46 | title=title,
47 | ylabel='True label',
48 | xlabel='Predicted label')
49 |
50 | # Rotate the tick labels and set their alignment.
51 | plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
52 | rotation_mode="anchor")
53 |
54 | # Loop over data dimensions and create text annotations.
55 | fmt = '.2f' if normalize else 'd'
56 | thresh = cm.max() / 2.
57 | for i in range(cm.shape[0]):
58 | for j in range(cm.shape[1]):
59 | ax.text(j, i, format(cm[i, j], fmt),
60 | ha="center", va="center",
61 | color="white" if cm[i, j] > thresh else "black")
62 | fig.tight_layout()
63 |
64 | return ax, title
65 |
66 |
67 | if __name__ == '__main__':
68 | y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
69 | y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
70 | confmat = confusion_matrix(y_true, y_pred, labels=["ant", "bird", "cat"])
71 |
72 | create_save_plotted_confusion_matrix(confmat, ["ant", "bird", "cat"], '.')
73 |
--------------------------------------------------------------------------------
/NewsSentiment/pretrained_models/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 | !state_dicts
6 |
--------------------------------------------------------------------------------
/NewsSentiment/pretrained_models/state_dicts/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 |
--------------------------------------------------------------------------------
/NewsSentiment/pretrained_models/state_dicts/grutsc_v1-0-0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fhamborg/NewsMTSC/a7bea7bc7265b4bfe4236e98965a47c767acbb30/NewsSentiment/pretrained_models/state_dicts/grutsc_v1-0-0
--------------------------------------------------------------------------------
/NewsSentiment/results/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles
2 | NewsMTSC is a dataset for target-dependent sentiment classification (TSC) on
3 | news articles reporting on policy issues. The dataset consists of more than 11k labeled
4 | sentences, which we sampled from news articles from online US news outlets. More
5 | information can be found in our paper published at EACL 2021.
6 |
7 | This repository contains the **dataset** for target-dependent
8 | sentiment classification in news articles reporting on policy issues. Additionally,
9 | the repository contains our **model** named GRU-TSC, which achieves state-of-the-art
10 | TSC performance on NewsMTSC. Check it out - it **works out of the box** :-)
11 |
12 | # Quick start
13 |
14 | * **I want to classify sentiment**: check out our easy-to-use, high-quality sentiment classifier on [PyPI](https://pypi.org/project/NewsSentiment/)
15 | * **I need the dataset**: you can [download it here](https://github.com/fhamborg/NewsMTSC/raw/main/NewsSentiment/controller_data/datasets/NewsMTSC-dataset/NewsMTSC-dataset.zip) or [view it here](https://github.com/fhamborg/NewsMTSC/tree/main/NewsSentiment/controller_data/datasets/NewsMTSC-dataset). We also offer NewsMTSC as a dataset on [Huggingface Hub](https://huggingface.co/datasets/fhamborg/news_sentiment_newsmtsc) and on [Kaggle](https://www.kaggle.com/fhamborg/news-articles-sentiment).
16 | * **I want to train my own models**: read the remainder of this file.
17 |
18 | Reminder: the following description is only relevant if you in fact want to train your own models. If that's not the case, please check above for links to the dataset and our easy-to-use python package.
19 |
20 |
21 | # Installation
22 | It's super easy, we promise! Note that following these instructions is only necessary if you're planning to train a model using our tool. If you only want to predict the sentiment of sentences, please use our [Python package](https://pypi.org/project/NewsSentiment/), which is even easier to install and use :-)
23 |
24 | NewsMTSC was tested on MacOS and Ubuntu; other OS may work, too. Let us know :-)
25 |
26 | **1. Setup the environment:**
27 |
28 | This step is optional if you have Python 3.8 installed already (`python --version`). If you don't have Python 3.8, we recommend using Anaconda for setting up requirements. If you do not have it yet, follow Anaconda's
29 | [installation instructions](https://docs.anaconda.com/anaconda/install/).
30 |
31 | To setup a Python 3.8 environment (in case you don't have one yet) you may use, for example:
32 | ```bash
33 | conda create --yes -n newsmtsc python=3.8
34 | conda activate newsmtsc
35 | ```
36 |
37 | FYI, for users of virtualenv, the equivalent command would be:
38 | ```bash
39 | virtualenv -ppython3.8 --setuptools 45 venv
40 | source venv/bin/activate
41 | ```
42 |
43 | Note: We recommend Python 3.8; however, we have successfully tested NewsMTSC with Python versions >=3.8, <3.12.
44 |
45 | **2. Setup NewsMTSC:**
46 | ```bash
47 | git clone git@github.com:fhamborg/NewsMTSC.git
48 | ```
49 |
50 | Afterward, for example, open the project in your IDE and follow the instructions described in the section "Training".
51 |
52 | Note that if you only want to classify sentiment using our model, we recommend that you use our PyPI package [NewsSentiment](https://pypi.org/project/NewsSentiment/). Getting it is as simple as `pip install NewsSentiment` and using it is four lines of code :-)
53 |
54 |
55 | # Training
56 | If you want to train one of our models or your own model, please clone the repository first.
57 |
58 |
59 | There are two entry points to the system. `train.py` is used to train and evaluate a specific model on a specific dataset using
60 | specific hyperparameters. We call a single run an _experiment_. `controller.py` is used to run multiple experiments
61 | automatically. This is for example useful for model selection and evaluating hundreds or thousands of combinations of
62 | models, hyperparameters, and datasets.
63 |
64 | ## Running a single experiment
65 | Goal: training a model with a user-defined (hyper)parameter combination.
66 |
67 | `train.py` allows fine-grained control over the training and evaluation process, yet for most command line arguments
68 | we provide useful defaults. Two arguments are required:
69 |
70 | * `--own_model_name` (which model is used, e.g., `grutsc`),
71 | * `--dataset_name` (which dataset is used, e.g., `newsmtsc-rw`).
72 |
73 | For more information, refer to `train.py` and
74 | `combinations_default.py`. If you just want to get started quickly, the command below should work out of the box.
75 |
76 | ```
77 | python train.py --own_model_name grutsc --dataset_name newsmtsc-rw
78 | ```
79 |
80 | ## Running multiple experiments
81 | Goal: finding the (hyper)parameter combination to train a model that achieves the best performance.
82 |
83 | `controller.py` takes a set of values for each argument, creates combinations of arguments, applies conditions to remove
84 | unnecessary combinations (e.g., some arguments may only be used for a specific model), and creates a multiprocessing
85 | pool to run experiments for these argument combinations in parallel. After completion, `controller.py` creates a summary
86 | that contains detailed results, including evaluation performance, of all experiments. By using `createoverview.py`, you
87 | can export this summary into an Excel spreadsheet.
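For example, a typical model-selection workflow might look as follows (a sketch; the available command-line arguments are defined in `controller.py` and `createoverview.py` themselves):

```
# run all configured experiment combinations (may take a long time)
python controller.py
# afterwards, export the resulting summary into an Excel spreadsheet
python createoverview.py
```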
88 |
89 | # Support
90 | If you have questions on how to use NewsMTSC or its library, please create a new [issue](https://github.com/fhamborg/NewsMTSC/issues) on GitHub. Please understand that we are not able to provide individual support via email. We think that help is more valuable if it is shared publicly so that more people can benefit from it.
91 |
92 | # Acknowledgements
93 | This repository is in part based on [ABSA-PyTorch](https://github.com/songyouwei/ABSA-PyTorch).
94 | We thank Song et al. for making their excellent repository open source.
95 |
96 | # How to cite
97 | If you use the dataset or model, please cite our [paper](https://www.aclweb.org/anthology/2021.eacl-main.142/) ([PDF](https://www.aclweb.org/anthology/2021.eacl-main.142.pdf)):
98 |
99 | ```
100 | @InProceedings{Hamborg2021b,
101 | author = {Hamborg, Felix and Donnay, Karsten},
102 | title = {NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles},
103 | booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)},
104 | year = {2021},
105 | month = {Apr.},
106 | location = {Virtual Event},
107 | }
108 | ```
109 |
--------------------------------------------------------------------------------
/READMEpypi.md:
--------------------------------------------------------------------------------
1 | # NewsSentiment: easy-to-use, high-quality target-dependent sentiment classification for news articles
2 | NewsSentiment is an easy-to-use Python library that achieves state-of-the-art performance
3 | for target-dependent sentiment classification on news articles.
4 | NewsSentiment uses the currently [best performing](https://aclanthology.org/2021.eacl-main.142.pdf)
5 | targeted sentiment classifier for news articles. In contrast to regular sentiment
6 | classification, targeted sentiment classification allows you to provide a target in a sentence.
7 | The sentiment is then predicted only for this target. This is more reliable in many
8 | cases, as demonstrated by the following simplistic example: "I like Bert, but I hate Robert."
9 |
10 | We designed NewsSentiment to serve as an easy-to-use wrapper around the sophisticated
11 | GRU-TSC model, which was trained on the NewsMTSC dataset consisting of more than 10k
12 | labeled sentences sampled from political news articles. More information on the dataset
13 | and the model can be found [here](https://aclanthology.org/2021.eacl-main.142.pdf). The
14 | dataset, the model, and its source code can be viewed in our [GitHub repository](https://github.com/fhamborg/NewsMTSC).
15 |
16 | # Installation
17 | It's super easy, we promise!
18 |
19 | You just need a Python 3.8 environment. See [here](https://raw.githubusercontent.com/fhamborg/NewsMTSC/main/pythoninfo.md) if you
20 | don't have Python or have a different version (run `python --version` in a terminal to see
21 | your version). Then run:
22 |
23 | ```bash
24 | pip3 install NewsSentiment # without cuda support (choose this if you don't know what cuda is)
25 | pip3 install NewsSentiment[cuda] # with cuda support
26 | ```
27 |
28 | You're all set now :-)
29 |
30 | # Target-dependent Sentiment Classification
31 |
32 | Note that using NewsSentiment the first time will take *a few minutes* because it needs
33 | to download the fine-tuned language model. Please do not abort this initial download.
34 | Since this is a one-time process, future use of NewsSentiment will be much faster.
35 |
36 | ```python
37 | from NewsSentiment import TargetSentimentClassifier
38 | tsc = TargetSentimentClassifier()
39 |
40 | data = [
41 | ("I like ", "Peter", " but I don't like Robert."),
42 | ("", "Mark Meadows", "'s coverup of Trump’s coup attempt is falling apart."),
43 | ]
44 |
45 | sentiments = tsc.infer(targets=data)
46 |
47 | for i, result in enumerate(sentiments):
48 | print("Sentiment: ", i, result[0])
49 | ```
50 |
51 | This method will internally split the data into batches of size 16 for increased speed. You can adjust the
52 | batch size using the `batch_size` parameter, e.g., `batch_size=32`.
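For example, using the `data` list defined above:

```python
# infer with a larger batch size (default is 16)
sentiments = tsc.infer(targets=data, batch_size=32)
```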
53 |
54 | Alternatively, you can also use the `infer_from_text` method to infer sentiment for a single target:
55 |
56 | ```python
57 | sentiment = tsc.infer_from_text("I like ", "Peter", " but I don't like Robert.")
58 | print(sentiment[0])
59 | ```
60 |
61 | # How to identify a person in a sentence?
62 |
63 | In case your data is not separated as shown in the examples above, i.e., in three segments, you will need to identify one (or more) targets first.
64 | How best to do this depends on your project and analysis task, but you may, for example, use NER. This [example](https://github.com/fhamborg/NewsMTSC/issues/30#issuecomment-1700645679) shows a simple way of doing so. A minimal sketch is given below.
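As a minimal sketch (assuming you use spaCy and have downloaded its `en_core_web_sm` model, e.g., via `python -m spacy download en_core_web_sm`; the helper function below is hypothetical), you could extract PERSON entities and convert each into the three-segment format expected by `infer`:

```python
import spacy

# spaCy and its en_core_web_sm model are assumed to be installed
nlp = spacy.load("en_core_web_sm")

def person_targets(sentence: str):
    """Return (left, target, right) tuples, one per PERSON entity found in the sentence."""
    doc = nlp(sentence)
    return [
        (sentence[: ent.start_char], ent.text, sentence[ent.end_char :])
        for ent in doc.ents
        if ent.label_ == "PERSON"
    ]

data = person_targets("I like Peter but I don't like Robert.")
sentiments = tsc.infer(targets=data)
```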
65 |
66 | # Acknowledgements
67 |
68 | Thanks to [Tilman Hornung](https://github.com/t1h0) for adding the batching functionality and various other improvements.
69 |
70 | # How to cite
71 | If you use the dataset or model, please cite our [paper](https://www.aclweb.org/anthology/2021.eacl-main.142/) ([PDF](https://www.aclweb.org/anthology/2021.eacl-main.142.pdf)):
72 |
73 | ```
74 | @InProceedings{Hamborg2021b,
75 | author = {Hamborg, Felix and Donnay, Karsten},
76 | title = {NewsMTSC: (Multi-)Target-dependent Sentiment Classification in News Articles},
77 | booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)},
78 | year = {2021},
79 | month = {Apr.},
80 | location = {Virtual Event},
81 | }
82 | ```
83 |
--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
1 | from NewsSentiment.models.singletarget.grutscsingle import GRUTSCSingle
2 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools>=45",
4 | "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"
7 |
--------------------------------------------------------------------------------
/pythoninfo.md:
--------------------------------------------------------------------------------
1 | This step is optional if you have Python >=3.8, <3.12 installed (run `python --version`
2 | in a terminal and check the version that is printed; we recommend 3.8). If you don't
3 | have Python (in the correct version), we recommend using Anaconda for setting up
4 | requirements because it is very easy (but any way of installing is fine).
5 | If you do not have Anaconda yet, follow their
6 | [installation instructions](https://docs.anaconda.com/anaconda/install/).
7 |
8 | After installing Anaconda, to set up a Python 3.8 environment (in case you don't have one
9 | yet) execute:
10 |
11 | ```bash
12 | conda create --yes -n newsmtsc python=3.8
13 | conda activate newsmtsc
14 | ```
15 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = NewsSentiment
3 | version = 1.2.28
4 | author = Felix Hamborg
5 | author_email = felix.hamborg@uni-konstanz.de
6 | description = Easy-to-use, high-quality target-dependent sentiment classification for English news articles
7 | long_description = file: READMEpypi.md
8 | long_description_content_type = text/markdown
9 | url = https://github.com/fhamborg/NewsMTSC
10 | project_urls =
11 | Bug Tracker = https://github.com/fhamborg/NewsMTSC/issues
12 | classifiers =
13 | Development Status :: 5 - Production/Stable
14 | Environment :: Console
15 | License :: OSI Approved :: MIT License
16 | Operating System :: OS Independent
17 | Programming Language :: Python :: 3
18 | Programming Language :: Python :: 3.8
19 | Programming Language :: Python :: 3.9
20 | Programming Language :: Python :: 3.10
21 | Programming Language :: Python :: 3.11
22 | Intended Audience :: Developers
23 | Intended Audience :: Science/Research
24 | Topic :: Scientific/Engineering
25 | Topic :: Scientific/Engineering :: Information Analysis
26 | Topic :: Scientific/Engineering :: Artificial Intelligence
27 | Topic :: Text Processing :: Linguistic
28 |
29 | [options]
30 | package_dir =
31 | = .
32 | packages = find_namespace:
33 | python_requires = >=3.8, <3.12
34 | # include_package_data = true
35 | install_requires =
36 | boto3>=1.19.7
37 | gensim>=4.0.1
38 | imbalanced-learn>=0.8.1
39 | jsonlines>=2.0.0
40 | matplotlib>=3.4.3
41 | networkx>=2.6.3
42 | openpyxl>=3.0.5
43 | pandas>=1.3.3
44 | regex>=2021.10.23
45 | requests>=2.26.0
46 | sacremoses>=0.0.46
47 | scikit-learn>=1.0.1
48 | spacy>=3.2
49 | tabulate>=0.8.9
50 | tqdm>=4.62.3
51 | transformers>=4.17,<=4.24
52 | torch>=1.12,<2.1
53 |
54 | [options.packages.find]
55 | where = .
56 |
57 | [options.package_data]
58 | * = *.txt, *.ddict, *.tff, *.gitignore, *.gitkeep
59 |
60 | [options.data_files]
61 | git =
62 | *.gitignore
63 |
64 | [options.extras_require]
65 | cuda =
66 | cudatoolkit==10.1
67 |
--------------------------------------------------------------------------------