├── MSE-ChatGLM3-6B ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── load_data.cpython-310.pyc │ ├── getLengths.py │ ├── TextPre.py │ └── load_data.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── functions.cpython-310.pyc │ │ └── metricsTop.cpython-310.pyc │ ├── functions.py │ └── metricsTop.py ├── models │ ├── __init__.py │ ├── ChatGLM3 │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── modeling_chatglm.cpython-310.pyc │ │ │ ├── tokenization_chatglm.cpython-310.pyc │ │ │ └── configuration_chatglm.cpython-310.pyc │ │ └── configuration_chatglm.py │ ├── subNets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── Textmodel.cpython-310.pyc │ │ └── Textmodel.py │ ├── multiTask │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── CMCM.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ │ └── CMCM.py │ ├── __pycache__ │ │ ├── AMIO.cpython-310.pyc │ │ └── __init__.cpython-310.pyc │ └── AMIO.py ├── trains │ ├── __init__.py │ ├── multiTask │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── CMCM.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ ├── __pycache__ │ │ ├── ATIO.cpython-310.pyc │ │ └── __init__.cpython-310.pyc │ └── ATIO.py ├── config │ ├── __pycache__ │ │ ├── config_regression.cpython-310.pyc │ │ └── config_classification.cpython-310.pyc │ ├── config_regression.py │ └── config_classification.py ├── LICENSE └── run.py ├── MSE-Llama2-7B ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── load_data.cpython-310.pyc │ ├── getLengths.py │ └── TextPre.py ├── models │ ├── __init__.py │ ├── subNets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── Textmodel.cpython-310.pyc │ │ └── Textmodel.py │ ├── multiTask │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── CMCM.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ │ └── CMCM.py │ ├── __pycache__ │ │ ├── AMIO.cpython-310.pyc │ │ └── __init__.cpython-310.pyc │ └── AMIO.py ├── trains │ ├── __init__.py │ ├── multiTask │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── CMCM.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ ├── __pycache__ │ │ ├── ATIO.cpython-310.pyc │ │ └── __init__.cpython-310.pyc │ └── ATIO.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── functions.cpython-310.pyc │ │ └── metricsTop.cpython-310.pyc │ ├── functions.py │ └── metricsTop.py ├── config │ ├── __pycache__ │ │ ├── config_regression.cpython-310.pyc │ │ └── config_classification.cpython-310.pyc │ ├── config_regression.py │ └── config_classification.py ├── LICENSE └── run.py ├── MSE-Qwen-1.8B ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── load_data.cpython-310.pyc │ ├── getLengths.py │ └── TextPre.py ├── models │ ├── __init__.py │ ├── subNets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── Textmodel.cpython-310.pyc │ │ └── Textmodel.py │ ├── multiTask │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── CMCM.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ │ └── CMCM.py │ ├── __pycache__ │ │ ├── AMIO.cpython-310.pyc │ │ └── __init__.cpython-310.pyc │ └── AMIO.py ├── trains │ ├── __init__.py │ ├── multiTask │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── CMCM.cpython-310.pyc │ │ │ └── __init__.cpython-310.pyc │ ├── __pycache__ │ │ ├── ATIO.cpython-310.pyc │ │ └── __init__.cpython-310.pyc │ └── ATIO.py ├── utils │ ├── __init__.py │ ├── __pycache__ 
│ │ ├── __init__.cpython-310.pyc │ │ ├── functions.cpython-310.pyc │ │ └── metricsTop.cpython-310.pyc │ ├── functions.py │ └── metricsTop.py ├── config │ ├── __pycache__ │ │ ├── config_regression.cpython-310.pyc │ │ └── config_classification.cpython-310.pyc │ ├── config_regression.py │ └── config_classification.py ├── LICENSE └── run.py ├── Fig └── overall.png ├── README.md └── requirements.txt /MSE-ChatGLM3-6B/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/trains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/trains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/trains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/ChatGLM3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/subNets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/subNets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/subNets/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Fig/overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/Fig/overall.png -------------------------------------------------------------------------------- /MSE-Llama2-7B/trains/multiTask/__init__.py: -------------------------------------------------------------------------------- 1 | from trains.multiTask.CMCM import CMCM 2 | 3 | __all__ = ['CMCM'] -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/trains/multiTask/__init__.py: -------------------------------------------------------------------------------- 1 | from trains.multiTask.CMCM import CMCM 2 | 3 | __all__ = ['CMCM'] -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/trains/multiTask/__init__.py: -------------------------------------------------------------------------------- 1 | from trains.multiTask.CMCM import CMCM 2 | 3 | __all__ = ['CMCM'] -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/multiTask/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from models.multiTask.CMCM import CMCM 3 | 4 | __all__ = ['CMCM'] -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/multiTask/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from models.multiTask.CMCM import CMCM 3 | 4 | __all__ = ['CMCM'] -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/multiTask/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from models.multiTask.CMCM import CMCM 3 | 4 | __all__ = ['CMCM'] -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/__pycache__/AMIO.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/__pycache__/AMIO.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/trains/__pycache__/ATIO.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/__pycache__/ATIO.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/data/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/data/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/__pycache__/AMIO.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/__pycache__/AMIO.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/trains/__pycache__/ATIO.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/__pycache__/ATIO.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/data/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/data/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/__pycache__/AMIO.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/__pycache__/AMIO.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/trains/__pycache__/ATIO.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/__pycache__/ATIO.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/data/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/data/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/data/__pycache__/load_data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/data/__pycache__/load_data.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/trains/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/utils/__pycache__/functions.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/functions.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/data/__pycache__/load_data.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/data/__pycache__/load_data.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/trains/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/utils/__pycache__/functions.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/functions.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/data/__pycache__/load_data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/data/__pycache__/load_data.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/trains/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/utils/__pycache__/functions.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/functions.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/utils/__pycache__/metricsTop.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/metricsTop.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/utils/__pycache__/metricsTop.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/metricsTop.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/utils/__pycache__/metricsTop.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/metricsTop.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/multiTask/__pycache__/CMCM.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/multiTask/__pycache__/CMCM.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/multiTask/__pycache__/CMCM.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/multiTask/__pycache__/CMCM.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/subNets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/subNets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/multiTask/__pycache__/CMCM.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/multiTask/__pycache__/CMCM.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/subNets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/subNets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/subNets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/subNets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/config/__pycache__/config_regression.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/config/__pycache__/config_regression.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/multiTask/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/multiTask/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/subNets/__pycache__/Textmodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/subNets/__pycache__/Textmodel.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/trains/multiTask/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/multiTask/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/config/__pycache__/config_regression.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/config/__pycache__/config_regression.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/multiTask/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/multiTask/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/subNets/__pycache__/Textmodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/subNets/__pycache__/Textmodel.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/trains/multiTask/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/multiTask/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/config/__pycache__/config_regression.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/config/__pycache__/config_regression.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/multiTask/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/multiTask/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/subNets/__pycache__/Textmodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/subNets/__pycache__/Textmodel.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/trains/multiTask/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/config/__pycache__/config_classification.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/config/__pycache__/config_classification.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/config/__pycache__/config_classification.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/config/__pycache__/config_classification.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/config/__pycache__/config_classification.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/config/__pycache__/config_classification.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/modeling_chatglm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/modeling_chatglm.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/tokenization_chatglm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/tokenization_chatglm.cpython-310.pyc 
-------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/configuration_chatglm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/configuration_chatglm.cpython-310.pyc -------------------------------------------------------------------------------- /MSE-Llama2-7B/trains/ATIO.py: -------------------------------------------------------------------------------- 1 | """ 2 | AIO -- All Trains in One 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | from torch.nn.parameter import Parameter 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal 10 | 11 | from trains.multiTask import * 12 | 13 | __all__ = ['ATIO'] 14 | 15 | class ATIO(): 16 | def __init__(self): 17 | self.TRAIN_MAP = { 18 | 'cmcm': CMCM, 19 | } 20 | 21 | def getTrain(self, args): 22 | return self.TRAIN_MAP[args.modelName.lower()](args) 23 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/trains/ATIO.py: -------------------------------------------------------------------------------- 1 | """ 2 | AIO -- All Trains in One 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | from torch.nn.parameter import Parameter 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal 10 | 11 | from trains.multiTask import * 12 | 13 | __all__ = ['ATIO'] 14 | 15 | class ATIO(): 16 | def __init__(self): 17 | self.TRAIN_MAP = { 18 | 'cmcm': CMCM, 19 | } 20 | 21 | def getTrain(self, args): 22 | return self.TRAIN_MAP[args.modelName.lower()](args) 23 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/trains/ATIO.py: -------------------------------------------------------------------------------- 1 | """ 2 | AIO -- All Trains in One 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | from torch.nn.parameter import Parameter 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal 10 | 11 | from trains.multiTask import * 12 | 13 | __all__ = ['ATIO'] 14 | 15 | class ATIO(): 16 | def __init__(self): 17 | self.TRAIN_MAP = { 18 | 'cmcm': CMCM, 19 | } 20 | 21 | def getTrain(self, args): 22 | return self.TRAIN_MAP[args.modelName.lower()](args) 23 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/AMIO.py: -------------------------------------------------------------------------------- 1 | """ 2 | AIO -- All Model in One 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | from torch.nn.parameter import Parameter 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal 10 | 11 | 12 | from models.multiTask import * 13 | 14 | __all__ = ['AMIO'] 15 | 16 | MODEL_MAP = { 17 | 'cmcm': CMCM 18 | } 19 | 20 | class AMIO(nn.Module): 21 | def __init__(self, args): 22 | super(AMIO, self).__init__() 23 | lastModel = MODEL_MAP[args.modelName] 24 | self.Model = lastModel(args) 25 | 26 | def forward(self, labels_m, text_x, audio_x, video_x): 27 | return self.Model(labels_m, text_x, audio_x, video_x) 28 | 29 | def generate(self, text_x, 
audio_x, video_x): 30 | return self.Model.generate(text_x, audio_x, video_x) -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/AMIO.py: -------------------------------------------------------------------------------- 1 | """ 2 | AIO -- All Model in One 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | from torch.nn.parameter import Parameter 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal 10 | 11 | 12 | from models.multiTask import * 13 | 14 | __all__ = ['AMIO'] 15 | 16 | MODEL_MAP = { 17 | 'cmcm': CMCM 18 | } 19 | 20 | class AMIO(nn.Module): 21 | def __init__(self, args): 22 | super(AMIO, self).__init__() 23 | lastModel = MODEL_MAP[args.modelName] 24 | self.Model = lastModel(args) 25 | 26 | def forward(self, labels_m, text_x, audio_x, video_x): 27 | return self.Model(labels_m, text_x, audio_x, video_x) 28 | 29 | def generate(self, text_x, audio_x, video_x): 30 | return self.Model.generate(text_x, audio_x, video_x) -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/AMIO.py: -------------------------------------------------------------------------------- 1 | """ 2 | AIO -- All Model in One 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | from torch.nn.parameter import Parameter 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal 10 | 11 | 12 | from models.multiTask import * 13 | 14 | __all__ = ['AMIO'] 15 | 16 | MODEL_MAP = { 17 | 'cmcm': CMCM 18 | } 19 | 20 | class AMIO(nn.Module): 21 | def __init__(self, args): 22 | super(AMIO, self).__init__() 23 | lastModel = MODEL_MAP[args.modelName] 24 | self.Model = lastModel(args) 25 | 26 | def forward(self, labels_m, text_x, audio_x, video_x): 27 | return self.Model(labels_m, text_x, audio_x, video_x) 28 | 29 | def generate(self, text_x, audio_x, video_x): 30 | return self.Model.generate(text_x, audio_x, video_x) -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/utils/functions.py: -------------------------------------------------------------------------------- 1 | def dict_to_str(src_dict): 2 | dst_str = "" 3 | for key in src_dict.keys(): 4 | dst_str += " %s: %.4f " %(key, src_dict[key]) 5 | return dst_str 6 | 7 | class Storage(dict): 8 | """ 9 | A Storage object is like a dictionary except `obj.foo` can be used inadition to `obj['foo']` 10 | ref: https://blog.csdn.net/a200822146085/article/details/88430450 11 | """ 12 | def __getattr__(self, key): 13 | try: 14 | return self[key] if key in self else False 15 | except KeyError as k: 16 | raise AttributeError(k) 17 | 18 | def __setattr__(self, key, value): 19 | self[key] = value 20 | 21 | def __delattr__(self, key): 22 | try: 23 | del self[key] 24 | except KeyError as k: 25 | raise AttributeError(k) 26 | 27 | def __str__(self): 28 | return "<" + self.__class__.__name__ + dict.__repr__(self) + ">" 29 | 30 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/utils/functions.py: -------------------------------------------------------------------------------- 1 | def dict_to_str(src_dict): 2 | dst_str = "" 3 | for key in src_dict.keys(): 4 | dst_str += " %s: %.4f " %(key, src_dict[key]) 5 | return dst_str 6 | 7 | class Storage(dict): 8 | """ 9 | A Storage object is like a dictionary except `obj.foo` can 
be used inadition to `obj['foo']` 10 | ref: https://blog.csdn.net/a200822146085/article/details/88430450 11 | """ 12 | def __getattr__(self, key): 13 | try: 14 | return self[key] if key in self else False 15 | except KeyError as k: 16 | raise AttributeError(k) 17 | 18 | def __setattr__(self, key, value): 19 | self[key] = value 20 | 21 | def __delattr__(self, key): 22 | try: 23 | del self[key] 24 | except KeyError as k: 25 | raise AttributeError(k) 26 | 27 | def __str__(self): 28 | return "<" + self.__class__.__name__ + dict.__repr__(self) + ">" 29 | 30 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/utils/functions.py: -------------------------------------------------------------------------------- 1 | def dict_to_str(src_dict): 2 | dst_str = "" 3 | for key in src_dict.keys(): 4 | dst_str += " %s: %.4f " %(key, src_dict[key]) 5 | return dst_str 6 | 7 | class Storage(dict): 8 | """ 9 | A Storage object is like a dictionary except `obj.foo` can be used inadition to `obj['foo']` 10 | ref: https://blog.csdn.net/a200822146085/article/details/88430450 11 | """ 12 | def __getattr__(self, key): 13 | try: 14 | return self[key] if key in self else False 15 | except KeyError as k: 16 | raise AttributeError(k) 17 | 18 | def __setattr__(self, key, value): 19 | self[key] = value 20 | 21 | def __delattr__(self, key): 22 | try: 23 | del self[key] 24 | except KeyError as k: 25 | raise AttributeError(k) 26 | 27 | def __str__(self): 28 | return "<" + self.__class__.__name__ + dict.__repr__(self) + ">" 29 | 30 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 iyuge2 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
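The `Storage` helper and `dict_to_str` in the `utils/functions.py` files above back the configuration objects used across the three sub-projects (`config_regression.py` later in this dump wraps all hyper-parameters in a `Storage`). A minimal usage sketch, not part of the repository, assuming it is run from inside one of the `MSE-*` project roots so that `utils.functions` is importable and using made-up values:

```python
from utils.functions import Storage, dict_to_str

args = Storage({'modelName': 'cmcm', 'learning_rate': 5e-5})
print(args.modelName)        # attribute access -> 'cmcm'
print(args.batch_size)       # a missing key returns False instead of raising AttributeError
args.batch_size = 8          # attribute assignment writes straight into the underlying dict
print(dict_to_str({'MAE': 0.5321, 'Corr': 0.7654}))  # " MAE: 0.5321  Corr: 0.7654 "
```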
22 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 iyuge2 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 iyuge2 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
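The `data/getLengths.py` script that follows determines each sample's effective audio/vision length by scanning for the first frame whose feature values sum to zero (i.e., zero padding), clamping empty sequences to length 1, and writing the resulting length lists back into the dataset pickle. A self-contained illustration of that length rule with made-up toy data (not repository code):

```python
import numpy as np

# Toy feature tensor shaped (samples, max_len, feature_dim), like the arrays in the pickle.
features = np.zeros((2, 5, 3))
features[0, :3] = 1.0            # sample 0: 3 valid frames followed by zero padding
features[1, :5] = 1.0            # sample 1: no padding at all

frame_sums = np.sum(features, axis=-1)   # zero exactly where a whole frame is padding
lengths = []
for row in frame_sums:
    cur_length = row.shape[0]            # default: the full max_len
    for j, s in enumerate(row):
        if s == 0:                       # first all-zero frame marks the end of the sample
            cur_length = j
            break
    lengths.append(max(cur_length, 1))   # getLengths.py clamps zero-length samples to 1
print(lengths)                           # [3, 5]
```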
22 | -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/data/getLengths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as plk 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | 7 | def get_lengths(mode, feature_name): 8 | fd = data[mode][feature_name] 9 | max_len = fd.shape[1] 10 | 11 | c_sum = np.sum(fd, axis=-1) 12 | lengths = [] 13 | for i in tqdm(range(fd.shape[0])): 14 | null = True 15 | zeros = np.zeros([fd.shape[1], fd.shape[2]]) 16 | cur_length = max_len 17 | for j in range(max_len): 18 | if c_sum[i][j] == 0: 19 | cur_length = j 20 | null = False 21 | break 22 | if cur_length == 0: 23 | cur_length = 1 24 | lengths.append(cur_length) 25 | return lengths 26 | 27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf: 28 | data = plk.load(lf) 29 | 30 | def handleData(mode): 31 | # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A') 32 | # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V') 33 | data[mode]['audio_lengths'] = get_lengths(mode, 'audio') 34 | data[mode]['vision_lengths'] = get_lengths(mode, 'vision') 35 | 36 | handleData('train') 37 | handleData('valid') 38 | handleData('test') 39 | 40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df: 41 | plk.dump(data, df, protocol = 4) -------------------------------------------------------------------------------- /MSE-Llama2-7B/data/getLengths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as plk 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | 7 | def get_lengths(mode, feature_name): 8 | fd = data[mode][feature_name] 9 | max_len = fd.shape[1] 10 | 11 | c_sum = np.sum(fd, axis=-1) 12 | lengths = [] 13 | for i in tqdm(range(fd.shape[0])): 14 | null = True 15 | zeros = np.zeros([fd.shape[1], fd.shape[2]]) 16 | cur_length = max_len 17 | for j in range(max_len): 18 | if c_sum[i][j] == 0: 19 | cur_length = j 20 | null = False 21 | break 22 | if cur_length == 0: 23 | cur_length = 1 24 | lengths.append(cur_length) 25 | return lengths 26 | 27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf: 28 | data = plk.load(lf) 29 | 30 | def handleData(mode): 31 | # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A') 32 | # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V') 33 | data[mode]['audio_lengths'] = get_lengths(mode, 'audio') 34 | data[mode]['vision_lengths'] = get_lengths(mode, 'vision') 35 | 36 | handleData('train') 37 | handleData('valid') 38 | handleData('test') 39 | 40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df: 41 | plk.dump(data, df, protocol = 4) -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/data/getLengths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as plk 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | 7 | def get_lengths(mode, feature_name): 8 | fd = data[mode][feature_name] 9 | max_len = fd.shape[1] 10 | 11 | c_sum = np.sum(fd, axis=-1) 12 | lengths = [] 13 | for i in tqdm(range(fd.shape[0])): 14 | null = True 15 | zeros = np.zeros([fd.shape[1], fd.shape[2]]) 16 | cur_length = max_len 17 | for j in 
range(max_len): 18 | if c_sum[i][j] == 0: 19 | cur_length = j 20 | null = False 21 | break 22 | if cur_length == 0: 23 | cur_length = 1 24 | lengths.append(cur_length) 25 | return lengths 26 | 27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf: 28 | data = plk.load(lf) 29 | 30 | def handleData(mode): 31 | # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A') 32 | # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V') 33 | data[mode]['audio_lengths'] = get_lengths(mode, 'audio') 34 | data[mode]['vision_lengths'] = get_lengths(mode, 'vision') 35 | 36 | handleData('train') 37 | handleData('valid') 38 | handleData('test') 39 | 40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df: 41 | plk.dump(data, df, protocol = 4) -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/ChatGLM3/configuration_chatglm.py: -------------------------------------------------------------------------------- 1 | from transformers import PretrainedConfig 2 | 3 | 4 | class ChatGLMConfig(PretrainedConfig): 5 | model_type = "chatglm" 6 | def __init__( 7 | self, 8 | num_layers=28, 9 | padded_vocab_size=65024, 10 | hidden_size=4096, 11 | ffn_hidden_size=13696, 12 | kv_channels=128, 13 | num_attention_heads=32, 14 | seq_length=2048, 15 | hidden_dropout=0.0, 16 | classifier_dropout=None, 17 | attention_dropout=0.0, 18 | layernorm_epsilon=1e-5, 19 | rmsnorm=True, 20 | apply_residual_connection_post_layernorm=False, 21 | post_layer_norm=True, 22 | add_bias_linear=False, 23 | add_qkv_bias=False, 24 | bias_dropout_fusion=True, 25 | multi_query_attention=False, 26 | multi_query_group_num=1, 27 | apply_query_key_layer_scaling=True, 28 | attention_softmax_in_fp32=True, 29 | fp32_residual_connection=False, 30 | quantization_bit=0, 31 | pre_seq_len=None, 32 | prefix_projection=False, 33 | **kwargs 34 | ): 35 | self.num_layers = num_layers 36 | self.vocab_size = padded_vocab_size 37 | self.padded_vocab_size = padded_vocab_size 38 | self.hidden_size = hidden_size 39 | self.ffn_hidden_size = ffn_hidden_size 40 | self.kv_channels = kv_channels 41 | self.num_attention_heads = num_attention_heads 42 | self.seq_length = seq_length 43 | self.hidden_dropout = hidden_dropout 44 | self.classifier_dropout = classifier_dropout 45 | self.attention_dropout = attention_dropout 46 | self.layernorm_epsilon = layernorm_epsilon 47 | self.rmsnorm = rmsnorm 48 | self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm 49 | self.post_layer_norm = post_layer_norm 50 | self.add_bias_linear = add_bias_linear 51 | self.add_qkv_bias = add_qkv_bias 52 | self.bias_dropout_fusion = bias_dropout_fusion 53 | self.multi_query_attention = multi_query_attention 54 | self.multi_query_group_num = multi_query_group_num 55 | self.apply_query_key_layer_scaling = apply_query_key_layer_scaling 56 | self.attention_softmax_in_fp32 = attention_softmax_in_fp32 57 | self.fp32_residual_connection = fp32_residual_connection 58 | self.quantization_bit = quantization_bit 59 | self.pre_seq_len = pre_seq_len 60 | self.prefix_projection = prefix_projection 61 | super().__init__(**kwargs) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # 😊 The Official Implementation of MSE-Adapter 4 | 5 |

6 | arXiv 7 | AAAI 2025 8 |

9 | 10 | 🎉🎉 **We have been accepted at AAAI-2025!** 11 |
12 | 13 | --- 14 | This is the official code for the 《MSE-Adapter: A Lightweight Plugin Endowing LLMs with the Capability to Perform Multimodal Sentiment Analysis and Emotion Recognition》. 15 | 16 | ![Overall](Fig/overall.png) 17 |
18 | 19 | *Fig1: The comprehensive framework integrating MSE-Adapter with LLM.* 20 | 21 |
22 | 23 | --- 24 | 25 | ## 🚀 Get Started! (Take MSE-ChatGLM3-6B as an example.) 26 | 27 | ### 🔧 Step 1: Create the Environment 28 | ``` bash 29 | git clone https://github.com/AZYoung233/MSE-Adapter.git 30 | cd MSE-Adapter 31 | conda create --name MSE-Adapter python=3.10.13 32 | conda activate MSE-Adapter 33 | pip install -r requirements.txt 34 | ``` 35 | 🚨 **Critical Notice (2025/04/29 update)**: It is **highly recommended** to create a new **virtual environment** directly using `requirements.txt`. If that's not feasible, at least ensure that the `transformers` version matches exactly. Otherwise, the training loss may decrease as expected, but the evaluation metrics could be abnormal, severely impacting the model's performance. 36 | 37 | ### 📂 Step 2: Download the Dataset 38 | - You can download the dataset at the link below: 39 | - [MOSEI](https://huggingface.co/datasets/AZYoung/MOSEI_processed)📦 40 | - [SIMS-V2](https://huggingface.co/datasets/AZYoung/SIMSV2_processed)📦 41 | - [MELD](https://huggingface.co/datasets/AZYoung/MELD_processed)📦 42 | - [CHERMA](https://huggingface.co/datasets/AZYoung/CHERMA0723_processed)📦 43 | - Place them under the same folder, and set `root_dataset_dir` in `parse_args` of `run.py` to the path where you store your dataset. 44 | 45 | ### 💾 Step 3: Download the Backbone LLM 46 | - Download backbone LLM from the [THUDM/chatglm3-6b](https://huggingface.co/THUDM/chatglm3-6b) and set `pretrain_LM` in `parse_args` of `run.py` to the path where you store your LLM. If for any particular reason your download is too slow, try using [Modelscope](https://modelscope.cn/my/overview) 🌐 or [HF-mirrors](https://hf-mirror.com/) 🌐. 47 | 48 | ### ▶️ Step 4: Run! 49 | - Once you have completed the basic setup as described above, you can run the code using the following steps. The code will run 5 random seeds and the results will be saved in `results/result`. The results presented in the paper are the average of 5 random seeds. 50 | ```bash 51 | cd MSE-ChatGLM3-6B 52 | python run.py 53 | ``` 54 | 55 | ## 🙏 Acknowledgment 56 | Our code is structurally referenced to [SELF-MM](https://github.com/thuiar/Self-MM). Thanks to their open-source spirit for saving us a lot of time. 
💖 57 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | addict==2.4.0 2 | aiohttp==3.9.1 3 | aiosignal==1.3.1 4 | aliyun-python-sdk-core==2.14.0 5 | aliyun-python-sdk-kms==2.16.2 6 | annotated-types==0.6.0 7 | anyio==4.8.0 8 | async-timeout==4.0.3 9 | attrs==23.1.0 10 | blis==0.7.11 11 | Brotli==1.0.9 12 | cachetools==5.3.2 13 | catalogue==2.0.10 14 | certifi==2023.11.17 15 | cffi==1.16.0 16 | charset-normalizer==2.0.4 17 | click==8.1.7 18 | cloudpathlib==0.16.0 19 | cmake==3.28.1 20 | confection==0.1.4 21 | contourpy==1.2.0 22 | crcmod==1.7 23 | cryptography==41.0.7 24 | cycler==0.12.1 25 | cymem==2.0.8 26 | datasets==2.15.0 27 | diffusers==0.31.0 28 | dill==0.3.7 29 | easydict==1.13 30 | einops==0.7.0 31 | en-core-web-sm==3.7.1 32 | exceptiongroup==1.2.2 33 | fastapi==0.115.8 34 | filelock==3.13.1 35 | fonttools==4.46.0 36 | frozenlist==1.4.1 37 | fsspec==2023.10.0 38 | gast==0.5.4 39 | gmpy2==2.1.2 40 | graphviz==0.20.3 41 | h11==0.14.0 42 | huggingface-hub==0.26.2 43 | idna==3.4 44 | importlib-metadata==7.0.0 45 | jieba==0.42.1 46 | Jinja2==3.1.2 47 | jmespath==0.10.0 48 | joblib==1.3.2 49 | kiwisolver==1.4.5 50 | langcodes==3.3.0 51 | lit==17.0.6 52 | MarkupSafe==2.1.1 53 | matplotlib==3.8.2 54 | mkl-fft==1.3.8 55 | mkl-random==1.2.4 56 | mkl-service==2.4.0 57 | modelscope==1.10.0 58 | mpmath==1.3.0 59 | multidict==6.0.4 60 | multiprocess==0.70.15 61 | murmurhash==1.0.10 62 | networkx==3.1 63 | numpy==1.26.2 64 | nvidia-cublas-cu11==11.10.3.66 65 | nvidia-cuda-cupti-cu11==11.7.101 66 | nvidia-cuda-nvrtc-cu11==11.7.99 67 | nvidia-cuda-runtime-cu11==11.7.99 68 | nvidia-cudnn-cu11==8.5.0.96 69 | nvidia-cufft-cu11==10.9.0.58 70 | nvidia-curand-cu11==10.2.10.91 71 | nvidia-cusolver-cu11==11.4.0.1 72 | nvidia-cusparse-cu11==11.7.4.91 73 | nvidia-ml-py==12.535.133 74 | nvidia-nccl-cu11==2.14.3 75 | nvidia-nvtx-cu11==11.7.91 76 | nvitop==1.3.1 77 | opencv-python==4.11.0.86 78 | oss2==2.18.3 79 | packaging==23.2 80 | pandas==2.1.4 81 | Pillow==10.0.1 82 | pip==23.3.1 83 | platformdirs==4.1.0 84 | preshed==3.0.9 85 | protobuf==4.25.3 86 | psutil==5.9.6 87 | pyarrow==14.0.1 88 | pyarrow-hotfix==0.6 89 | pycparser==2.21 90 | pycryptodome==3.19.0 91 | pydantic==2.5.3 92 | pydantic_core==2.14.6 93 | pyOpenSSL==23.2.0 94 | pyparsing==3.1.1 95 | PySocks==1.7.1 96 | python-dateutil==2.8.2 97 | pytz==2023.3.post1 98 | PyYAML==6.0.1 99 | regex==2023.10.3 100 | requests==2.31.0 101 | safetensors==0.4.1 102 | scikit-learn==1.3.2 103 | scipy==1.11.4 104 | sentencepiece==0.1.99 105 | setuptools==68.2.2 106 | simplejson==3.19.2 107 | six==1.16.0 108 | smart-open==6.4.0 109 | sniffio==1.3.1 110 | sortedcontainers==2.4.0 111 | spacy==3.7.2 112 | spacy-legacy==3.0.12 113 | spacy-loggers==1.0.5 114 | srsly==2.4.8 115 | starlette==0.45.3 116 | sympy==1.12 117 | termcolor==2.4.0 118 | thinc==8.2.2 119 | threadpoolctl==3.2.0 120 | tiktoken==0.5.2 121 | tokenizers==0.15.0 122 | tomli==2.0.1 123 | torch==2.0.1 124 | torchaudio==2.1.2 125 | torchvision==0.16.2 126 | torchviz==0.0.2 127 | tqdm==4.66.1 128 | transformers==4.36.1 129 | transformers-stream-generator==0.0.4 130 | triton==2.0.0 131 | typer==0.9.0 132 | typing_extensions==4.12.2 133 | tzdata==2023.3 134 | urllib3==1.26.18 135 | uvicorn==0.34.0 136 | wasabi==1.1.2 137 | weasel==0.3.4 138 | wheel==0.41.2 139 | xformers==0.0.21 140 | xxhash==3.4.1 141 | yapf==0.40.2 142 | yarl==1.9.4 143 | zipp==3.17.0 144 | 
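Since the README stresses that the installed `transformers` version must exactly match the pin above (`transformers==4.36.1`), a small optional sanity check (a suggestion, not part of the repository) can be run after installing the requirements:

```python
import transformers

PINNED = "4.36.1"  # the version pinned in requirements.txt above
if transformers.__version__ != PINNED:
    print(f"Warning: transformers {transformers.__version__} is installed, "
          f"but the repository pins transformers=={PINNED}; evaluation metrics "
          "may be abnormal with a mismatched version (see the README notice).")
```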
-------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/config/config_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from utils.functions import Storage 5 | 6 | class ConfigRegression(): 7 | def __init__(self, args): 8 | # hyper parameters for models 9 | HYPER_MODEL_MAP = { 10 | 'cmcm': self.__CMCM 11 | } 12 | # hyper parameters for datasets 13 | self.root_dataset_dir = args.root_dataset_dir 14 | HYPER_DATASET_MAP = self.__datasetCommonParams() 15 | # normalize 16 | model_name = str.lower(args.modelName) 17 | dataset_name = str.lower(args.datasetName) 18 | # load params 19 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas'] 20 | dataArgs = HYPER_DATASET_MAP[dataset_name] 21 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned'] 22 | # integrate all parameters 23 | self.args = Storage(dict(vars(args), 24 | **dataArgs, 25 | **commonArgs, 26 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name], 27 | )) 28 | 29 | def __datasetCommonParams(self): 30 | root_dataset_dir = self.root_dataset_dir 31 | tmp = { 32 | 'mosi':{ 33 | 'unaligned': { 34 | 'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'), 35 | 'seq_lens': (50, 50, 50), 36 | # (text, audio, video) 37 | 'feature_dims': (4096, 5, 20), 38 | 'train_samples': 1284, 39 | 'num_classes': 3, 40 | 'language': 'en', 41 | 'KeyEval': 'MAE' 42 | } 43 | }, 44 | 'mosei':{ 45 | 'unaligned': { 46 | 'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'), 47 | 'seq_lens': (50, 500, 375), 48 | # (text, audio, video) 49 | 'feature_dims': (4096, 74, 35), 50 | 'train_samples': 16326, 51 | 'num_classes': 3, 52 | 'language': 'en', 53 | 'KeyEval': 'MAE' 54 | } 55 | }, 56 | 57 | 58 | 'simsv2': { 59 | 'unaligned': { 60 | 'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'), 61 | # (batch_size, seq_lens, feature_dim) 62 | 'seq_lens': (50, 925, 232), # (text, audio, video) 63 | 'feature_dims': (4096, 25, 177), # (text, audio, video) 64 | 'train_samples': 2722, 65 | 'num_classes': 3, 66 | 'language': 'cn', 67 | 'KeyEval': 'MAE', 68 | } 69 | } 70 | } 71 | return tmp 72 | 73 | def __CMCM(self): 74 | tmp = { 75 | 'commonParas':{ 76 | 'need_data_aligned': False, 77 | 'need_model_aligned': False, 78 | 'need_label_prefix':True, 79 | 'need_normalized': False, 80 | 'use_PLM': True, 81 | 'save_labels': False, 82 | }, 83 | # dataset 84 | 'datasetParas':{ 85 | 'mosei':{ 86 | # the batch_size of each epoch is update_epochs * batch_size 87 | 'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, 3.0]. 
response: The sentiment is', 88 | 'max_new_tokens': 4, 89 | 'pseudo_tokens': 4, 90 | 'batch_size': 8, 91 | 'learning_rate': 5e-5, 92 | # feature subNets 93 | 'a_lstm_hidden_size': 64, 94 | 'v_lstm_hidden_size': 32, 95 | 'a_lstm_layers': 1, 96 | 'v_lstm_layers': 1, 97 | 'a_lstm_dropout': 0.0, 98 | 'v_lstm_dropout': 0.0, 99 | 'warm_up_epochs':30, 100 | #loss weight best:1 101 | 'gamma':1, 102 | 'update_epochs': 1, 103 | 'early_stop': 10, #10和8没啥区别 104 | # res 105 | 'H': 3.0 106 | }, 107 | 108 | 'simsv2': { 109 | # the batch_size of each epoch is update_epochs * batch_size 110 | 'max_new_tokens': 4, 111 | 'pseudo_tokens': 4, 112 | 'task_specific_prompt': '请对上述多模态内容的情感强度进行预测,范围在[-1.0, 1.0]之间。响应: 情感为', 113 | 'batch_size': 8, 114 | 'learning_rate': 5e-5, 115 | # feature subNets 116 | 'a_lstm_hidden_size': 64, 117 | 'v_lstm_hidden_size': 64, 118 | 'a_lstm_layers': 1, 119 | 'v_lstm_layers': 1, 120 | 'a_lstm_dropout': 0.0, 121 | 'v_lstm_dropout': 0.0, 122 | 'warm_up_epochs': 80, 123 | 'update_epochs': 1, 124 | 'early_stop': 10, 125 | # loss weight best:0.25 126 | 'gamma': 1, 127 | # res 128 | 'H': 1.0 129 | }, 130 | }, 131 | } 132 | return tmp 133 | 134 | def get_config(self): 135 | return self.args -------------------------------------------------------------------------------- /MSE-Llama2-7B/config/config_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from utils.functions import Storage 5 | 6 | class ConfigRegression(): 7 | def __init__(self, args): 8 | # hyper parameters for models 9 | HYPER_MODEL_MAP = { 10 | 'cmcm': self.__CMCM 11 | } 12 | # hyper parameters for datasets 13 | self.root_dataset_dir = args.root_dataset_dir 14 | HYPER_DATASET_MAP = self.__datasetCommonParams() 15 | 16 | # normalize 17 | model_name = str.lower(args.modelName) 18 | dataset_name = str.lower(args.datasetName) 19 | # load params 20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas'] 21 | dataArgs = HYPER_DATASET_MAP[dataset_name] 22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned'] 23 | # integrate all parameters 24 | self.args = Storage(dict(vars(args), 25 | **dataArgs, 26 | **commonArgs, 27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name], 28 | )) 29 | 30 | def __datasetCommonParams(self): 31 | root_dataset_dir = self.root_dataset_dir 32 | tmp = { 33 | 'mosi':{ 34 | 'unaligned': { 35 | 'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'), 36 | 'seq_lens': (50, 50, 50), 37 | # (text, audio, video) 38 | 'feature_dims': (4096, 5, 20), 39 | 'train_samples': 1284, 40 | 'num_classes': 3, 41 | 'language': 'en', 42 | 'KeyEval': 'MAE' 43 | } 44 | }, 45 | 'mosei':{ 46 | 'unaligned': { 47 | 'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'), 48 | 'seq_lens': (50, 500, 375), 49 | # (text, audio, video) 50 | 'feature_dims': (4096, 74, 35), 51 | 'train_samples': 16326, 52 | 'num_classes': 3, 53 | 'language': 'en', 54 | 'KeyEval': 'MAE' 55 | } 56 | }, 57 | 58 | 59 | 'simsv2': { 60 | 'unaligned': { 61 | 'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'), 62 | # (batch_size, seq_lens, feature_dim) 63 | 'seq_lens': (50, 925, 232), # (text, audio, video) 64 | 'feature_dims': (4096, 25, 177), # (text, audio, video) 65 | 'train_samples': 2722, 66 | 'num_classes': 3, 67 | 'language': 'cn', 68 | 'KeyEval': 'MAE', 69 | } 70 | } 71 | } 72 | return tmp 73 | 74 | def __CMCM(self): 75 | tmp 
= { 76 | 'commonParas':{ 77 | 'need_data_aligned': False, 78 | 'need_model_aligned': False, 79 | 'need_label_prefix':True, 80 | 'need_normalized': False, 81 | 'use_PLM': True, 82 | 'save_labels': False, 83 | }, 84 | # dataset 85 | 'datasetParas':{ 86 | 'mosei':{ 87 | # the batch_size of each epoch is update_epochs * batch_size 88 | 'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, +3.0]. Assistant: The sentiment is', 89 | 'max_new_tokens': 4, 90 | 'pseudo_tokens': 4, 91 | 'batch_size': 8, 92 | 'learning_rate': 5e-5, 93 | # feature subNets 94 | 'a_lstm_hidden_size': 64, 95 | 'v_lstm_hidden_size': 32, 96 | 'a_lstm_layers': 1, 97 | 'v_lstm_layers': 1, 98 | 'a_lstm_dropout': 0.0, 99 | 'v_lstm_dropout': 0.0, 100 | 'warm_up_epochs':30, 101 | #loss weight best:1 102 | 'gamma':1, 103 | 'update_epochs': 1, 104 | 'early_stop': 10, #10和8没啥区别 105 | # res 106 | 'H': 3.0 107 | }, 108 | 109 | 'simsv2': { 110 | # the batch_size of each epoch is update_epochs * batch_size 111 | 'max_new_tokens': 4, 112 | 'pseudo_tokens': 4, 113 | 'task_specific_prompt': '请对上述多模态内容的情感强度进行预测,范围在[-1.0, 1.0]之间。响应: 情感为', 114 | 'batch_size': 8, 115 | 'learning_rate': 5e-5, 116 | # feature subNets 117 | 'a_lstm_hidden_size': 64, 118 | 'v_lstm_hidden_size': 64, 119 | 'a_lstm_layers': 1, 120 | 'v_lstm_layers': 1, 121 | 'a_lstm_dropout': 0.0, 122 | 'v_lstm_dropout': 0.0, 123 | 'warm_up_epochs': 40, 124 | 'update_epochs': 1, 125 | 'early_stop': 10, 126 | # loss weight best:0.25 127 | 'gamma': 1, 128 | # res 129 | 'H': 1.0 130 | }, 131 | }, 132 | } 133 | return tmp 134 | 135 | def get_config(self): 136 | return self.args -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/config/config_regression.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from utils.functions import Storage 5 | 6 | class ConfigRegression(): 7 | def __init__(self, args): 8 | # hyper parameters for models 9 | HYPER_MODEL_MAP = { 10 | 'cmcm': self.__CMCM 11 | } 12 | # hyper parameters for datasets 13 | self.root_dataset_dir = args.root_dataset_dir 14 | HYPER_DATASET_MAP = self.__datasetCommonParams() 15 | 16 | # normalize 17 | model_name = str.lower(args.modelName) 18 | dataset_name = str.lower(args.datasetName) 19 | # load params 20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas'] 21 | dataArgs = HYPER_DATASET_MAP[dataset_name] 22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned'] 23 | # integrate all parameters 24 | self.args = Storage(dict(vars(args), 25 | **dataArgs, 26 | **commonArgs, 27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name], 28 | )) 29 | 30 | def __datasetCommonParams(self): 31 | root_dataset_dir = self.root_dataset_dir 32 | tmp = { 33 | 'mosi':{ 34 | 'unaligned': { 35 | 'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'), 36 | 'seq_lens': (50, 50, 50), 37 | # (text, audio, video) 38 | 'feature_dims': (2048, 5, 20), 39 | 'train_samples': 1284, 40 | 'num_classes': 3, 41 | 'language': 'en', 42 | 'KeyEval': 'MAE' 43 | } 44 | }, 45 | 'mosei':{ 46 | 'unaligned': { 47 | 'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'), 48 | 'seq_lens': (50, 500, 375), 49 | # (text, audio, video) 50 | 'feature_dims': (2048, 74, 35), 51 | 'train_samples': 16326, 52 | 'num_classes': 3, 53 | 'language': 'en', 54 | 'KeyEval': 'MAE' 55 | } 
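# Note: the first entry of feature_dims is the text-embedding width and is backbone-specific
# (2048 matches the Qwen-1.8B hidden size, while the Llama2-7B and ChatGLM3-6B configs above use 4096);
# it has to agree with the input width of Text_guide_mixer.text_mlp in models/multiTask/CMCM.py.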
56 | }, 57 | 58 | 59 | 'simsv2': { 60 | 'unaligned': { 61 | 'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'), 62 | # (batch_size, seq_lens, feature_dim) 63 | 'seq_lens': (50, 925, 232), # (text, audio, video) 64 | 'feature_dims': (2048, 25, 177), # (text, audio, video) 65 | 'train_samples': 2722, 66 | 'num_classes': 3, 67 | 'language': 'cn', 68 | 'KeyEval': 'MAE', 69 | } 70 | } 71 | } 72 | return tmp 73 | 74 | def __CMCM(self): 75 | tmp = { 76 | 'commonParas':{ 77 | 'need_data_aligned': False, 78 | 'need_model_aligned': False, 79 | 'need_label_prefix':True, 80 | 'need_normalized': False, 81 | 'use_PLM': True, 82 | 'save_labels': False, 83 | }, 84 | # dataset 85 | 'datasetParas':{ 86 | 'mosei':{ 87 | # the batch_size of each epoch is update_epochs * batch_size 88 | 'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, +3.0]. Assistant: The sentiment is', 89 | 'max_new_tokens': 4, 90 | 'pseudo_tokens': 4, 91 | 'batch_size': 16, 92 | 'learning_rate': 5e-3, 93 | # feature subNets 94 | 'a_lstm_hidden_size': 64, 95 | 'v_lstm_hidden_size': 32, 96 | 'a_lstm_layers': 1, 97 | 'v_lstm_layers': 1, 98 | 'a_lstm_dropout': 0.0, 99 | 'v_lstm_dropout': 0.0, 100 | 'warm_up_epochs':30, 101 | #loss weight best:1 102 | 'gamma':1, 103 | 'update_epochs': 1, 104 | 'early_stop': 10, #10和8没啥区别 105 | # res 106 | 'H': 3.0, 107 | }, 108 | 109 | 'simsv2': { 110 | # the batch_size of each epoch is update_epochs * batch_size 111 | 'max_new_tokens': 4, 112 | 'pseudo_tokens': 4, 113 | 'task_specific_prompt': '请对上述多模态内容的情感强度进行预测,范围在[-1.0, +1.0]之间。响应: 情感为', 114 | 'batch_size': 16, 115 | 'learning_rate': 5e-4, #5e -4 较好 116 | # feature subNets 117 | 'a_lstm_hidden_size': 64, 118 | 'v_lstm_hidden_size': 64, 119 | 'a_lstm_layers': 1, 120 | 'v_lstm_layers': 1, 121 | 'a_lstm_dropout': 0.0, 122 | 'v_lstm_dropout': 0.0, 123 | 'warm_up_epochs': 30, # 不太确定是30还是40,先跑一把 124 | 'update_epochs': 1, 125 | 'early_stop': 10, 126 | # loss weight best:0.25 127 | 'gamma': 1, 128 | # res 129 | 'H': 1.0 130 | }, 131 | }, 132 | } 133 | return tmp 134 | 135 | def get_config(self): 136 | return self.args -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/config/config_classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from utils.functions import Storage 5 | 6 | class ConfigClassification(): 7 | def __init__(self, args): 8 | # hyper parameters for models 9 | HYPER_MODEL_MAP = { 10 | 'cmcm': self.__CMCM 11 | } 12 | # hyper parameters for datasets 13 | self.root_dataset_dir = args.root_dataset_dir 14 | HYPER_DATASET_MAP = self.__datasetCommonParams() 15 | 16 | # normalize 17 | model_name = str.lower(args.modelName) 18 | dataset_name = str.lower(args.datasetName) 19 | # load params 20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas'] 21 | dataArgs = HYPER_DATASET_MAP[dataset_name] 22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned'] 23 | # integrate all parameters 24 | self.args = Storage(dict(vars(args), 25 | **dataArgs, 26 | **commonArgs, 27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name], 28 | )) 29 | 30 | def __datasetCommonParams(self): 31 | root_dataset_dir = self.root_dataset_dir 32 | tmp = { 33 | 'iemocap':{ 34 | 'unaligned': { 35 | 'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'), 36 | 'seq_lens': (84, 157, 32), 37 | # (text, 
audio, video) 38 | 'feature_dims': (4096, 64, 64), 39 | 'train_samples': 5240, 40 | 'num_classes': 3, 41 | 'language': 'en', 42 | 'KeyEval': 'weight_F1' 43 | } 44 | }, 45 | 'meld':{ 46 | 'unaligned': { 47 | 'dataPath': os.path.join(root_dataset_dir, 'MELD'), 48 | 'seq_lens': (65, 157, 32), 49 | # (text, audio, video) 50 | 'feature_dims': (4096, 64, 64), 51 | 'train_samples': 9992, 52 | 'num_classes': 3, 53 | 'language': 'en', 54 | 'KeyEval': 'weight_F1' 55 | } 56 | }, 57 | 'cherma':{ 58 | 'unaligned': { 59 | 'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'), 60 | # (batch_size, seq_lens, feature_dim) 61 | 'seq_lens': (78, 543, 16), # (text, audio, video) 62 | 'feature_dims': (4096, 1024, 2048), # (text, audio, video) 63 | 'train_samples': 16326, 64 | 'num_classes': 3, 65 | 'language': 'cn', 66 | 'KeyEval': 'weight_F1', 67 | } 68 | }, 69 | 70 | 71 | } 72 | return tmp 73 | 74 | def __CMCM(self): 75 | tmp = { 76 | 'commonParas':{ 77 | 'need_data_aligned': False, 78 | 'need_model_aligned': False, 79 | 'need_label_prefix':True, 80 | 'need_normalized': False, 81 | 'use_PLM': True, 82 | 'save_labels': False, 83 | }, 84 | # dataset 85 | 'datasetParas':{ 86 | 'meld':{ 87 | # the batch_size of each epoch is update_epochs * batch_size 88 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the target \ 89 | set . response: The emotion is', 90 | 'max_new_tokens': 2, 91 | 'pseudo_tokens': 4, 92 | 'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5, 93 | 'anger': 6}, 94 | 'batch_size': 8, 95 | 'learning_rate': 5e-5, 96 | # feature subNets 97 | 'a_lstm_hidden_size': 64, 98 | 'v_lstm_hidden_size': 32, 99 | 'a_lstm_layers': 1, 100 | 'v_lstm_layers': 1, 101 | 'a_lstm_dropout': 0.0, 102 | 'v_lstm_dropout': 0.0, 103 | 'warm_up_epochs': 90, 104 | #loss weight best:1 105 | 'gamma':1, 106 | 'update_epochs': 1, 107 | 'early_stop': 8, 108 | # res 109 | 'H': 3.0 110 | }, 111 | 'cherma':{ 112 | # the batch_size of each epoch is update_epochs * batch_size 113 | 'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签:<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。响应: 情绪为', 114 | 'max_new_tokens': 2, 115 | 'pseudo_tokens': 4, 116 | 'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5, 117 | '惊奇': 6}, 118 | 'batch_size': 8, 119 | 'learning_rate': 5e-5, 120 | # feature subNets 121 | 'a_lstm_hidden_size': 32, 122 | 'v_lstm_hidden_size': 16, 123 | 'a_lstm_layers': 1, 124 | 'v_lstm_layers': 1, 125 | 'a_lstm_dropout': 0.0, 126 | 'v_lstm_dropout': 0.0, 127 | 'warm_up_epochs': 30, 128 | 'update_epochs': 1, 129 | 'early_stop': 8, 130 | # loss weight 131 | 'gamma': 0, 132 | # res 133 | 'H': 1.0 134 | }, 135 | }, 136 | } 137 | return tmp 138 | 139 | def get_config(self): 140 | return self.args -------------------------------------------------------------------------------- /MSE-Llama2-7B/config/config_classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from utils.functions import Storage 5 | 6 | class ConfigClassification(): 7 | def __init__(self, args): 8 | # hyper parameters for models 9 | HYPER_MODEL_MAP = { 10 | 'cmcm': self.__CMCM 11 | } 12 | # hyper parameters for datasets 13 | self.root_dataset_dir = args.root_dataset_dir 14 | HYPER_DATASET_MAP = self.__datasetCommonParams() 15 | 16 | # normalize 17 | model_name = str.lower(args.modelName) 18 | dataset_name = str.lower(args.datasetName) 19 | # load params 20 | commonArgs = 
HYPER_MODEL_MAP[model_name]()['commonParas'] 21 | dataArgs = HYPER_DATASET_MAP[dataset_name] 22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned'] 23 | # integrate all parameters 24 | self.args = Storage(dict(vars(args), 25 | **dataArgs, 26 | **commonArgs, 27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name], 28 | )) 29 | 30 | def __datasetCommonParams(self): 31 | root_dataset_dir = self.root_dataset_dir 32 | tmp = { 33 | 'iemocap':{ 34 | 'unaligned': { 35 | 'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'), 36 | 'seq_lens': (84, 157, 32), 37 | # (text, audio, video) 38 | 'feature_dims': (4096, 64, 64), 39 | 'train_samples': 5240, 40 | 'num_classes': 3, 41 | 'language': 'en', 42 | 'KeyEval': 'weight_F1' 43 | } 44 | }, 45 | 'meld':{ 46 | 'unaligned': { 47 | 'dataPath': os.path.join(root_dataset_dir, 'MELD'), 48 | 'seq_lens': (65, 157, 32), 49 | # (text, audio, video) 50 | 'feature_dims': (4096, 64, 64), 51 | 'train_samples': 9992, 52 | 'num_classes': 3, 53 | 'language': 'en', 54 | 'KeyEval': 'weight_F1' 55 | } 56 | }, 57 | 'cherma':{ 58 | 'unaligned': { 59 | 'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'), 60 | # (batch_size, seq_lens, feature_dim) 61 | 'seq_lens': (78, 543, 16), # (text, audio, video) 62 | 'feature_dims': (4096, 1024, 2048), # (text, audio, video) 63 | 'train_samples': 16326, 64 | 'num_classes': 3, 65 | 'language': 'cn', 66 | 'KeyEval': 'weight_F1', 67 | } 68 | }, 69 | 70 | 71 | } 72 | return tmp 73 | 74 | def __CMCM(self): 75 | tmp = { 76 | 'commonParas':{ 77 | 'need_data_aligned': False, 78 | 'need_model_aligned': False, 79 | 'need_label_prefix':True, 80 | 'need_normalized': False, 81 | 'use_PLM': True, 82 | 'save_labels': False, 83 | }, 84 | # dataset 85 | 'datasetParas':{ 86 | 'meld':{ 87 | # the batch_size of each epoch is update_epochs * batch_size 88 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the \ 89 | target set . 
Assistant: The emotion is', 90 | 'max_new_tokens': 2, 91 | 'pseudo_tokens': 4, 92 | 'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5, 93 | 'anger': 6}, 94 | 'batch_size': 6, 95 | 'learning_rate': 5e-4, 96 | # feature subNets 97 | 'a_lstm_hidden_size': 64, 98 | 'v_lstm_hidden_size': 32, #原来是32,16 99 | 'a_lstm_layers': 1, 100 | 'v_lstm_layers': 1, 101 | 'a_lstm_dropout': 0.0, 102 | 'v_lstm_dropout': 0.0, 103 | 'warm_up_epochs':30, 104 | #loss weight best:1 105 | 'gamma':1, 106 | 'update_epochs': 1, 107 | 'early_stop': 8, 108 | # res 109 | 'H': 3.0 110 | }, 111 | 'cherma':{ 112 | # the batch_size of each epoch is update_epochs * batch_size 113 | 'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签:<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。助手: 情绪为', 114 | 'max_new_tokens': 2, 115 | 'pseudo_tokens': 4, 116 | 'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5, 117 | '惊奇': 6}, 118 | 'batch_size': 6, 119 | 'learning_rate': 5e-5, 120 | # feature subNets 121 | 'a_lstm_hidden_size': 32, 122 | 'v_lstm_hidden_size': 16, 123 | 'a_lstm_layers': 1, 124 | 'v_lstm_layers': 1, 125 | 'a_lstm_dropout': 0.0, 126 | 'v_lstm_dropout': 0.0, 127 | 'warm_up_epochs': 30, 128 | 'update_epochs': 1, 129 | 'early_stop': 8, 130 | # loss weight 131 | 'gamma': 0, 132 | # res 133 | 'H': 1.0, 134 | }, 135 | }, 136 | } 137 | return tmp 138 | 139 | def get_config(self): 140 | return self.args -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/multiTask/CMCM.py: -------------------------------------------------------------------------------- 1 | # self supervised multimodal multi-task learning network 2 | import math 3 | import os 4 | import sys 5 | import collections 6 | from torch.cuda.amp import autocast, GradScaler 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.autograd.function import Function 11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence 12 | 13 | from models.subNets.Textmodel import Language_model 14 | 15 | __all__ = ['CMCM'] 16 | 17 | class CMCM(nn.Module): 18 | def __init__(self, args): 19 | super(CMCM, self).__init__() 20 | # text enocding 21 | self.LLM = Language_model(args) 22 | 23 | # audio and video enocding 24 | text_in, audio_in, video_in = args.feature_dims[:] 25 | text_len, audio_len, video_len = args.seq_lens[:] 26 | 27 | self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout) 28 | self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout) 29 | 30 | self.text_guide_mixer = Text_guide_mixer() 31 | #low_rank_fusion 32 | fusion_input_size = 256 33 | self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens) 34 | 35 | 36 | def forward(self, labels, text, audio, video): 37 | audio, audio_len = audio 38 | video, video_len = video 39 | text, text_len = text 40 | text = self.LLM.text_embedding(text[:,0,:].long()) 41 | 42 | video_h = self.video_LSTM(video, video_len) 43 | audio_h = self.audio_LSTM(audio, audio_len) 44 | 45 | 46 | fusion_h= self.text_guide_mixer(audio_h, video_h, text) 47 | 48 | fusion_h= self.mutli_scale_fusion(fusion_h) 49 | 50 | 51 | LLM_input = torch.cat([fusion_h, text], dim=1) 52 | 53 | LLM_output = self.LLM(LLM_input, labels) 54 | 55 | res = { 56 | 'Loss': LLM_output.loss, 57 | 
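# LLM_output.loss is whatever loss the Language_model wrapper computes for (LLM_input, labels);
# the unimodal and fused features below are returned alongside it so the training code in trains/
# can reuse them, presumably for the gamma-weighted auxiliary term set in the dataset configs.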
'Feature_a': audio_h, 58 | 'Feature_v': video_h, 59 | 'Feature_f': fusion_h, 60 | } 61 | return res 62 | 63 | def generate(self, text, audio, video): 64 | audio, audio_len = audio 65 | video, video_len = video 66 | text, text_len = text 67 | text = self.LLM.text_embedding(text[:,0,:].long()) 68 | 69 | audio_h = self.audio_LSTM(audio, audio_len) 70 | video_h = self.video_LSTM(video, video_len) 71 | 72 | 73 | fusion_h = self.text_guide_mixer(audio_h, video_h, text) 74 | 75 | # low_rank_fusion 76 | 77 | fusion_h = self.mutli_scale_fusion(fusion_h) 78 | 79 | # concatenate mutli_scale_fusion and text_embedding 80 | 81 | LLM_input = torch.cat([fusion_h, text], dim=1) 82 | 83 | LLM_output = self.LLM.generate(LLM_input) 84 | 85 | return LLM_output 86 | 87 | 88 | 89 | class TVA_LSTM(nn.Module): 90 | def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False): 91 | ''' 92 | Args: 93 | in_size: input dimension 94 | hidden_size: hidden layer dimension 95 | num_layers: specify the number of layers of LSTMs. 96 | dropout: dropout probability 97 | bidirectional: specify usage of bidirectional LSTM 98 | Output: 99 | (return value in forward) a tensor of shape (batch_size, out_size) 100 | ''' 101 | super(TVA_LSTM, self).__init__() 102 | self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True) 103 | self.dropout = nn.Dropout(dropout) 104 | self.linear = nn.Linear(hidden_size, 256) 105 | 106 | def forward(self, x, lengths): 107 | ''' 108 | x: (batch_size, sequence_len, in_size) 109 | ''' 110 | packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False) #这里把length.to cpu是因为pytorch版本问题 111 | # _, (final_states, _) = self.rnn(packed_sequence) 112 | # h = self.dropout(final_states[-1]) 113 | _, final_states = self.rnn(packed_sequence) 114 | h = self.dropout(final_states[0].squeeze()) 115 | h = self.linear(h) 116 | return h 117 | 118 | class Text_guide_mixer(nn.Module): 119 | def __init__(self): 120 | super(Text_guide_mixer, self).__init__() 121 | self.GAP = nn.AdaptiveAvgPool1d(1) 122 | self.text_mlp = nn.Linear(4096, 256) 123 | def forward(self, audio, video, text): 124 | text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze() 125 | text_knowledge = self.text_mlp(text_GAP) 126 | 127 | audio_mixed = torch.mul(audio, text_knowledge) 128 | video_mixed = torch.mul(video, text_knowledge) 129 | 130 | fusion = audio_mixed + video_mixed 131 | 132 | return fusion 133 | 134 | 135 | class mutli_scale_fusion(nn.Module): 136 | def __init__(self, input_size, output_size, pseudo_tokens = 4): 137 | super(mutli_scale_fusion, self).__init__() 138 | multi_scale_hidden = 256 139 | self.scale1 = nn.Sequential( 140 | nn.Linear(input_size, output_size // 8), 141 | nn.GELU(), 142 | nn.Linear(output_size // 8, multi_scale_hidden) 143 | ) 144 | self.scale2 = nn.Sequential( 145 | nn.Linear(input_size, output_size // 32), 146 | nn.GELU(), 147 | nn.Linear(output_size // 32, multi_scale_hidden) 148 | ) 149 | self.scale3 = nn.Sequential( 150 | nn.Linear(input_size, output_size // 16), 151 | nn.GELU(), 152 | nn.Linear(output_size // 16, multi_scale_hidden) 153 | ) 154 | 155 | self.integrating = Integrating(scales = 3) 156 | self.multi_scale_projector = nn.Linear(multi_scale_hidden, output_size) 157 | self.projector = nn.Linear(1, pseudo_tokens) 158 | 159 | def forward(self,x): 160 | # 增加样本复制,将单一样本复制一份,避免最后一个batch只有一个数据时的报错 161 | if x.dim() == 1: 162 | x = x.unsqueeze(0) 163 | #compute different scale 
experts outputs 164 | scale1 = self.scale1(x) 165 | scale2 = self.scale2(x) 166 | scale3 = self.scale3(x) 167 | 168 | 169 | # Calculate the expert outputs 170 | multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2) 171 | multi_scale_integrating = self.integrating(multi_scale_stack) 172 | 173 | multi_scale = self.multi_scale_projector(multi_scale_integrating) 174 | output = self.projector(multi_scale.unsqueeze(2)) 175 | return output.permute(0, 2, 1) #[batch,seq_len,hidden_siez] 176 | 177 | # Define the gating model 178 | class Integrating(nn.Module): 179 | def __init__(self, scales): 180 | super(Integrating, self).__init__() 181 | 182 | # Layers 183 | self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1), 184 | ) 185 | 186 | def forward(self, x): 187 | x = x.unsqueeze(1) 188 | x = self.Integrating_layer(x) 189 | x = x.squeeze((1, 3)) 190 | return x 191 | -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/multiTask/CMCM.py: -------------------------------------------------------------------------------- 1 | # self supervised multimodal multi-task learning network 2 | import math 3 | import os 4 | import sys 5 | import collections 6 | from torch.cuda.amp import autocast, GradScaler 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.autograd.function import Function 11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence 12 | 13 | from models.subNets.Textmodel import Language_model 14 | 15 | __all__ = ['CMCM'] 16 | 17 | class CMCM(nn.Module): 18 | def __init__(self, args): 19 | super(CMCM, self).__init__() 20 | # text enocding 21 | self.LLM = Language_model(args) 22 | 23 | # audio and video enocding 24 | text_in, audio_in, video_in = args.feature_dims[:] 25 | text_len, audio_len, video_len = args.seq_lens[:] 26 | 27 | self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout) 28 | self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout) 29 | 30 | self.text_guide_mixer = Text_guide_mixer() 31 | #low_rank_fusion 32 | fusion_input_size = 256 33 | self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens) 34 | 35 | 36 | def forward(self, labels, text, audio, video): 37 | audio, audio_len = audio 38 | video, video_len = video 39 | text, text_len = text 40 | text = self.LLM.text_embedding(text[:,0,:].long()) 41 | 42 | video_h = self.video_LSTM(video, video_len) 43 | audio_h = self.audio_LSTM(audio, audio_len) 44 | 45 | 46 | fusion_h= self.text_guide_mixer(audio_h, video_h, text) 47 | 48 | fusion_h= self.mutli_scale_fusion(fusion_h) 49 | 50 | 51 | LLM_input = torch.cat([fusion_h, text], dim=1) 52 | 53 | LLM_output = self.LLM(LLM_input, labels) 54 | 55 | res = { 56 | 'Loss': LLM_output.loss, 57 | 'Feature_a': audio_h, 58 | 'Feature_v': video_h, 59 | 'Feature_f': fusion_h, 60 | } 61 | return res 62 | 63 | def generate(self, text, audio, video): 64 | audio, audio_len = audio 65 | video, video_len = video 66 | text, text_len = text 67 | text = self.LLM.text_embedding(text[:,0,:].long()) 68 | 69 | audio_h = self.audio_LSTM(audio, audio_len) 70 | video_h = self.video_LSTM(video, video_len) 71 | 72 | 73 | fusion_h = self.text_guide_mixer(audio_h, video_h, text) 74 | 75 | # low_rank_fusion 76 | 77 | fusion_h = 
self.mutli_scale_fusion(fusion_h) 78 | 79 | # concatenate mutli_scale_fusion and text_embedding 80 | 81 | LLM_input = torch.cat([fusion_h, text], dim=1) 82 | 83 | LLM_output = self.LLM.generate(LLM_input) 84 | 85 | return LLM_output 86 | 87 | 88 | 89 | class TVA_LSTM(nn.Module): 90 | def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False): 91 | ''' 92 | Args: 93 | in_size: input dimension 94 | hidden_size: hidden layer dimension 95 | num_layers: specify the number of layers of LSTMs. 96 | dropout: dropout probability 97 | bidirectional: specify usage of bidirectional LSTM 98 | Output: 99 | (return value in forward) a tensor of shape (batch_size, out_size) 100 | ''' 101 | super(TVA_LSTM, self).__init__() 102 | self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True) 103 | self.dropout = nn.Dropout(dropout) 104 | self.linear = nn.Linear(hidden_size, 256) 105 | 106 | def forward(self, x, lengths): 107 | ''' 108 | x: (batch_size, sequence_len, in_size) 109 | ''' 110 | packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False) #这里把length.to cpu是因为pytorch版本问题 111 | # _, (final_states, _) = self.rnn(packed_sequence) 112 | # h = self.dropout(final_states[-1]) 113 | _, final_states = self.rnn(packed_sequence) 114 | h = self.dropout(final_states[0].squeeze()) 115 | h = self.linear(h) 116 | return h 117 | 118 | class Text_guide_mixer(nn.Module): 119 | def __init__(self): 120 | super(Text_guide_mixer, self).__init__() 121 | self.GAP = nn.AdaptiveAvgPool1d(1) 122 | self.text_mlp = nn.Linear(4096, 256) 123 | def forward(self, audio, video, text): 124 | text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze() 125 | text_knowledge = self.text_mlp(text_GAP) 126 | 127 | audio_mixed = torch.mul(audio, text_knowledge) 128 | video_mixed = torch.mul(video, text_knowledge) 129 | 130 | fusion = audio_mixed + video_mixed 131 | 132 | return fusion 133 | 134 | 135 | class mutli_scale_fusion(nn.Module): 136 | def __init__(self, input_size, output_size, pseudo_tokens = 4): 137 | super(mutli_scale_fusion, self).__init__() 138 | multi_scale_hidden = 256 139 | self.scale1 = nn.Sequential( 140 | nn.Linear(input_size, output_size // 8), 141 | nn.GELU(), 142 | nn.Linear(output_size // 8, multi_scale_hidden) 143 | ) 144 | self.scale2 = nn.Sequential( 145 | nn.Linear(input_size, output_size // 32), 146 | nn.GELU(), 147 | nn.Linear(output_size // 32, multi_scale_hidden) 148 | ) 149 | self.scale3 = nn.Sequential( 150 | nn.Linear(input_size, output_size // 16), 151 | nn.GELU(), 152 | nn.Linear(output_size // 16, multi_scale_hidden) 153 | ) 154 | 155 | self.integrating = Integrating(scales = 3) 156 | self.multi_scale_projector = nn.Linear(multi_scale_hidden, output_size) 157 | self.projector = nn.Linear(1, pseudo_tokens) 158 | 159 | def forward(self,x): 160 | # 增加样本复制,将单一样本复制一份,避免最后一个batch只有一个数据时的报错 161 | if x.dim() == 1: 162 | x = x.unsqueeze(0) 163 | #compute different scale experts outputs 164 | scale1 = self.scale1(x) 165 | scale2 = self.scale2(x) 166 | scale3 = self.scale3(x) 167 | 168 | 169 | # Calculate the expert outputs 170 | multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2) 171 | multi_scale_integrating = self.integrating(multi_scale_stack) 172 | 173 | multi_scale = self.multi_scale_projector(multi_scale_integrating) 174 | output = self.projector(multi_scale.unsqueeze(2)) 175 | return output.permute(0, 2, 1) #[batch,seq_len,hidden_siez] 176 | 177 | # Define the 
gating model 178 | class Integrating(nn.Module): 179 | def __init__(self, scales): 180 | super(Integrating, self).__init__() 181 | 182 | # Layers 183 | self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1), 184 | ) 185 | 186 | def forward(self, x): 187 | x = x.unsqueeze(1) 188 | x = self.Integrating_layer(x) 189 | x = x.squeeze((1, 3)) 190 | return x 191 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/multiTask/CMCM.py: -------------------------------------------------------------------------------- 1 | # self supervised multimodal multi-task learning network 2 | import math 3 | import os 4 | import sys 5 | import collections 6 | from torch.cuda.amp import autocast, GradScaler 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.autograd.function import Function 11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence 12 | 13 | from models.subNets.Textmodel import Language_model 14 | 15 | __all__ = ['CMCM'] 16 | 17 | class CMCM(nn.Module): 18 | def __init__(self, args): 19 | super(CMCM, self).__init__() 20 | # text enocding 21 | self.LLM = Language_model(args) 22 | 23 | # audio and video enocding 24 | text_in, audio_in, video_in = args.feature_dims[:] 25 | text_len, audio_len, video_len = args.seq_lens[:] 26 | 27 | self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout) 28 | self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout) 29 | 30 | self.text_guide_mixer = Text_guide_mixer() 31 | #low_rank_fusion 32 | fusion_input_size = 256 33 | self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens) 34 | 35 | 36 | def forward(self, labels, text, audio, video): 37 | audio, audio_len = audio 38 | video, video_len = video 39 | text, text_len = text 40 | text = self.LLM.text_embedding(text[:,0,:].long()) 41 | 42 | video_h = self.video_LSTM(video, video_len) 43 | audio_h = self.audio_LSTM(audio, audio_len) 44 | 45 | 46 | fusion_h= self.text_guide_mixer(audio_h, video_h, text) 47 | 48 | fusion_h= self.mutli_scale_fusion(fusion_h) 49 | 50 | 51 | LLM_input = torch.cat([fusion_h, text], dim=1) 52 | 53 | LLM_output = self.LLM(LLM_input, labels) 54 | 55 | res = { 56 | 'Loss': LLM_output.loss, 57 | 'Feature_a': audio_h, 58 | 'Feature_v': video_h, 59 | 'Feature_f': fusion_h, 60 | } 61 | return res 62 | 63 | def generate(self, text, audio, video): 64 | audio, audio_len = audio 65 | video, video_len = video 66 | text, text_len = text 67 | text = self.LLM.text_embedding(text[:,0,:].long()) 68 | 69 | audio_h = self.audio_LSTM(audio, audio_len) 70 | video_h = self.video_LSTM(video, video_len) 71 | 72 | 73 | fusion_h = self.text_guide_mixer(audio_h, video_h, text) 74 | 75 | # low_rank_fusion 76 | 77 | fusion_h = self.mutli_scale_fusion(fusion_h) 78 | 79 | # concatenate mutli_scale_fusion and text_embedding 80 | 81 | LLM_input = torch.cat([fusion_h, text], dim=1) 82 | 83 | LLM_output = self.LLM.generate(LLM_input) 84 | 85 | return LLM_output 86 | 87 | 88 | 89 | class TVA_LSTM(nn.Module): 90 | def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False): 91 | ''' 92 | Args: 93 | in_size: input dimension 94 | hidden_size: hidden layer dimension 95 | num_layers: specify the number of layers of LSTMs. 
96 | dropout: dropout probability 97 | bidirectional: specify usage of bidirectional LSTM 98 | Output: 99 | (return value in forward) a tensor of shape (batch_size, out_size) 100 | ''' 101 | super(TVA_LSTM, self).__init__() 102 | self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True) 103 | self.dropout = nn.Dropout(dropout) 104 | self.linear = nn.Linear(hidden_size, 256) 105 | 106 | def forward(self, x, lengths): 107 | ''' 108 | x: (batch_size, sequence_len, in_size) 109 | ''' 110 | packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False) #这里把length.to cpu是因为pytorch版本问题 111 | # _, (final_states, _) = self.rnn(packed_sequence) 112 | # h = self.dropout(final_states[-1]) 113 | _, final_states = self.rnn(packed_sequence) 114 | h = self.dropout(final_states[0].squeeze()) 115 | h = self.linear(h) 116 | return h 117 | 118 | class Text_guide_mixer(nn.Module): 119 | def __init__(self): 120 | super(Text_guide_mixer, self).__init__() 121 | self.GAP = nn.AdaptiveAvgPool1d(1) 122 | self.text_mlp = nn.Linear(2048, 256) 123 | def forward(self, audio, video, text): 124 | text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze() 125 | text_knowledge = self.text_mlp(text_GAP) 126 | 127 | audio_mixed = torch.mul(audio, text_knowledge) 128 | video_mixed = torch.mul(video, text_knowledge) 129 | 130 | fusion = audio_mixed + video_mixed 131 | 132 | return fusion 133 | 134 | 135 | class mutli_scale_fusion(nn.Module): 136 | def __init__(self, input_size, output_size, pseudo_tokens = 4): 137 | super(mutli_scale_fusion, self).__init__() 138 | multi_scale_hidden = 256 139 | self.scale1 = nn.Sequential( 140 | nn.Linear(input_size, output_size // 8), 141 | nn.GELU(), 142 | nn.Linear(output_size // 8, multi_scale_hidden) 143 | ) 144 | self.scale2 = nn.Sequential( 145 | nn.Linear(input_size, output_size // 32), 146 | nn.GELU(), 147 | nn.Linear(output_size // 32, multi_scale_hidden) 148 | ) 149 | self.scale3 = nn.Sequential( 150 | nn.Linear(input_size, output_size // 16), 151 | nn.GELU(), 152 | nn.Linear(output_size // 16, multi_scale_hidden) 153 | ) 154 | 155 | self.integrating = Integrating(scales = 3) 156 | self.multi_scale_projector = nn.Linear(multi_scale_hidden, output_size) 157 | self.projector = nn.Linear(1, pseudo_tokens) 158 | 159 | def forward(self,x): 160 | # 增加样本复制,将单一样本复制一份,避免最后一个batch只有一个数据时的报错 161 | if x.dim() == 1: 162 | x = x.unsqueeze(0) 163 | #compute different scale experts outputs 164 | scale1 = self.scale1(x) 165 | scale2 = self.scale2(x) 166 | scale3 = self.scale3(x) 167 | 168 | 169 | # Calculate the expert outputs 170 | multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2) 171 | multi_scale_integrating = self.integrating(multi_scale_stack) 172 | 173 | multi_scale = self.multi_scale_projector(multi_scale_integrating) 174 | output = self.projector(multi_scale.unsqueeze(2)) 175 | return output.permute(0, 2, 1) #[batch,seq_len,hidden_siez] 176 | 177 | # Define the gating model 178 | class Integrating(nn.Module): 179 | def __init__(self, scales): 180 | super(Integrating, self).__init__() 181 | 182 | # Layers 183 | self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1), 184 | ) 185 | 186 | def forward(self, x): 187 | x = x.unsqueeze(1) 188 | x = self.Integrating_layer(x) 189 | x = x.squeeze((1, 3)) 190 | return x 191 | -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/config/config_classification.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | from utils.functions import Storage 5 | 6 | class ConfigClassification(): 7 | def __init__(self, args): 8 | # hyper parameters for models 9 | HYPER_MODEL_MAP = { 10 | 'cmcm': self.__CMCM 11 | } 12 | # hyper parameters for datasets 13 | self.root_dataset_dir = args.root_dataset_dir 14 | HYPER_DATASET_MAP = self.__datasetCommonParams() 15 | 16 | # normalize 17 | model_name = str.lower(args.modelName) 18 | dataset_name = str.lower(args.datasetName) 19 | # load params 20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas'] 21 | dataArgs = HYPER_DATASET_MAP[dataset_name] 22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned'] 23 | # integrate all parameters 24 | self.args = Storage(dict(vars(args), 25 | **dataArgs, 26 | **commonArgs, 27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name], 28 | )) 29 | 30 | def __datasetCommonParams(self): 31 | root_dataset_dir = self.root_dataset_dir 32 | tmp = { 33 | 'iemocap':{ 34 | 'unaligned': { 35 | 'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'), 36 | 'seq_lens': (84, 157, 32), 37 | # (text, audio, video) 38 | 'feature_dims': (2048, 64, 64), 39 | 'train_samples': 5240, 40 | 'num_classes': 3, 41 | 'language': 'en', 42 | 'KeyEval': 'weight_F1' 43 | } 44 | }, 45 | 'meld':{ 46 | 'unaligned': { 47 | 'dataPath': os.path.join(root_dataset_dir, 'MELD'), 48 | 'seq_lens': (65, 157, 32), 49 | # (text, audio, video) 50 | 'feature_dims': (2048, 64, 64), 51 | 'train_samples': 9992, 52 | 'num_classes': 3, 53 | 'language': 'en', 54 | 'KeyEval': 'weight_F1' 55 | } 56 | }, 57 | 'cherma':{ 58 | 'unaligned': { 59 | 'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'), 60 | # (batch_size, seq_lens, feature_dim) 61 | 'seq_lens': (78, 543, 16), # (text, audio, video) 62 | 'feature_dims': (2048, 1024, 2048), # (text, audio, video) 63 | 'train_samples': 16326, 64 | 'num_classes': 3, 65 | 'language': 'cn', 66 | 'KeyEval': 'weight_F1', 67 | } 68 | }, 69 | 70 | 71 | } 72 | return tmp 73 | 74 | def __CMCM(self): 75 | tmp = { 76 | 'commonParas':{ 77 | 'need_data_aligned': False, 78 | 'need_model_aligned': False, 79 | 'need_label_prefix':True, 80 | 'need_normalized': False, 81 | 'use_PLM': True, 82 | 'save_labels': False, 83 | }, 84 | # dataset 85 | 'datasetParas':{ 86 | 'iemocap':{ 87 | # the batch_size of each epoch is update_epochs * batch_size 88 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the label \ 89 | set . Assistant: The emotion is', 90 | 'max_new_tokens': 1, 91 | 'pseudo_tokens': 4, 92 | 'label_index_mapping': {'hap': 0, 'sad': 1, 'neu': 2, 'ang': 3, 'exc': 4, 'fru': 5}, 93 | 'batch_size': 4, 94 | 'learning_rate': 5e-4, 95 | # feature subNets 96 | 'a_lstm_hidden_size': 32, 97 | 'v_lstm_hidden_size': 32, 98 | 'a_lstm_layers': 1, 99 | 'v_lstm_layers': 1, 100 | 'a_lstm_dropout': 0.0, 101 | 'v_lstm_dropout': 0.0, 102 | 'warm_up_epochs': 30, #it should be low 103 | 'gamma': 1, 104 | 'update_epochs': 1, 105 | 'early_stop': 8, 106 | # res 107 | 'H': 3.0 108 | }, 109 | 'meld':{ 110 | # the batch_size of each epoch is update_epochs * batch_size 111 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the \ 112 | target set . 
Assistant: The emotion is', 113 | 'max_new_tokens': 1, 114 | 'pseudo_tokens': 2, 115 | 'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5, 116 | 'anger': 6}, 117 | 'batch_size': 16, 118 | 'learning_rate': 5e-4, 119 | # feature subNets 120 | 'a_lstm_hidden_size': 32, 121 | 'v_lstm_hidden_size': 16, 122 | 'a_lstm_layers': 1, 123 | 'v_lstm_layers': 1, 124 | 'a_lstm_dropout': 0.0, 125 | 'v_lstm_dropout': 0.0, 126 | 'warm_up_epochs':50, 127 | #loss weight best:1 128 | 'gamma':1, 129 | 'update_epochs': 1, 130 | 'early_stop': 8, 131 | # res 132 | 'H': 3.0 133 | }, 134 | 'cherma':{ 135 | # the batch_size of each epoch is update_epochs * batch_size 136 | 'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签:<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。助手: 情绪为', 137 | 'max_new_tokens': 1, 138 | 'pseudo_tokens': 4, 139 | 'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5, 140 | '惊奇': 6}, 141 | 'batch_size': 16, 142 | 'learning_rate': 5e-3, 143 | # feature subNets 144 | 'a_lstm_hidden_size': 32, 145 | 'v_lstm_hidden_size': 16, 146 | 'a_lstm_layers': 1, 147 | 'v_lstm_layers': 1, 148 | 'a_lstm_dropout': 0.0, 149 | 'v_lstm_dropout': 0.0, 150 | 'warm_up_epochs': 30, 151 | 'update_epochs': 1, 152 | 'early_stop': 8, 153 | # loss weight 154 | 'gamma': 0, 155 | # res 156 | 'H': 1.0 157 | }, 158 | }, 159 | } 160 | return tmp 161 | 162 | def get_config(self): 163 | return self.args -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gc 3 | import time 4 | import random 5 | import torch 6 | import pynvml 7 | import logging 8 | import argparse 9 | import numpy as np 10 | import pandas as pd 11 | from tqdm import tqdm 12 | 13 | from models.AMIO import AMIO 14 | from trains.ATIO import ATIO 15 | from data.load_data import MMDataLoader 16 | from config.config_regression import ConfigRegression 17 | from config.config_classification import ConfigClassification 18 | 19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 下面老是报错 shape 不一致 21 | 22 | def setup_seed(seed): 23 | torch.manual_seed(seed) 24 | torch.cuda.manual_seed_all(seed) 25 | np.random.seed(seed) 26 | random.seed(seed) 27 | torch.backends.cudnn.deterministic = True 28 | 29 | def run(args): 30 | if not os.path.exists(args.model_save_dir): 31 | os.makedirs(args.model_save_dir) 32 | args.model_save_path = os.path.join(args.model_save_dir,\ 33 | f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth') 34 | 35 | if len(args.gpu_ids) == 0 and torch.cuda.is_available(): 36 | # load free-most gpu 37 | pynvml.nvmlInit() 38 | dst_gpu_id, min_mem_used = 0, 1e16 39 | for g_id in [0, 1, 2, 3]: 40 | handle = pynvml.nvmlDeviceGetHandleByIndex(g_id) 41 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) 42 | mem_used = meminfo.used 43 | if mem_used < min_mem_used: 44 | min_mem_used = mem_used 45 | dst_gpu_id = g_id 46 | print(f'Find gpu: {dst_gpu_id}, use memory: {min_mem_used}!') 47 | logger.info(f'Find gpu: {dst_gpu_id}, with memory: {min_mem_used} left!') 48 | args.gpu_ids.append(dst_gpu_id) 49 | # device 50 | using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available() 51 | logger.info("Let's use the GPU %d !" 
% len(args.gpu_ids)) 52 | device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu') 53 | # device = "cuda:1" if torch.cuda.is_available() else "cpu" 54 | args.device = device 55 | # data 56 | dataloader = MMDataLoader(args) 57 | model = AMIO(args).to(device) 58 | 59 | def print_trainable_parameters(model): 60 | """ 61 | Prints the number of trainable parameters in the model. 62 | """ 63 | trainable_params = 0 64 | all_param = 0 65 | for _, param in model.named_parameters(): 66 | all_param += param.numel() 67 | if param.requires_grad: 68 | trainable_params += param.numel() 69 | 70 | logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}") 71 | 72 | print_trainable_parameters(model) 73 | 74 | # using multiple gpus 75 | # if using_cuda and len(args.gpu_ids) > 1: 76 | # model = torch.nn.DataParallel(model, 77 | # device_ids=args.gpu_ids, 78 | # output_device=args.gpu_ids[0]) 79 | atio = ATIO().getTrain(args) 80 | # do train 81 | atio.do_train(model, dataloader) 82 | # load pretrained model 83 | assert os.path.exists(args.model_save_path) 84 | # load finetune parameters 85 | checkpoint = torch.load(args.model_save_path) 86 | model.load_state_dict(checkpoint, strict=False) 87 | model.to(device) 88 | 89 | # do test 90 | if args.tune_mode: 91 | # using valid dataset to debug hyper parameters 92 | results = atio.do_test(model, dataloader['valid'], mode="VALID") 93 | else: 94 | results = atio.do_test(model, dataloader['test'], mode="TEST") 95 | 96 | del model 97 | torch.cuda.empty_cache() 98 | gc.collect() 99 | 100 | return results 101 | 102 | 103 | 104 | def run_normal(args): 105 | args.res_save_dir = os.path.join(args.res_save_dir) 106 | init_args = args 107 | model_results = [] 108 | seeds = args.seeds 109 | # warm_epochs =[30,40,50,60,70,80,90,100] 110 | # for warm_up_epoch in warm_epochs: 111 | # run results 112 | for i, seed in enumerate(seeds): 113 | args = init_args 114 | # load config 115 | if args.train_mode == "regression": 116 | config = ConfigRegression(args) 117 | else : 118 | config = ConfigClassification(args) 119 | args = config.get_config() 120 | 121 | setup_seed(seed) 122 | args.seed = seed 123 | # args.warm_up_epochs = warm_up_epoch 124 | logger.info('Start running %s...' % (args.modelName)) 125 | logger.info(args) 126 | # runnning 127 | args.cur_time = i + 1 128 | test_results = run(args) # 训练 129 | # restore results 130 | model_results.append(test_results) 131 | 132 | criterions = list(model_results[0].keys()) 133 | # load other results 134 | save_path = os.path.join(args.res_save_dir, f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv') 135 | if not os.path.exists(args.res_save_dir): 136 | os.makedirs(args.res_save_dir) 137 | if os.path.exists(save_path): 138 | df = pd.read_csv(save_path) 139 | else: 140 | # df = pd.DataFrame(columns=["Model"] + criterions) 141 | df = pd.DataFrame(columns=["Model", "Seed"] + criterions) 142 | # save results 143 | # res = [args.modelName] 144 | 145 | for k, test_results in enumerate(model_results): 146 | res = [args.modelName, f'{seed}'] 147 | for c in criterions: 148 | res.append(round(test_results[c] * 100, 2)) 149 | df.loc[len(df)] = res 150 | 151 | # df.loc[len(df)] = res 152 | df.to_csv(save_path, index=None) 153 | logger.info('Results are added to %s...' 
% (save_path)) 154 | df = df.iloc[0:0] # 保存后清0 155 | model_results = [] 156 | 157 | 158 | def set_log(args): 159 | if not os.path.exists('logs'): 160 | os.makedirs('logs') 161 | log_file_path = f'logs/{args.modelName}-{args.datasetName}.log' 162 | # set logging 163 | logger = logging.getLogger() 164 | logger.setLevel(logging.DEBUG) 165 | 166 | for ph in logger.handlers: 167 | logger.removeHandler(ph) 168 | # add FileHandler to log file 169 | formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S') 170 | fh = logging.FileHandler(log_file_path) 171 | fh.setLevel(logging.DEBUG) 172 | fh.setFormatter(formatter_file) 173 | logger.addHandler(fh) 174 | # add StreamHandler to terminal outputs 175 | formatter_stream = logging.Formatter('%(message)s') 176 | ch = logging.StreamHandler() 177 | ch.setLevel(logging.DEBUG) 178 | ch.setFormatter(formatter_stream) 179 | logger.addHandler(ch) 180 | return logger 181 | 182 | def parse_args(): 183 | parser = argparse.ArgumentParser() 184 | parser.add_argument('--is_tune', type=bool, default=False, 185 | help='tune parameters ?') 186 | parser.add_argument('--train_mode', type=str, default="regression", 187 | help='regression / classification') 188 | parser.add_argument('--modelName', type=str, default='cmcm', 189 | help='support CMCM') 190 | parser.add_argument('--datasetName', type=str, default='mosi', 191 | help='support mosei/simsv2/meld/cherma') 192 | parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/', 193 | help='Location of the root directory where the dataset is stored') 194 | parser.add_argument('--num_workers', type=int, default=0, 195 | help='num workers of loading data') 196 | parser.add_argument('--model_save_dir', type=str, default='results/models', 197 | help='path to save results.') 198 | parser.add_argument('--res_save_dir', type=str, default='results/results', 199 | help='path to save results.') 200 | parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/THUDM/chatglm3-6b-base/', 201 | help='path to load pretrain LLM.') 202 | parser.add_argument('--gpu_ids', type=list, default=[], 203 | help='indicates the gpus will be used. 
If none, the most-free gpu will be used!') #使用GPU1 204 | return parser.parse_args() 205 | 206 | if __name__ == '__main__': 207 | args = parse_args() 208 | logger = set_log(args) 209 | for data_name in ['mosei', 'simsv2', 'meld', 'cherma']: 210 | if data_name in ['mosei', 'simsv2']: 211 | args.train_mode = 'regression' 212 | else: 213 | args.train_mode = 'classification' 214 | 215 | args.datasetName = data_name 216 | args.seeds = [1111, 2222, 3333, 4444, 5555] 217 | # args.seeds = [1111] 218 | run_normal(args) -------------------------------------------------------------------------------- /MSE-Llama2-7B/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gc 3 | import time 4 | import random 5 | import torch 6 | import pynvml 7 | import logging 8 | import argparse 9 | import numpy as np 10 | import pandas as pd 11 | from tqdm import tqdm 12 | 13 | from models.AMIO import AMIO 14 | from trains.ATIO import ATIO 15 | from data.load_data import MMDataLoader 16 | from config.config_regression import ConfigRegression 17 | from config.config_classification import ConfigClassification 18 | 19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 下面老是报错 shape 不一致 21 | 22 | def setup_seed(seed): 23 | torch.manual_seed(seed) 24 | torch.cuda.manual_seed_all(seed) 25 | np.random.seed(seed) 26 | random.seed(seed) 27 | torch.backends.cudnn.deterministic = True 28 | 29 | def run(args): 30 | named = 'data_percent' 31 | if not os.path.exists(args.model_save_dir): 32 | os.makedirs(args.model_save_dir) 33 | args.model_save_path = os.path.join(args.model_save_dir,\ 34 | f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth') 35 | 36 | if len(args.gpu_ids) == 0 and torch.cuda.is_available(): 37 | # load free-most gpu 38 | pynvml.nvmlInit() 39 | dst_gpu_id, min_mem_used = 0, 1e16 40 | for g_id in [0, 1, 2, 3]: 41 | handle = pynvml.nvmlDeviceGetHandleByIndex(g_id) 42 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) 43 | mem_used = meminfo.used 44 | if mem_used < min_mem_used: 45 | min_mem_used = mem_used 46 | dst_gpu_id = g_id 47 | print(f'Find gpu: {dst_gpu_id}, use memory: {min_mem_used}!') 48 | logger.info(f'Find gpu: {dst_gpu_id}, with memory: {min_mem_used} left!') 49 | args.gpu_ids.append(dst_gpu_id) 50 | # device 51 | using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available() 52 | logger.info("Let's use the GPU %d !" % len(args.gpu_ids)) 53 | device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu') 54 | # device = "cuda:1" if torch.cuda.is_available() else "cpu" 55 | args.device = device 56 | # data 57 | dataloader = MMDataLoader(args) 58 | model = AMIO(args).to(device) 59 | 60 | def print_trainable_parameters(model): 61 | """ 62 | Prints the number of trainable parameters in the model. 
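With use_PLM=True the pretrained backbone is typically loaded frozen, so the logged
trainable fraction mainly reflects the small LSTM / mixer / fusion-projector modules;
whether that holds depends on how Language_model (models/subNets/Textmodel.py) sets requires_grad.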
63 | """ 64 | trainable_params = 0 65 | all_param = 0 66 | for _, param in model.named_parameters(): 67 | all_param += param.numel() 68 | if param.requires_grad: 69 | trainable_params += param.numel() 70 | 71 | logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}") 72 | 73 | print_trainable_parameters(model) 74 | 75 | # using multiple gpus 76 | # if using_cuda and len(args.gpu_ids) > 1: 77 | # model = torch.nn.DataParallel(model, 78 | # device_ids=args.gpu_ids, 79 | # output_device=args.gpu_ids[0]) 80 | atio = ATIO().getTrain(args) 81 | # do train 82 | atio.do_train(model, dataloader) 83 | # load pretrained model 84 | assert os.path.exists(args.model_save_path) 85 | # load finetune parameters 86 | checkpoint = torch.load(args.model_save_path) 87 | model.load_state_dict(checkpoint, strict=False) 88 | model.to(device) 89 | 90 | # do test 91 | if args.tune_mode: 92 | # using valid dataset to debug hyper parameters 93 | results = atio.do_test(model, dataloader['valid'], mode="VALID") 94 | else: 95 | results = atio.do_test(model, dataloader['test'], mode="TEST") 96 | 97 | del model 98 | torch.cuda.empty_cache() 99 | gc.collect() 100 | 101 | return results 102 | 103 | 104 | 105 | def run_normal(args): 106 | args.res_save_dir = os.path.join(args.res_save_dir) 107 | init_args = args 108 | model_results = [] 109 | seeds = args.seeds 110 | 111 | for i, seed in enumerate(seeds): 112 | args = init_args 113 | # load config 114 | if args.train_mode == "regression": 115 | config = ConfigRegression(args) 116 | else: 117 | config = ConfigClassification(args) 118 | args = config.get_config() 119 | 120 | setup_seed(seed) 121 | args.seed = seed 122 | # args.warm_up_epochs = warmup 123 | logger.info('Start running %s...' % (args.modelName)) 124 | logger.info(args) 125 | # runnning 126 | args.cur_time = i + 1 127 | test_results = run(args) # 训练 128 | # restore results 129 | model_results.append(test_results) 130 | 131 | criterions = list(model_results[0].keys()) 132 | # load other results 133 | save_path = os.path.join(args.res_save_dir, 134 | f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv') 135 | if not os.path.exists(args.res_save_dir): 136 | os.makedirs(args.res_save_dir) 137 | if os.path.exists(save_path): 138 | df = pd.read_csv(save_path) 139 | else: 140 | 141 | df = pd.DataFrame(columns=["Model", "Seed"] + criterions) 142 | # save results 143 | # res = [args.modelName] 144 | 145 | for k, test_results in enumerate(model_results): 146 | res = [args.modelName, f'{seed}'] 147 | for c in criterions: 148 | res.append(round(test_results[c] * 100, 2)) 149 | df.loc[len(df)] = res 150 | 151 | # df.loc[len(df)] = res 152 | df.to_csv(save_path, index=None) 153 | logger.info('Results are added to %s...' 
% (save_path)) 154 | df = df.iloc[0:0] # 保存后清0 155 | model_results = [] 156 | 157 | 158 | def set_log(args): 159 | if not os.path.exists('logs'): 160 | os.makedirs('logs') 161 | log_file_path = f'logs/{args.modelName}-{args.datasetName}.log' 162 | # set logging 163 | logger = logging.getLogger() 164 | logger.setLevel(logging.DEBUG) 165 | 166 | for ph in logger.handlers: 167 | logger.removeHandler(ph) 168 | # add FileHandler to log file 169 | formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S') 170 | fh = logging.FileHandler(log_file_path) 171 | fh.setLevel(logging.DEBUG) 172 | fh.setFormatter(formatter_file) 173 | logger.addHandler(fh) 174 | # add StreamHandler to terminal outputs 175 | formatter_stream = logging.Formatter('%(message)s') 176 | ch = logging.StreamHandler() 177 | ch.setLevel(logging.DEBUG) 178 | ch.setFormatter(formatter_stream) 179 | logger.addHandler(ch) 180 | return logger 181 | 182 | def parse_args(): 183 | parser = argparse.ArgumentParser() 184 | parser.add_argument('--is_tune', type=bool, default=False, 185 | help='tune parameters ?') 186 | parser.add_argument('--train_mode', type=str, default="regression", 187 | help='regression / classification') 188 | parser.add_argument('--modelName', type=str, default='cmcm', 189 | help='support CMCM') 190 | parser.add_argument('--datasetName', type=str, default='sims', 191 | help='support mosi/mosei/simsv2/iemocap/meld/cherma') 192 | parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/', 193 | help='Location of the root directory where the dataset is stored') 194 | parser.add_argument('--num_workers', type=int, default=0, 195 | help='num workers of loading data') 196 | parser.add_argument('--model_save_dir', type=str, default='results/models', 197 | help='path to save results.') 198 | parser.add_argument('--res_save_dir', type=str, default='results/results', 199 | help='path to save results.') 200 | parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/Meta/Llama-2-7b-hf/', 201 | help='path to load pretrain LLM.') 202 | parser.add_argument('--gpu_ids', type=list, default=[2], 203 | help='indicates the gpus will be used. 
If none, the most-free gpu will be used!') #使用GPU1 204 | return parser.parse_args() 205 | 206 | if __name__ == '__main__': 207 | args = parse_args() 208 | logger = set_log(args) 209 | # for data_name in ['mosi', 'mosei', 'simsv2', 'iemocap', 'meld', 'cherma' ]: 210 | # for data_name in ['simsv2','cherma']: 211 | # for data_name in ['mosi']: 212 | for data_name in ['simsv2', 'mosei', 'meld', 'cherma']: 213 | if data_name in ['mosi', 'mosei', 'sims', 'simsv2']: 214 | args.train_mode = 'regression' 215 | else: 216 | args.train_mode = 'classification' 217 | 218 | args.datasetName = data_name 219 | args.seeds = [1111, 2222, 3333, 4444, 5555] 220 | run_normal(args) -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gc 3 | import time 4 | import random 5 | import torch 6 | import pynvml 7 | import logging 8 | import argparse 9 | import numpy as np 10 | import pandas as pd 11 | from tqdm import tqdm 12 | 13 | from models.AMIO import AMIO 14 | from trains.ATIO import ATIO 15 | from data.load_data import MMDataLoader 16 | from config.config_regression import ConfigRegression 17 | from config.config_classification import ConfigClassification 18 | 19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 下面老是报错 shape 不一致 21 | 22 | def setup_seed(seed): 23 | torch.manual_seed(seed) 24 | torch.cuda.manual_seed_all(seed) 25 | np.random.seed(seed) 26 | random.seed(seed) 27 | torch.backends.cudnn.deterministic = True 28 | 29 | def run(args): 30 | if not os.path.exists(args.model_save_dir): 31 | os.makedirs(args.model_save_dir) 32 | args.model_save_path = os.path.join(args.model_save_dir,\ 33 | f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth') 34 | 35 | if len(args.gpu_ids) == 0 and torch.cuda.is_available(): 36 | # load free-most gpu 37 | pynvml.nvmlInit() 38 | dst_gpu_id, min_mem_used = 0, 1e16 39 | for g_id in [0, 1, 2, 3]: 40 | handle = pynvml.nvmlDeviceGetHandleByIndex(g_id) 41 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) 42 | mem_used = meminfo.used 43 | if mem_used < min_mem_used: 44 | min_mem_used = mem_used 45 | dst_gpu_id = g_id 46 | print(f'Find gpu: {dst_gpu_id}, use memory: {min_mem_used}!') 47 | logger.info(f'Find gpu: {dst_gpu_id}, with memory: {min_mem_used} left!') 48 | args.gpu_ids.append(dst_gpu_id) 49 | # device 50 | using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available() 51 | logger.info("Let's use the GPU %d !" % int(args.gpu_ids[0])) 52 | device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu') 53 | # device = "cuda:1" if torch.cuda.is_available() else "cpu" 54 | args.device = device 55 | # data 56 | dataloader = MMDataLoader(args) 57 | model = AMIO(args).to(device) 58 | 59 | def print_trainable_parameters(model): 60 | """ 61 | Prints the number of trainable parameters in the model. 
62 | """ 63 | trainable_params = 0 64 | all_param = 0 65 | for _, param in model.named_parameters(): 66 | all_param += param.numel() 67 | if param.requires_grad: 68 | trainable_params += param.numel() 69 | 70 | logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}") 71 | 72 | print_trainable_parameters(model) 73 | 74 | # using multiple gpus 75 | # if using_cuda and len(args.gpu_ids) > 1: 76 | # model = torch.nn.DataParallel(model, 77 | # device_ids=args.gpu_ids, 78 | # output_device=args.gpu_ids[0]) 79 | atio = ATIO().getTrain(args) 80 | # do train 81 | atio.do_train(model, dataloader) 82 | # load pretrained model 83 | assert os.path.exists(args.model_save_path) 84 | # load finetune parameters 85 | checkpoint = torch.load(args.model_save_path) 86 | model.load_state_dict(checkpoint, strict=False) 87 | model.to(device) 88 | 89 | # do test 90 | if args.tune_mode: 91 | # using valid dataset to debug hyper parameters 92 | results = atio.do_test(model, dataloader['valid'], mode="VALID") 93 | else: 94 | results = atio.do_test(model, dataloader['test'], mode="TEST") 95 | 96 | del model 97 | torch.cuda.empty_cache() 98 | gc.collect() 99 | 100 | return results 101 | 102 | 103 | 104 | def run_normal(args): 105 | args.res_save_dir = os.path.join(args.res_save_dir) 106 | init_args = args 107 | model_results = [] 108 | seeds = args.seeds 109 | # warmup_list = [30] 110 | # # run results 111 | # for warmup in warmup_list: 112 | for i, seed in enumerate(seeds): 113 | args = init_args 114 | # load config 115 | if args.train_mode == "regression": 116 | config = ConfigRegression(args) 117 | else : 118 | config = ConfigClassification(args) 119 | args = config.get_config() 120 | 121 | setup_seed(seed) 122 | args.seed = seed 123 | # args.warm_up_epochs = warmup 124 | logger.info('Start running %s...' % (args.modelName)) 125 | logger.info(args) 126 | # runnning 127 | args.cur_time = i + 1 128 | start_time = time.time() 129 | test_results = run(args) # 训练 130 | 131 | end_time = time.time() 132 | # 计算运行时间 133 | elapsed_time = end_time - start_time 134 | print(f"程序运行时间: {elapsed_time:.6f} 秒") 135 | 136 | # restore results 137 | model_results.append(test_results) 138 | 139 | criterions = list(model_results[0].keys()) 140 | # load other results 141 | save_path = os.path.join(args.res_save_dir, f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv') 142 | if not os.path.exists(args.res_save_dir): 143 | os.makedirs(args.res_save_dir) 144 | if os.path.exists(save_path): 145 | df = pd.read_csv(save_path) 146 | else: 147 | 148 | df = pd.DataFrame(columns=["Model", "Seed"] + criterions) 149 | # save results 150 | # res = [args.modelName] 151 | 152 | for k, test_results in enumerate(model_results): 153 | res = [args.modelName, f'{seed}'] 154 | for c in criterions: 155 | res.append(round(test_results[c] * 100, 2)) 156 | df.loc[len(df)] = res 157 | 158 | # df.loc[len(df)] = res 159 | df.to_csv(save_path, index=None) 160 | logger.info('Results are added to %s...' 
% (save_path)) 161 | df = df.iloc[0:0] # 保存后清0 162 | model_results = [] 163 | 164 | 165 | def set_log(args): 166 | if not os.path.exists('logs'): 167 | os.makedirs('logs') 168 | log_file_path = f'logs/{args.modelName}-{args.datasetName}.log' 169 | # set logging 170 | logger = logging.getLogger() 171 | logger.setLevel(logging.DEBUG) 172 | 173 | for ph in logger.handlers: 174 | logger.removeHandler(ph) 175 | # add FileHandler to log file 176 | formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S') 177 | fh = logging.FileHandler(log_file_path) 178 | fh.setLevel(logging.DEBUG) 179 | fh.setFormatter(formatter_file) 180 | logger.addHandler(fh) 181 | # add StreamHandler to terminal outputs 182 | formatter_stream = logging.Formatter('%(message)s') 183 | ch = logging.StreamHandler() 184 | ch.setLevel(logging.DEBUG) 185 | ch.setFormatter(formatter_stream) 186 | logger.addHandler(ch) 187 | return logger 188 | 189 | def parse_args(): 190 | parser = argparse.ArgumentParser() 191 | parser.add_argument('--is_tune', type=bool, default=False, 192 | help='tune parameters ?') 193 | parser.add_argument('--train_mode', type=str, default="regression", 194 | help='regression / classification') 195 | parser.add_argument('--modelName', type=str, default='cmcm', 196 | help='support CMCM') 197 | parser.add_argument('--datasetName', type=str, default='sims', 198 | help='support mosi/mosei/simsv2/iemocap/meld/cherma') 199 | parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/', 200 | help='Location of the root directory where the dataset is stored') 201 | parser.add_argument('--num_workers', type=int, default=0, 202 | help='num workers of loading data') 203 | parser.add_argument('--model_save_dir', type=str, default='results/models', 204 | help='path to save results.') 205 | parser.add_argument('--res_save_dir', type=str, default='results/results', 206 | help='path to save results.') 207 | parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/Qwen/Qwen-1_8B/', 208 | help='path to load pretrain LLM.') 209 | parser.add_argument('--gpu_ids', type=list, default=[], 210 | help='indicates the gpus will be used. 
If none, the most-free gpu will be used!') #使用GPU1 211 | return parser.parse_args() 212 | 213 | if __name__ == '__main__': 214 | args = parse_args() 215 | logger = set_log(args) 216 | for data_name in [ 'simsv2', 'mosei', 'meld', 'cherma']: 217 | if data_name in ['mosi', 'mosei', 'sims', 'simsv2']: 218 | args.train_mode = 'regression' 219 | else: 220 | args.train_mode = 'classification' 221 | 222 | args.datasetName = data_name 223 | args.seeds = [1111, 2222, 3333, 4444, 5555] 224 | run_normal(args) -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/utils/metricsTop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.metrics import classification_report 4 | from sklearn.metrics import confusion_matrix 5 | from sklearn.metrics import precision_recall_fscore_support 6 | from sklearn.metrics import accuracy_score, f1_score 7 | from sklearn.metrics import r2_score 8 | from itertools import chain 9 | __all__ = ['MetricsTop'] 10 | 11 | class MetricsTop(): 12 | def __init__(self, args): 13 | if args.train_mode == "regression": 14 | self.metrics_dict = { 15 | 'MOSI': self.__eval_mosi_regression, 16 | 'MOSEI': self.__eval_mosei_regression, 17 | 'SIMS': self.__eval_sims_regression, 18 | 'SIMSV2': self.__eval_simsv2_regression 19 | } 20 | else: 21 | self.metrics_dict = { 22 | 'IEMOCAP': self.__eval_iemocap_classification, 23 | 'MELD': self.__eval_meld_classification, 24 | 'CHERMA': self.__eval_cherma_classification 25 | } 26 | self.label_index_mapping = args.label_index_mapping 27 | 28 | def __eval_iemocap_classification(self, results, truths): 29 | # label_index_mapping = self.label_index_mapping 30 | # # 主要通过混淆矩阵来计算 31 | # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results] 32 | # truths_indices = [label_index_mapping.get(label, -1) for label in truths] 33 | # acc = accuracy_score(truths_indices, results_indices) 34 | # weight_F1 = f1_score(truths_indices, results_indices, average='weighted') 35 | acc = accuracy_score(truths, results) 36 | weight_F1 = f1_score(truths, results, average='weighted') 37 | 38 | eval_result = { 39 | 'acc': acc, 40 | 'weight_F1': weight_F1 41 | } 42 | return eval_result 43 | 44 | def __eval_cherma_classification(self, results, truths): 45 | acc = accuracy_score(truths, results) 46 | weight_F1 = f1_score(truths, results, average='weighted') 47 | eval_result = { 48 | 'acc': acc, 49 | 'weight_F1': weight_F1 50 | } 51 | return eval_result 52 | 53 | def __eval_meld_classification(self, results, truths): 54 | acc = accuracy_score(truths, results) 55 | weight_F1 = f1_score(truths, results, average='weighted') 56 | 57 | 58 | eval_result = { 59 | 'acc': acc, 60 | 'weight_F1': weight_F1 61 | } 62 | return eval_result 63 | 64 | 65 | 66 | 67 | def __multiclass_acc(self, y_pred, y_true): 68 | """ 69 | Compute the multiclass accuracy w.r.t. 
groundtruth 70 | 71 | :param preds: Float array representing the predictions, dimension (N,) 72 | :param truths: Float/int array representing the groundtruth classes, dimension (N,) 73 | :return: Classification accuracy 74 | """ 75 | return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true)) 76 | 77 | 78 | def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False): 79 | test_preds = y_pred.view(-1).cpu().detach().numpy() 80 | test_truth = y_true.view(-1).cpu().detach().numpy() 81 | 82 | test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.) 83 | test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.) 84 | test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.) 85 | test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.) 86 | test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.) 87 | test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.) 88 | 89 | 90 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths 91 | corr = np.corrcoef(test_preds, test_truth)[0][1] 92 | mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7) 93 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5) 94 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3) 95 | 96 | non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0]) 97 | non_zeros_binary_truth = (test_truth[non_zeros] > 0) 98 | non_zeros_binary_preds = (test_preds[non_zeros] > 0) 99 | 100 | non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth) 101 | non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted') 102 | 103 | binary_truth = (test_truth >= 0) 104 | binary_preds = (test_preds >= 0) 105 | acc2 = accuracy_score(binary_preds, binary_truth) 106 | f_score = f1_score(binary_truth, binary_preds, average='weighted') 107 | 108 | eval_results = { 109 | "Has0_acc_2": round(acc2, 4), 110 | "Has0_F1_score": round(f_score, 4), 111 | "Non0_acc_2": round(non_zeros_acc2, 4), 112 | "Non0_F1_score": round(non_zeros_f1_score, 4), 113 | "Mult_acc_5": round(mult_a5, 4), 114 | "Mult_acc_7": round(mult_a7, 4), 115 | "MAE": round(mae, 4), 116 | "Corr": round(corr, 4) 117 | } 118 | return eval_results 119 | 120 | 121 | def __eval_mosi_regression(self, y_pred, y_true): 122 | return self.__eval_mosei_regression(y_pred, y_true) 123 | 124 | def __eval_sims_regression(self, y_pred, y_true): 125 | test_preds = y_pred.view(-1).cpu().detach().numpy() 126 | test_truth = y_true.view(-1).cpu().detach().numpy() 127 | test_preds = np.clip(test_preds, a_min=-1., a_max=1.) 128 | test_truth = np.clip(test_truth, a_min=-1., a_max=1.) 
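        # The block below discretises the clipped outputs into 2/3/5 sentiment classes by
        # thresholding and scores each granularity with __multiclass_acc. The "weak" 2-class
        # variant first keeps only samples whose ground-truth intensity lies in [-0.4, 0.4]
        # (the inline comment below still describes a [-0.6, 0.6] band). Worked example with
        # ms_2 = [-1.01, 0.0, 1.01]: a prediction of 0.35 and a truth of 0.2 both satisfy
        # value > 0.0 and value <= 1.01, land in bin 1, and count as a correct weak 2-class
        # decision.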
129 | 130 | # weak sentiment two classes{[-0.6, 0.0], (0.0, 0.6]} 131 | ms_2 = [-1.01, 0.0, 1.01] 132 | weak_index_l = np.where(test_truth >= -0.4)[0] 133 | weak_index_r = np.where(test_truth <= 0.4)[0] 134 | weak_index = [x for x in weak_index_l if x in weak_index_r] 135 | test_preds_weak = test_preds[weak_index] 136 | test_truth_weak = test_truth[weak_index] 137 | test_preds_a2_weak = test_preds_weak.copy() 138 | test_truth_a2_weak = test_truth_weak.copy() 139 | for i in range(2): 140 | test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i 141 | for i in range(2): 142 | test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i 143 | 144 | # two classes{[-1.0, 0.0], (0.0, 1.0]} 145 | ms_2 = [-1.01, 0.0, 1.01] 146 | test_preds_a2 = test_preds.copy() 147 | test_truth_a2 = test_truth.copy() 148 | for i in range(2): 149 | test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i 150 | for i in range(2): 151 | test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i 152 | 153 | # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]} 154 | ms_3 = [-1.01, -0.1, 0.1, 1.01] 155 | test_preds_a3 = test_preds.copy() 156 | test_truth_a3 = test_truth.copy() 157 | for i in range(3): 158 | test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i 159 | for i in range(3): 160 | test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i 161 | 162 | # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]} 163 | ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01] 164 | test_preds_a5 = test_preds.copy() 165 | test_truth_a5 = test_truth.copy() 166 | for i in range(5): 167 | test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i 168 | for i in range(5): 169 | test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i 170 | 171 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths 172 | corr = np.corrcoef(test_preds, test_truth)[0][1] 173 | mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2) 174 | mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak) 175 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3) 176 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5) 177 | f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted') 178 | r2 = r2_score(test_truth, test_preds) 179 | eval_results = { 180 | "Mult_acc_2": mult_a2, 181 | "Mult_acc_2_weak": mult_a2_weak, 182 | "Mult_acc_3": mult_a3, 183 | "Mult_acc_5": mult_a5, 184 | "F1_score": f_score, 185 | "MAE": mae, 186 | "Corr": corr, # Correlation Coefficient 187 | "R_squre": r2 188 | } 189 | return eval_results 190 | 191 | def __eval_simsv2_regression(self, y_pred, y_true): 192 | return self.__eval_sims_regression(y_pred, y_true) 193 | def getMetics(self, datasetName): 194 | return self.metrics_dict[datasetName.upper()] -------------------------------------------------------------------------------- /MSE-Llama2-7B/utils/metricsTop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.metrics import classification_report 4 | from sklearn.metrics import confusion_matrix 5 | from sklearn.metrics import precision_recall_fscore_support 6 | from sklearn.metrics import accuracy_score, f1_score 7 | from sklearn.metrics import r2_score 8 | from 
itertools import chain 9 | __all__ = ['MetricsTop'] 10 | 11 | class MetricsTop(): 12 | def __init__(self, args): 13 | if args.train_mode == "regression": 14 | self.metrics_dict = { 15 | 'MOSI': self.__eval_mosi_regression, 16 | 'MOSEI': self.__eval_mosei_regression, 17 | 'SIMS': self.__eval_sims_regression, 18 | 'SIMSV2': self.__eval_simsv2_regression 19 | } 20 | else: 21 | self.metrics_dict = { 22 | 'IEMOCAP': self.__eval_iemocap_classification, 23 | 'MELD': self.__eval_meld_classification, 24 | 'CHERMA': self.__eval_cherma_classification 25 | } 26 | self.label_index_mapping = args.label_index_mapping 27 | 28 | def __eval_iemocap_classification(self, results, truths): 29 | # label_index_mapping = self.label_index_mapping 30 | # # 主要通过混淆矩阵来计算 31 | # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results] 32 | # truths_indices = [label_index_mapping.get(label, -1) for label in truths] 33 | # acc = accuracy_score(truths_indices, results_indices) 34 | # weight_F1 = f1_score(truths_indices, results_indices, average='weighted') 35 | acc = accuracy_score(truths, results) 36 | weight_F1 = f1_score(truths, results, average='weighted') 37 | 38 | eval_result = { 39 | 'acc': acc, 40 | 'weight_F1': weight_F1 41 | } 42 | return eval_result 43 | 44 | def __eval_cherma_classification(self, results, truths): 45 | acc = accuracy_score(truths, results) 46 | weight_F1 = f1_score(truths, results, average='weighted') 47 | eval_result = { 48 | 'acc': acc, 49 | 'weight_F1': weight_F1 50 | } 51 | return eval_result 52 | 53 | def __eval_meld_classification(self, results, truths): 54 | acc = accuracy_score(truths, results) 55 | weight_F1 = f1_score(truths, results, average='weighted') 56 | 57 | 58 | eval_result = { 59 | 'acc': acc, 60 | 'weight_F1': weight_F1 61 | } 62 | return eval_result 63 | 64 | 65 | 66 | 67 | def __multiclass_acc(self, y_pred, y_true): 68 | """ 69 | Compute the multiclass accuracy w.r.t. groundtruth 70 | 71 | :param preds: Float array representing the predictions, dimension (N,) 72 | :param truths: Float/int array representing the groundtruth classes, dimension (N,) 73 | :return: Classification accuracy 74 | """ 75 | return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true)) 76 | 77 | 78 | def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False): 79 | test_preds = y_pred.view(-1).cpu().detach().numpy() 80 | test_truth = y_true.view(-1).cpu().detach().numpy() 81 | 82 | test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.) 83 | test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.) 84 | test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.) 85 | test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.) 86 | test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.) 87 | test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.) 
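        # Mult_acc_7/5/3 are computed by clipping predictions and labels to [-3, 3], [-2, 2]
        # and [-1, 1] and then rounding inside __multiclass_acc; for example, a prediction of
        # 1.6 and a label of 2.3 both round to 2 under the 7-class setting and count as
        # correct. Further down, the "Has0" metrics binarise every sample at >= 0, while the
        # "Non0" metrics drop exactly-zero labels and binarise at > 0.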
88 | 89 | 90 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths 91 | corr = np.corrcoef(test_preds, test_truth)[0][1] 92 | mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7) 93 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5) 94 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3) 95 | 96 | non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0]) 97 | non_zeros_binary_truth = (test_truth[non_zeros] > 0) 98 | non_zeros_binary_preds = (test_preds[non_zeros] > 0) 99 | 100 | non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth) 101 | non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted') 102 | 103 | binary_truth = (test_truth >= 0) 104 | binary_preds = (test_preds >= 0) 105 | acc2 = accuracy_score(binary_preds, binary_truth) 106 | f_score = f1_score(binary_truth, binary_preds, average='weighted') 107 | 108 | eval_results = { 109 | "Has0_acc_2": round(acc2, 4), 110 | "Has0_F1_score": round(f_score, 4), 111 | "Non0_acc_2": round(non_zeros_acc2, 4), 112 | "Non0_F1_score": round(non_zeros_f1_score, 4), 113 | "Mult_acc_5": round(mult_a5, 4), 114 | "Mult_acc_7": round(mult_a7, 4), 115 | "MAE": round(mae, 4), 116 | "Corr": round(corr, 4) 117 | } 118 | return eval_results 119 | 120 | 121 | def __eval_mosi_regression(self, y_pred, y_true): 122 | return self.__eval_mosei_regression(y_pred, y_true) 123 | 124 | def __eval_sims_regression(self, y_pred, y_true): 125 | test_preds = y_pred.view(-1).cpu().detach().numpy() 126 | test_truth = y_true.view(-1).cpu().detach().numpy() 127 | test_preds = np.clip(test_preds, a_min=-1., a_max=1.) 128 | test_truth = np.clip(test_truth, a_min=-1., a_max=1.) 129 | 130 | # weak sentiment two classes{[-0.6, 0.0], (0.0, 0.6]} 131 | ms_2 = [-1.01, 0.0, 1.01] 132 | weak_index_l = np.where(test_truth >= -0.4)[0] 133 | weak_index_r = np.where(test_truth <= 0.4)[0] 134 | weak_index = [x for x in weak_index_l if x in weak_index_r] 135 | test_preds_weak = test_preds[weak_index] 136 | test_truth_weak = test_truth[weak_index] 137 | test_preds_a2_weak = test_preds_weak.copy() 138 | test_truth_a2_weak = test_truth_weak.copy() 139 | for i in range(2): 140 | test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i 141 | for i in range(2): 142 | test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i 143 | 144 | # two classes{[-1.0, 0.0], (0.0, 1.0]} 145 | ms_2 = [-1.01, 0.0, 1.01] 146 | test_preds_a2 = test_preds.copy() 147 | test_truth_a2 = test_truth.copy() 148 | for i in range(2): 149 | test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i 150 | for i in range(2): 151 | test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i 152 | 153 | # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]} 154 | ms_3 = [-1.01, -0.1, 0.1, 1.01] 155 | test_preds_a3 = test_preds.copy() 156 | test_truth_a3 = test_truth.copy() 157 | for i in range(3): 158 | test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i 159 | for i in range(3): 160 | test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i 161 | 162 | # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]} 163 | ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01] 164 | test_preds_a5 = test_preds.copy() 165 | test_truth_a5 = test_truth.copy() 166 | for i in range(5): 167 | 
test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i 168 | for i in range(5): 169 | test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i 170 | 171 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths 172 | corr = np.corrcoef(test_preds, test_truth)[0][1] 173 | mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2) 174 | mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak) 175 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3) 176 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5) 177 | f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted') 178 | r2 = r2_score(test_truth, test_preds) 179 | eval_results = { 180 | "Mult_acc_2": mult_a2, 181 | "Mult_acc_2_weak": mult_a2_weak, 182 | "Mult_acc_3": mult_a3, 183 | "Mult_acc_5": mult_a5, 184 | "F1_score": f_score, 185 | "MAE": mae, 186 | "Corr": corr, # Correlation Coefficient 187 | "R_squre": r2 188 | } 189 | return eval_results 190 | 191 | def __eval_simsv2_regression(self, y_pred, y_true): 192 | return self.__eval_sims_regression(y_pred, y_true) 193 | def getMetics(self, datasetName): 194 | return self.metrics_dict[datasetName.upper()] -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/utils/metricsTop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.metrics import classification_report 4 | from sklearn.metrics import confusion_matrix 5 | from sklearn.metrics import precision_recall_fscore_support 6 | from sklearn.metrics import accuracy_score, f1_score 7 | from sklearn.metrics import r2_score 8 | from itertools import chain 9 | __all__ = ['MetricsTop'] 10 | 11 | class MetricsTop(): 12 | def __init__(self, args): 13 | if args.train_mode == "regression": 14 | self.metrics_dict = { 15 | 'MOSI': self.__eval_mosi_regression, 16 | 'MOSEI': self.__eval_mosei_regression, 17 | 'SIMS': self.__eval_sims_regression, 18 | 'SIMSV2': self.__eval_simsv2_regression 19 | } 20 | else: 21 | self.metrics_dict = { 22 | 'IEMOCAP': self.__eval_iemocap_classification, 23 | 'MELD': self.__eval_meld_classification, 24 | 'CHERMA': self.__eval_cherma_classification 25 | } 26 | self.label_index_mapping = args.label_index_mapping 27 | 28 | def __eval_iemocap_classification(self, results, truths): 29 | # label_index_mapping = self.label_index_mapping 30 | # # 主要通过混淆矩阵来计算 31 | # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results] 32 | # truths_indices = [label_index_mapping.get(label, -1) for label in truths] 33 | # acc = accuracy_score(truths_indices, results_indices) 34 | # weight_F1 = f1_score(truths_indices, results_indices, average='weighted') 35 | acc = accuracy_score(truths, results) 36 | weight_F1 = f1_score(truths, results, average='weighted') 37 | 38 | eval_result = { 39 | 'acc': acc, 40 | 'weight_F1': weight_F1 41 | } 42 | return eval_result 43 | 44 | def __eval_cherma_classification(self, results, truths): 45 | acc = accuracy_score(truths, results) 46 | weight_F1 = f1_score(truths, results, average='weighted') 47 | eval_result = { 48 | 'acc': acc, 49 | 'weight_F1': weight_F1 50 | } 51 | return eval_result 52 | 53 | def __eval_meld_classification(self, results, truths): 54 | acc = accuracy_score(truths, results) 55 | weight_F1 = f1_score(truths, results, average='weighted') 56 | 57 | 58 | eval_result = { 59 
| 'acc': acc, 60 | 'weight_F1': weight_F1 61 | } 62 | return eval_result 63 | 64 | 65 | 66 | 67 | def __multiclass_acc(self, y_pred, y_true): 68 | """ 69 | Compute the multiclass accuracy w.r.t. groundtruth 70 | 71 | :param preds: Float array representing the predictions, dimension (N,) 72 | :param truths: Float/int array representing the groundtruth classes, dimension (N,) 73 | :return: Classification accuracy 74 | """ 75 | return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true)) 76 | 77 | 78 | def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False): 79 | test_preds = y_pred.view(-1).cpu().detach().numpy() 80 | test_truth = y_true.view(-1).cpu().detach().numpy() 81 | 82 | test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.) 83 | test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.) 84 | test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.) 85 | test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.) 86 | test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.) 87 | test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.) 88 | 89 | 90 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths 91 | corr = np.corrcoef(test_preds, test_truth)[0][1] 92 | mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7) 93 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5) 94 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3) 95 | 96 | non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0]) 97 | non_zeros_binary_truth = (test_truth[non_zeros] > 0) 98 | non_zeros_binary_preds = (test_preds[non_zeros] > 0) 99 | 100 | non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth) 101 | non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted') 102 | 103 | binary_truth = (test_truth >= 0) 104 | binary_preds = (test_preds >= 0) 105 | acc2 = accuracy_score(binary_preds, binary_truth) 106 | f_score = f1_score(binary_truth, binary_preds, average='weighted') 107 | 108 | eval_results = { 109 | "Has0_acc_2": round(acc2, 4), 110 | "Has0_F1_score": round(f_score, 4), 111 | "Non0_acc_2": round(non_zeros_acc2, 4), 112 | "Non0_F1_score": round(non_zeros_f1_score, 4), 113 | "Mult_acc_5": round(mult_a5, 4), 114 | "Mult_acc_7": round(mult_a7, 4), 115 | "MAE": round(mae, 4), 116 | "Corr": round(corr, 4) 117 | } 118 | return eval_results 119 | 120 | 121 | def __eval_mosi_regression(self, y_pred, y_true): 122 | return self.__eval_mosei_regression(y_pred, y_true) 123 | 124 | def __eval_sims_regression(self, y_pred, y_true): 125 | test_preds = y_pred.view(-1).cpu().detach().numpy() 126 | test_truth = y_true.view(-1).cpu().detach().numpy() 127 | test_preds = np.clip(test_preds, a_min=-1., a_max=1.) 128 | test_truth = np.clip(test_truth, a_min=-1., a_max=1.) 
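        # Same evaluation logic as the metricsTop.py copies under MSE-ChatGLM3-6B and
        # MSE-Llama2-7B; only the key order of eval_results at the end of this method
        # differs, which changes the column order of the CSV written by run_normal() in
        # run.py but not the reported values.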
129 | 130 | # weak sentiment two classes{[-0.6, 0.0], (0.0, 0.6]} 131 | ms_2 = [-1.01, 0.0, 1.01] 132 | weak_index_l = np.where(test_truth >= -0.4)[0] 133 | weak_index_r = np.where(test_truth <= 0.4)[0] 134 | weak_index = [x for x in weak_index_l if x in weak_index_r] 135 | test_preds_weak = test_preds[weak_index] 136 | test_truth_weak = test_truth[weak_index] 137 | test_preds_a2_weak = test_preds_weak.copy() 138 | test_truth_a2_weak = test_truth_weak.copy() 139 | for i in range(2): 140 | test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i 141 | for i in range(2): 142 | test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i 143 | 144 | # two classes{[-1.0, 0.0], (0.0, 1.0]} 145 | ms_2 = [-1.01, 0.0, 1.01] 146 | test_preds_a2 = test_preds.copy() 147 | test_truth_a2 = test_truth.copy() 148 | for i in range(2): 149 | test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i 150 | for i in range(2): 151 | test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i 152 | 153 | # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]} 154 | ms_3 = [-1.01, -0.1, 0.1, 1.01] 155 | test_preds_a3 = test_preds.copy() 156 | test_truth_a3 = test_truth.copy() 157 | for i in range(3): 158 | test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i 159 | for i in range(3): 160 | test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i 161 | 162 | # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]} 163 | ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01] 164 | test_preds_a5 = test_preds.copy() 165 | test_truth_a5 = test_truth.copy() 166 | for i in range(5): 167 | test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i 168 | for i in range(5): 169 | test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i 170 | 171 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths 172 | corr = np.corrcoef(test_preds, test_truth)[0][1] 173 | mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2) 174 | mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak) 175 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3) 176 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5) 177 | f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted') 178 | r2 = r2_score(test_truth, test_preds) 179 | eval_results = { 180 | "Mult_acc_2": mult_a2, 181 | "F1_score": f_score, 182 | "Mult_acc_2_weak": mult_a2_weak, 183 | "MAE": mae, 184 | "Corr": corr, # Correlation Coefficient 185 | "Mult_acc_3": mult_a3, 186 | "Mult_acc_5": mult_a5, 187 | "R_squre": r2 188 | } 189 | return eval_results 190 | 191 | def __eval_simsv2_regression(self, y_pred, y_true): 192 | return self.__eval_sims_regression(y_pred, y_true) 193 | def getMetics(self, datasetName): 194 | return self.metrics_dict[datasetName.upper()] -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/models/subNets/Textmodel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import collections 4 | import re 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from models.ChatGLM3.modeling_chatglm import ChatGLMForConditionalGeneration 10 | from models.ChatGLM3.tokenization_chatglm import ChatGLMTokenizer 11 | 
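# Overview: Language_model wraps a frozen ChatGLM3 backbone. multimodal_prompt_wrap() splices
# the fused multimodal representation between the embeddings of a task prompt, forward()
# builds the training inputs and labels via input_processing(mode='train') and returns the
# loss-bearing LLM output, and generate() decodes answers with stream_generate() and
# post-processes the decoded text. A minimal usage sketch (assuming an `args` namespace that
# provides pretrain_LM, device, language, max_new_tokens, datasetName, train_mode and
# task_specific_prompt, and a fusion_embedding produced upstream by the multimodal fusion
# module, e.g. models/multiTask/CMCM.py):
#
#     lm = Language_model(args)              # loads the LLM and freezes its parameters
#     out = lm(fusion_embedding, labels)     # training step: loss-bearing model output
#     preds = lm.generate(fusion_embedding)  # inference: decoded predictions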
12 | __all__ = ['Language_model'] 13 | 14 | class Language_model (nn.Module): 15 | def __init__(self, args, use_PLM = True): 16 | """ 17 | language: en / cn 18 | """ 19 | super(Language_model, self).__init__() 20 | 21 | if use_PLM: 22 | pretrained_model = args.pretrain_LM #pretrained model select 23 | self.model = ChatGLMForConditionalGeneration.from_pretrained(pretrained_model, trust_remote_code=True, torch_dtype=torch.bfloat16).half() 24 | self.tokenizer = ChatGLMTokenizer.from_pretrained(pretrained_model, trust_remote_code=True) 25 | self.device = args.device 26 | self.language = args.language 27 | self.max_new_tokens = args.max_new_tokens 28 | self.datasetName = args.datasetName 29 | self.train_mode = args.train_mode 30 | self.task_specific_prompt = args.task_specific_prompt 31 | # freeze parameter 32 | for param in self.model.parameters(): 33 | param.requires_grad = False 34 | else: 35 | print('please use PLM') 36 | 37 | def text_embedding(self,text_ids): 38 | embeddings = self.model.base_model.get_input_embeddings() 39 | return embeddings(text_ids) 40 | 41 | 42 | def forward(self, fusion_embedding, labels): 43 | """ 44 | Args: 45 | fusion_embedding: the "concatenate" result of multimodal low rank fusion and text embedding 46 | label: ground_truth 47 | """ 48 | 49 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) #添加多模态输入的special prompt 50 | opt_tokens, labels = self.input_processing(fusion_embedding, labels, mode = 'train') #创建fusion+prompt+answer_mask的input和label 51 | 52 | with torch.cuda.amp.autocast(): 53 | output = self.model(input_ids = opt_tokens, input_fusion=fusion_embedding, labels = labels) # Models outputs are now tuples 54 | 55 | return output 56 | 57 | def generate(self, fusion_embedding): 58 | """ 59 | Args: 60 | samples (dict): A dictionary containing the following keys: 61 | use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling. 62 | num_beams (int): Number of beams for beam search. 1 means no beam search. 63 | max_new_tokens (int): The maximum length of the new tokens to be generated. 64 | top_p (float): The cumulative probability for nucleus sampling. 65 | top_k (int): The k for top-k sampling. 66 | penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty. 67 | num_captions (int): Number of captions to be generated for each image. 
68 | """ 69 | if self.train_mode == 'regression': 70 | # gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "penalty_alpha": 0.6, "top_p": 0.01, "temperature": 0.01} 71 | gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "top_k": 10} 72 | else: 73 | gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "top_k": 10 } 74 | 75 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # 添加多模态输入的special prompt 76 | opt_tokens, _ = self.input_processing(fusion_embedding, mode = 'generate') # 创建fusion+prompt的input 77 | 78 | context_length = opt_tokens.size(1) 79 | all_responses =[] 80 | 81 | for outputs in self.model.stream_generate(opt_tokens, **gen_kwargs, input_fusion=fusion_embedding): 82 | outputs = outputs[:, context_length:].tolist() 83 | response = self.tokenizer.batch_decode(outputs) 84 | # all_responses = list(map(float, response)) 85 | # all_responses = list(map(lambda x: float(x.replace('–', '-')), response)) 86 | # all_responses = list(map(lambda x: float(x.replace('–', '-').replace('一', '-').replace(':', '').replace('/', '').replace('(', '').replace(':', '')), response)) 87 | # all_responses = [float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?' 173 | special_token = '' 174 | else: 175 | prompt = '{问题}\n\n <多模态>' 176 | special_token = '' 177 | 178 | batch_size = fusion_embeddings.shape[0] 179 | p_before, p_after = prompt.split(special_token) 180 | p_before_tokens = self.tokenizer( 181 | p_before, return_tensors="pt", add_special_tokens=True).to(self.device) 182 | p_after_tokens = self.tokenizer( 183 | p_after, return_tensors="pt", add_special_tokens=False).to(self.device) 184 | p_before_embeds = self.text_embedding(p_before_tokens.input_ids).expand(batch_size, -1, -1) 185 | p_after_embeds = self.text_embedding(p_after_tokens.input_ids).expand(batch_size, -1, -1) 186 | wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1) 187 | 188 | return wrapped_fusion_embeddings -------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/data/TextPre.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import pickle 5 | import argparse 6 | import numpy as np 7 | from tqdm import tqdm 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification 13 | # from pytorch_transformers.amir_tokenization import BertTokenizer 14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule 15 | 16 | # from transformers.tokenization import BertTokenizer 17 | from models.subNets.BertTextEncoder import BertTextEncoder 18 | 19 | class TextPre(object): 20 | """A single set of features of data.""" 21 | 22 | def __init__(self, args): 23 | self.device = torch.device('cuda:0') 24 | self.args = args 25 | self.loadTextMap = { 26 | 'mosi': self.__load_data_mosi, 27 | 'mosei': self.__load_data_mosei 28 | } 29 | self.bert = BertTextEncoder(language=args.language).to(self.device) 30 | 31 | def textConvertID(self, data, tokenizer): 32 | features = {} 33 | Input_ids, Input_mask, Segment_ids = [], [], [] 34 | Raw_text, Visual, Audio = [], [], [] 35 | Label, ids = [], [] 36 | max_seq_length = self.args.max_seq_length 37 | for i in tqdm(range(len(data['raw_text']))): 38 | raw_text = 
data['raw_text'][i] 39 | visual = data['vision'][i] 40 | audio = data['audio'][i] 41 | tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True) 42 | 43 | if len(tokens_a) > max_seq_length - 2: 44 | tokens_a = tokens_a[:max_seq_length - 2] 45 | inversions_a = inversions_a[:max_seq_length - 2] 46 | 47 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"] 48 | 49 | segment_ids = [0] * len(tokens) 50 | 51 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 52 | 53 | input_mask = [1] * len(input_ids) 54 | padding = [0] * (max_seq_length - len(input_ids)) 55 | 56 | 57 | if self.args.aligned: 58 | text_len = min(len(raw_text.split()), max_seq_length) 59 | new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a] 60 | new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a] 61 | 62 | visual = np.array(new_visual) 63 | audio = np.array(new_audio) 64 | 65 | # add "start" and "end" for audio and vision 66 | audio_zero = np.zeros((1,audio.shape[1])) 67 | audio = np.concatenate((audio_zero,audio,audio_zero)) 68 | 69 | visual_zero = np.zeros((1,visual.shape[1])) 70 | visual = np.concatenate((visual_zero,visual,visual_zero)) 71 | 72 | audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1])) 73 | audio = np.concatenate((audio,audio_padding)) 74 | 75 | video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1])) 76 | visual = np.concatenate((visual,video_padding)) 77 | 78 | assert audio.shape[0] == max_seq_length 79 | assert visual.shape[0] == max_seq_length 80 | 81 | input_ids += padding 82 | input_mask += padding 83 | segment_ids += padding 84 | 85 | assert len(input_ids) == max_seq_length 86 | assert len(input_mask) == max_seq_length 87 | assert len(segment_ids) == max_seq_length 88 | 89 | label = float(data['labels'][i]) 90 | 91 | Input_ids.append(input_ids) 92 | Visual.append(visual) 93 | Audio.append(audio) 94 | Input_mask.append(input_mask) 95 | Segment_ids.append(segment_ids) 96 | Label.append(label) 97 | Raw_text.append(raw_text) 98 | ids.append(data['id'][i]) 99 | 100 | features['raw_text'] = np.array(Raw_text) 101 | features['audio'] = np.array(Audio) 102 | features['vision'] = np.array(Visual) 103 | features['labels'] = np.array(Label) 104 | features['id'] = np.array(ids) 105 | Input_ids = np.expand_dims(Input_ids, 1) 106 | Input_mask = np.expand_dims(Input_mask, 1) 107 | Segment_ids = np.expand_dims(Segment_ids, 1) 108 | text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1) 109 | features['text_bert'] = text_bert 110 | features['text'] = self.__convertID2Vector(text_bert) 111 | return features 112 | 113 | def __convertID2Vector(self, ids, batch_size=64): 114 | results = [] 115 | left = 0 116 | ids = torch.Tensor(ids) 117 | for left in tqdm(range(0, ids.size(0), batch_size)): 118 | right = min(left + batch_size, ids.size(0)) 119 | c_ids = ids[left:right].to(self.device) 120 | c_vector = self.bert(c_ids).detach().cpu().numpy() 121 | results.append(c_vector) 122 | results = np.concatenate(results, axis=0) 123 | return results 124 | 125 | def __load_data_mosi(self): 126 | # get text data 127 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented') 128 | text_data = {} 129 | for file in os.listdir(link): 130 | name = file.split('.')[0] 131 | for line in open(os.path.join(link, file), "r"): 132 | num_id, cur_t = line.split('_DELIM_') 133 | name_id = name + '_' + num_id.strip() 134 | text_data[name_id] = cur_t.strip() 135 | # get data 136 | def matchData(mode='train'): 137 | r_text = [] 
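            # Only the first element of cur_id is used as the lookup key (a
            # "<video>_<segment>"-style id) into text_data built above from the segmented
            # transcript files; the matched sentences are attached to the split as
            # data[mode]['raw_text'].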
138 | for cur_id in data[mode]['id']: 139 | r_text.append(text_data[cur_id[0]]) 140 | data[mode]['raw_text'] = r_text 141 | 142 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf: 143 | data = pickle.load(lf) 144 | 145 | matchData(mode='train') 146 | matchData(mode='valid') 147 | matchData(mode='test') 148 | 149 | return data 150 | 151 | def __load_data_mosei(self): 152 | def convert0(s): 153 | if s == '0': 154 | return '0.0' 155 | return s 156 | # get text data 157 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented') 158 | text_data = {} 159 | for file in os.listdir(link): 160 | name = file.split('.')[0] 161 | for line in open(os.path.join(link, file), "r"): 162 | items = line.split('___') 163 | name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3]) 164 | text_data[name_id.strip()] = items[-1].strip() 165 | # get data 166 | def matchData(mode='train'): 167 | r_text = [] 168 | for cur_id in data[mode]['id']: 169 | name = '_'.join(cur_id) 170 | r_text.append(text_data[name]) 171 | data[mode]['raw_text'] = r_text 172 | 173 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf: 174 | data = pickle.load(lf) 175 | 176 | matchData(mode='train') 177 | matchData(mode='valid') 178 | matchData(mode='test') 179 | 180 | return data 181 | 182 | def run(self): 183 | data = self.loadTextMap[self.args.datasetName]() 184 | 185 | train_list = data['train'] 186 | valid_list = data['valid'] 187 | test_list = data['test'] 188 | 189 | tokenizer = self.bert.get_tokenizer() 190 | 191 | save_data = {} 192 | save_data['train'] = self.textConvertID(train_list, tokenizer) 193 | save_data['valid'] = self.textConvertID(valid_list, tokenizer) 194 | save_data['test'] = self.textConvertID(test_list, tokenizer) 195 | 196 | if self.args.aligned: 197 | saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl') 198 | else: 199 | saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl') 200 | 201 | if not os.path.exists(os.path.dirname(saved_path)): 202 | os.makedirs(os.path.dirname(saved_path)) 203 | 204 | with open(saved_path, 'wb') as file: 205 | pickle.dump(save_data, file, protocol=4) 206 | print('Save Successful!') 207 | 208 | def parse_args(): 209 | parser = argparse.ArgumentParser() 210 | parser.add_argument('--datasetName', type=str, default='mosei', 211 | help='need aligned data (support mosi / mosei)') 212 | parser.add_argument('--language', type=str, default='cn', 213 | help='data language') 214 | parser.add_argument('--aligned', type=bool, default=True, 215 | help='need aligned data') 216 | parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI', 217 | help='path to MOSI / MOSEI') 218 | parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw', 219 | help='path to saved directory') 220 | parser.add_argument('--max_seq_length', type=int, default = 50, 221 | help='length') 222 | return parser.parse_args() 223 | 224 | if __name__ == "__main__": 225 | args = parse_args() 226 | tp = TextPre(args) 227 | tp.run() 228 | # tp.convertID2Vector() -------------------------------------------------------------------------------- /MSE-Llama2-7B/data/TextPre.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 
| import pickle 5 | import argparse 6 | import numpy as np 7 | from tqdm import tqdm 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification 13 | # from pytorch_transformers.amir_tokenization import BertTokenizer 14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule 15 | 16 | # from transformers.tokenization import BertTokenizer 17 | from models.subNets.BertTextEncoder import BertTextEncoder 18 | 19 | class TextPre(object): 20 | """A single set of features of data.""" 21 | 22 | def __init__(self, args): 23 | self.device = torch.device('cuda:0') 24 | self.args = args 25 | self.loadTextMap = { 26 | 'mosi': self.__load_data_mosi, 27 | 'mosei': self.__load_data_mosei 28 | } 29 | self.bert = BertTextEncoder(language=args.language).to(self.device) 30 | 31 | def textConvertID(self, data, tokenizer): 32 | features = {} 33 | Input_ids, Input_mask, Segment_ids = [], [], [] 34 | Raw_text, Visual, Audio = [], [], [] 35 | Label, ids = [], [] 36 | max_seq_length = self.args.max_seq_length 37 | for i in tqdm(range(len(data['raw_text']))): 38 | raw_text = data['raw_text'][i] 39 | visual = data['vision'][i] 40 | audio = data['audio'][i] 41 | tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True) 42 | 43 | if len(tokens_a) > max_seq_length - 2: 44 | tokens_a = tokens_a[:max_seq_length - 2] 45 | inversions_a = inversions_a[:max_seq_length - 2] 46 | 47 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"] 48 | 49 | segment_ids = [0] * len(tokens) 50 | 51 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 52 | 53 | input_mask = [1] * len(input_ids) 54 | padding = [0] * (max_seq_length - len(input_ids)) 55 | 56 | 57 | if self.args.aligned: 58 | text_len = min(len(raw_text.split()), max_seq_length) 59 | new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a] 60 | new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a] 61 | 62 | visual = np.array(new_visual) 63 | audio = np.array(new_audio) 64 | 65 | # add "start" and "end" for audio and vision 66 | audio_zero = np.zeros((1,audio.shape[1])) 67 | audio = np.concatenate((audio_zero,audio,audio_zero)) 68 | 69 | visual_zero = np.zeros((1,visual.shape[1])) 70 | visual = np.concatenate((visual_zero,visual,visual_zero)) 71 | 72 | audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1])) 73 | audio = np.concatenate((audio,audio_padding)) 74 | 75 | video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1])) 76 | visual = np.concatenate((visual,video_padding)) 77 | 78 | assert audio.shape[0] == max_seq_length 79 | assert visual.shape[0] == max_seq_length 80 | 81 | input_ids += padding 82 | input_mask += padding 83 | segment_ids += padding 84 | 85 | assert len(input_ids) == max_seq_length 86 | assert len(input_mask) == max_seq_length 87 | assert len(segment_ids) == max_seq_length 88 | 89 | label = float(data['labels'][i]) 90 | 91 | Input_ids.append(input_ids) 92 | Visual.append(visual) 93 | Audio.append(audio) 94 | Input_mask.append(input_mask) 95 | Segment_ids.append(segment_ids) 96 | Label.append(label) 97 | Raw_text.append(raw_text) 98 | ids.append(data['id'][i]) 99 | 100 | features['raw_text'] = np.array(Raw_text) 101 | features['audio'] = np.array(Audio) 102 | features['vision'] = np.array(Visual) 103 | features['labels'] = np.array(Label) 104 | features['id'] = np.array(ids) 105 | Input_ids 
= np.expand_dims(Input_ids, 1) 106 | Input_mask = np.expand_dims(Input_mask, 1) 107 | Segment_ids = np.expand_dims(Segment_ids, 1) 108 | text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1) 109 | features['text_bert'] = text_bert 110 | features['text'] = self.__convertID2Vector(text_bert) 111 | return features 112 | 113 | def __convertID2Vector(self, ids, batch_size=64): 114 | results = [] 115 | left = 0 116 | ids = torch.Tensor(ids) 117 | for left in tqdm(range(0, ids.size(0), batch_size)): 118 | right = min(left + batch_size, ids.size(0)) 119 | c_ids = ids[left:right].to(self.device) 120 | c_vector = self.bert(c_ids).detach().cpu().numpy() 121 | results.append(c_vector) 122 | results = np.concatenate(results, axis=0) 123 | return results 124 | 125 | def __load_data_mosi(self): 126 | # get text data 127 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented') 128 | text_data = {} 129 | for file in os.listdir(link): 130 | name = file.split('.')[0] 131 | for line in open(os.path.join(link, file), "r"): 132 | num_id, cur_t = line.split('_DELIM_') 133 | name_id = name + '_' + num_id.strip() 134 | text_data[name_id] = cur_t.strip() 135 | # get data 136 | def matchData(mode='train'): 137 | r_text = [] 138 | for cur_id in data[mode]['id']: 139 | r_text.append(text_data[cur_id[0]]) 140 | data[mode]['raw_text'] = r_text 141 | 142 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf: 143 | data = pickle.load(lf) 144 | 145 | matchData(mode='train') 146 | matchData(mode='valid') 147 | matchData(mode='test') 148 | 149 | return data 150 | 151 | def __load_data_mosei(self): 152 | def convert0(s): 153 | if s == '0': 154 | return '0.0' 155 | return s 156 | # get text data 157 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented') 158 | text_data = {} 159 | for file in os.listdir(link): 160 | name = file.split('.')[0] 161 | for line in open(os.path.join(link, file), "r"): 162 | items = line.split('___') 163 | name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3]) 164 | text_data[name_id.strip()] = items[-1].strip() 165 | # get data 166 | def matchData(mode='train'): 167 | r_text = [] 168 | for cur_id in data[mode]['id']: 169 | name = '_'.join(cur_id) 170 | r_text.append(text_data[name]) 171 | data[mode]['raw_text'] = r_text 172 | 173 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf: 174 | data = pickle.load(lf) 175 | 176 | matchData(mode='train') 177 | matchData(mode='valid') 178 | matchData(mode='test') 179 | 180 | return data 181 | 182 | def run(self): 183 | data = self.loadTextMap[self.args.datasetName]() 184 | 185 | train_list = data['train'] 186 | valid_list = data['valid'] 187 | test_list = data['test'] 188 | 189 | tokenizer = self.bert.get_tokenizer() 190 | 191 | save_data = {} 192 | save_data['train'] = self.textConvertID(train_list, tokenizer) 193 | save_data['valid'] = self.textConvertID(valid_list, tokenizer) 194 | save_data['test'] = self.textConvertID(test_list, tokenizer) 195 | 196 | if self.args.aligned: 197 | saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl') 198 | else: 199 | saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl') 200 | 201 | if not os.path.exists(os.path.dirname(saved_path)): 202 | os.makedirs(os.path.dirname(saved_path)) 203 | 204 | with open(saved_path, 'wb') as file: 205 | pickle.dump(save_data, file, protocol=4) 206 
| print('Save Successful!') 207 | 208 | def parse_args(): 209 | parser = argparse.ArgumentParser() 210 | parser.add_argument('--datasetName', type=str, default='mosei', 211 | help='need aligned data (support mosi / mosei)') 212 | parser.add_argument('--language', type=str, default='cn', 213 | help='data language') 214 | parser.add_argument('--aligned', type=bool, default=True, 215 | help='need aligned data') 216 | parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI', 217 | help='path to MOSI / MOSEI') 218 | parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw', 219 | help='path to saved directory') 220 | parser.add_argument('--max_seq_length', type=int, default = 50, 221 | help='length') 222 | return parser.parse_args() 223 | 224 | if __name__ == "__main__": 225 | args = parse_args() 226 | tp = TextPre(args) 227 | tp.run() 228 | # tp.convertID2Vector() -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/data/TextPre.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import pickle 5 | import argparse 6 | import numpy as np 7 | from tqdm import tqdm 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification 13 | # from pytorch_transformers.amir_tokenization import BertTokenizer 14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule 15 | 16 | # from transformers.tokenization import BertTokenizer 17 | from models.subNets.BertTextEncoder import BertTextEncoder 18 | 19 | class TextPre(object): 20 | """A single set of features of data.""" 21 | 22 | def __init__(self, args): 23 | self.device = torch.device('cuda:0') 24 | self.args = args 25 | self.loadTextMap = { 26 | 'mosi': self.__load_data_mosi, 27 | 'mosei': self.__load_data_mosei 28 | } 29 | self.bert = BertTextEncoder(language=args.language).to(self.device) 30 | 31 | def textConvertID(self, data, tokenizer): 32 | features = {} 33 | Input_ids, Input_mask, Segment_ids = [], [], [] 34 | Raw_text, Visual, Audio = [], [], [] 35 | Label, ids = [], [] 36 | max_seq_length = self.args.max_seq_length 37 | for i in tqdm(range(len(data['raw_text']))): 38 | raw_text = data['raw_text'][i] 39 | visual = data['vision'][i] 40 | audio = data['audio'][i] 41 | tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True) 42 | 43 | if len(tokens_a) > max_seq_length - 2: 44 | tokens_a = tokens_a[:max_seq_length - 2] 45 | inversions_a = inversions_a[:max_seq_length - 2] 46 | 47 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"] 48 | 49 | segment_ids = [0] * len(tokens) 50 | 51 | input_ids = tokenizer.convert_tokens_to_ids(tokens) 52 | 53 | input_mask = [1] * len(input_ids) 54 | padding = [0] * (max_seq_length - len(input_ids)) 55 | 56 | 57 | if self.args.aligned: 58 | text_len = min(len(raw_text.split()), max_seq_length) 59 | new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a] 60 | new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a] 61 | 62 | visual = np.array(new_visual) 63 | audio = np.array(new_audio) 64 | 65 | # add "start" and "end" for audio and vision 66 | audio_zero = np.zeros((1,audio.shape[1])) 67 | audio = 
np.concatenate((audio_zero,audio,audio_zero)) 68 | 69 | visual_zero = np.zeros((1,visual.shape[1])) 70 | visual = np.concatenate((visual_zero,visual,visual_zero)) 71 | 72 | audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1])) 73 | audio = np.concatenate((audio,audio_padding)) 74 | 75 | video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1])) 76 | visual = np.concatenate((visual,video_padding)) 77 | 78 | assert audio.shape[0] == max_seq_length 79 | assert visual.shape[0] == max_seq_length 80 | 81 | input_ids += padding 82 | input_mask += padding 83 | segment_ids += padding 84 | 85 | assert len(input_ids) == max_seq_length 86 | assert len(input_mask) == max_seq_length 87 | assert len(segment_ids) == max_seq_length 88 | 89 | label = float(data['labels'][i]) 90 | 91 | Input_ids.append(input_ids) 92 | Visual.append(visual) 93 | Audio.append(audio) 94 | Input_mask.append(input_mask) 95 | Segment_ids.append(segment_ids) 96 | Label.append(label) 97 | Raw_text.append(raw_text) 98 | ids.append(data['id'][i]) 99 | 100 | features['raw_text'] = np.array(Raw_text) 101 | features['audio'] = np.array(Audio) 102 | features['vision'] = np.array(Visual) 103 | features['labels'] = np.array(Label) 104 | features['id'] = np.array(ids) 105 | Input_ids = np.expand_dims(Input_ids, 1) 106 | Input_mask = np.expand_dims(Input_mask, 1) 107 | Segment_ids = np.expand_dims(Segment_ids, 1) 108 | text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1) 109 | features['text_bert'] = text_bert 110 | features['text'] = self.__convertID2Vector(text_bert) 111 | return features 112 | 113 | def __convertID2Vector(self, ids, batch_size=64): 114 | results = [] 115 | left = 0 116 | ids = torch.Tensor(ids) 117 | for left in tqdm(range(0, ids.size(0), batch_size)): 118 | right = min(left + batch_size, ids.size(0)) 119 | c_ids = ids[left:right].to(self.device) 120 | c_vector = self.bert(c_ids).detach().cpu().numpy() 121 | results.append(c_vector) 122 | results = np.concatenate(results, axis=0) 123 | return results 124 | 125 | def __load_data_mosi(self): 126 | # get text data 127 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented') 128 | text_data = {} 129 | for file in os.listdir(link): 130 | name = file.split('.')[0] 131 | for line in open(os.path.join(link, file), "r"): 132 | num_id, cur_t = line.split('_DELIM_') 133 | name_id = name + '_' + num_id.strip() 134 | text_data[name_id] = cur_t.strip() 135 | # get data 136 | def matchData(mode='train'): 137 | r_text = [] 138 | for cur_id in data[mode]['id']: 139 | r_text.append(text_data[cur_id[0]]) 140 | data[mode]['raw_text'] = r_text 141 | 142 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf: 143 | data = pickle.load(lf) 144 | 145 | matchData(mode='train') 146 | matchData(mode='valid') 147 | matchData(mode='test') 148 | 149 | return data 150 | 151 | def __load_data_mosei(self): 152 | def convert0(s): 153 | if s == '0': 154 | return '0.0' 155 | return s 156 | # get text data 157 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented') 158 | text_data = {} 159 | for file in os.listdir(link): 160 | name = file.split('.')[0] 161 | for line in open(os.path.join(link, file), "r"): 162 | items = line.split('___') 163 | name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3]) 164 | text_data[name_id.strip()] = items[-1].strip() 165 | # get data 166 | def matchData(mode='train'): 167 | r_text = [] 168 | for cur_id in data[mode]['id']: 169 | 
name = '_'.join(cur_id) 170 | r_text.append(text_data[name]) 171 | data[mode]['raw_text'] = r_text 172 | 173 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf: 174 | data = pickle.load(lf) 175 | 176 | matchData(mode='train') 177 | matchData(mode='valid') 178 | matchData(mode='test') 179 | 180 | return data 181 | 182 | def run(self): 183 | data = self.loadTextMap[self.args.datasetName]() 184 | 185 | train_list = data['train'] 186 | valid_list = data['valid'] 187 | test_list = data['test'] 188 | 189 | tokenizer = self.bert.get_tokenizer() 190 | 191 | save_data = {} 192 | save_data['train'] = self.textConvertID(train_list, tokenizer) 193 | save_data['valid'] = self.textConvertID(valid_list, tokenizer) 194 | save_data['test'] = self.textConvertID(test_list, tokenizer) 195 | 196 | if self.args.aligned: 197 | saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl') 198 | else: 199 | saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl') 200 | 201 | if not os.path.exists(os.path.dirname(saved_path)): 202 | os.makedirs(os.path.dirname(saved_path)) 203 | 204 | with open(saved_path, 'wb') as file: 205 | pickle.dump(save_data, file, protocol=4) 206 | print('Save Successful!') 207 | 208 | def parse_args(): 209 | parser = argparse.ArgumentParser() 210 | parser.add_argument('--datasetName', type=str, default='mosei', 211 | help='need aligned data (support mosi / mosei)') 212 | parser.add_argument('--language', type=str, default='cn', 213 | help='data language') 214 | parser.add_argument('--aligned', type=bool, default=True, 215 | help='need aligned data') 216 | parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI', 217 | help='path to MOSI / MOSEI') 218 | parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw', 219 | help='path to saved directory') 220 | parser.add_argument('--max_seq_length', type=int, default = 50, 221 | help='length') 222 | return parser.parse_args() 223 | 224 | if __name__ == "__main__": 225 | args = parse_args() 226 | tp = TextPre(args) 227 | tp.run() 228 | # tp.convertID2Vector() -------------------------------------------------------------------------------- /MSE-Llama2-7B/models/subNets/Textmodel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import collections 4 | import re 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from modelscope import AutoTokenizer, AutoModel, AutoModelForCausalLM 10 | 11 | 12 | __all__ = ['Language_model'] 13 | 14 | class Language_model (nn.Module): 15 | def __init__(self, args, use_PLM = True): 16 | """ 17 | language: en / cn 18 | """ 19 | super(Language_model, self).__init__() 20 | 21 | if use_PLM: 22 | pretrained_model = args.pretrain_LM #pretrained model select 23 | self.tokenizer = AutoTokenizer.from_pretrained( 24 | pretrained_model, 25 | padding_side='left', 26 | trust_remote_code=True 27 | ) 28 | self.model = AutoModelForCausalLM.from_pretrained( 29 | pretrained_model, 30 | trust_remote_code=True, 31 | torch_dtype=torch.bfloat16 32 | ).half() 33 | # self.pad_token_id = self.tokenizer.convert_tokens_to_ids('<|extra_0|>') 34 | # self.tokenizer.pad_token_id = self.pad_token_id 35 | self.tokenizer.pad_token_id = 0 36 | self.eos_token_id = 
self.tokenizer.convert_tokens_to_ids('<|endoftext|>') 37 | 38 | self.device = args.device 39 | self.language = args.language 40 | self.max_new_tokens = args.max_new_tokens 41 | self.datasetName = args.datasetName 42 | self.train_mode = args.train_mode 43 | self.task_specific_prompt = args.task_specific_prompt 44 | # freeze parameter 45 | for param in self.model.parameters(): 46 | param.requires_grad = False 47 | else: 48 | print('please use PLM') 49 | 50 | def text_embedding(self,text_ids): 51 | embeddings = self.model.base_model.get_input_embeddings() 52 | return embeddings(text_ids) 53 | 54 | 55 | def forward(self, fusion_embedding, labels): 56 | """ 57 | Args: 58 | fusion_embedding: the "concatenate" result of multimodal low rank fusion and text embedding 59 | label: ground_truth 60 | """ 61 | 62 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) #添加多模态输入的special prompt 63 | opt_tokens, atts_bos, atts_fusion, labels, labels_atts = self.input_processing(fusion_embedding, labels, mode = 'train') #创建fusion+prompt+answer_mask的input和label 64 | 65 | attention_mask = torch.cat([atts_bos, atts_fusion, labels_atts], dim=1) 66 | 67 | 68 | with torch.cuda.amp.autocast(): 69 | output = self.model(inputs_embeds = opt_tokens, return_dict=True, labels = labels) # Models outputs are now tuples 70 | 71 | return output 72 | 73 | def generate(self, fusion_embedding): 74 | """ 75 | Args: 76 | samples (dict): A dictionary containing the following keys: 77 | use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling. 78 | num_beams (int): Number of beams for beam search. 1 means no beam search. 79 | max_new_tokens (int): The maximum length of the new tokens to be generated. 80 | top_p (float): The cumulative probability for nucleus sampling. 81 | top_k (int): The k for top-k sampling. 82 | penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty. 83 | num_captions (int): Number of captions to be generated for each image. 84 | """ 85 | 86 | 87 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # 添加多模态输入的special prompt 88 | opt_tokens, _, _, _, _= self.input_processing(fusion_embedding, mode = 'generate') # 创建fusion+prompt的input 89 | # attention_mask = torch.cat([atts_bos, atts_fusion], dim=1) 90 | context_length = opt_tokens.size(1) 91 | all_responses =[] 92 | 93 | outputs = self.model.generate(inputs_embeds = opt_tokens, 94 | num_beams=1, 95 | do_sample = False, 96 | top_p = None, 97 | max_new_tokens = self.max_new_tokens) 98 | responses = self.tokenizer.batch_decode(outputs[:,1:], add_special_tokens=False, skip_special_tokens=True, clean_up_tokenization_spaces=False) 99 | 100 | # print(responses) 101 | for response in responses: 102 | # 处理生成结果,将一些不必要的字符转换为0 103 | if self.train_mode == 'regression': 104 | try: 105 | value = float( 106 | response.replace('–', '-').replace('一', '-').replace(':', '').replace('/', '').replace('(', '').replace( 107 | ':', '')) 108 | # value = float(re.sub(r'[^0-9.-]', '0', re.sub(r'(? 
0 else f"{label.item():.{1}f}" for label in 160 | # labels] 161 | # label_template = [ 162 | # f"+{label.item():.1f}" if label > 0 else f"{+label.item():.1f}" if label == 0 else f"{label.item():.1f}" 163 | # for label in labels] 164 | else: 165 | label_template = [f"{label.item()}" for label in labels] 166 | 167 | labels = self.tokenizer(label_template, padding=True, return_tensors="pt", add_special_tokens=False).to(self.device) 168 | labels_id = labels["input_ids"] 169 | labels_atts = labels["attention_mask"] 170 | 171 | labels_embedding = self.text_embedding(labels_id) 172 | labels_matrix = torch.empty(opt_tokens.size(0), opt_tokens.size(1)).fill_(-100).long().to(self.device) # bz * seq_len 只构建和token_ids一个维度的矩阵 173 | opt_tokens = torch.cat([opt_tokens, labels_embedding], dim=1) # 将输入与labels拼接 174 | labels = torch.cat([labels_matrix, labels_id], dim=1) 175 | 176 | 177 | else: 178 | labels_atts = None 179 | 180 | return opt_tokens, labels, labels_atts 181 | 182 | def get_task_prompt(self): 183 | # get the task_specific_prompt 184 | prompt_text = self.task_specific_prompt 185 | prompt_ids = self.tokenizer(prompt_text, padding=True, return_tensors="pt", add_special_tokens=False)["input_ids"].to(self.device) 186 | 187 | return prompt_ids 188 | 189 | def multimodal_prompt_wrap(self,fusion_embeddings): 190 | """ 191 | Args: 192 | Wrap the input with a special token 193 | """ 194 | if self.language == "en": 195 | prompt = '' 196 | special_token = '' 197 | else: 198 | prompt = '<多模态>' 199 | special_token = '' 200 | 201 | batch_size = fusion_embeddings.shape[0] 202 | p_before, p_after = prompt.split(special_token) 203 | p_before_tokens = self.tokenizer( 204 | p_before, return_tensors="pt", add_special_tokens=True).to(self.device) 205 | p_after_tokens = self.tokenizer( 206 | p_after, return_tensors="pt", add_special_tokens=False).to(self.device) 207 | p_before_embeds = self.text_embedding(p_before_tokens.input_ids.expand(batch_size, -1)) 208 | p_after_embeds = self.text_embedding(p_after_tokens.input_ids.expand(batch_size, -1)) 209 | wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1) 210 | 211 | 212 | return wrapped_fusion_embeddings -------------------------------------------------------------------------------- /MSE-Qwen-1.8B/models/subNets/Textmodel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import collections 4 | import re 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from modelscope import AutoTokenizer, AutoModel, AutoModelForCausalLM 10 | 11 | 12 | __all__ = ['Language_model'] 13 | 14 | class Language_model (nn.Module): 15 | def __init__(self, args, use_PLM = True): 16 | """ 17 | language: en / cn 18 | """ 19 | super(Language_model, self).__init__() 20 | 21 | if use_PLM: 22 | pretrained_model = args.pretrain_LM #pretrained model select 23 | self.tokenizer = AutoTokenizer.from_pretrained( 24 | pretrained_model, 25 | padding_side='left', 26 | trust_remote_code=True 27 | ) 28 | self.model = AutoModelForCausalLM.from_pretrained( 29 | pretrained_model, 30 | trust_remote_code=True, 31 | torch_dtype=torch.bfloat16 32 | ).half() 33 | # self.pad_token_id = self.tokenizer.convert_tokens_to_ids('<|extra_0|>') 34 | # self.tokenizer.pad_token_id = self.pad_token_id 35 | # self.tokenizer.pad_token_id = 0 36 | self.eos_token_id = self.tokenizer.convert_tokens_to_ids('<|endoftext|>') 37 | self.tokenizer.pad_token_id = self.eos_token_id 38 | 39 | 
self.bos_token_id = self.tokenizer.convert_tokens_to_ids('<|im_start|>') 40 | self.tokenizer.bos_token_id = self.bos_token_id 41 | 42 | self.device = args.device 43 | self.language = args.language 44 | self.max_new_tokens = args.max_new_tokens 45 | self.datasetName = args.datasetName 46 | self.train_mode = args.train_mode 47 | self.task_specific_prompt = args.task_specific_prompt 48 | # freeze parameters 49 | for param in self.model.parameters(): 50 | param.requires_grad = False 51 | else: 52 | print('please use PLM') 53 | 54 | def text_embedding(self,text_ids): 55 | embeddings = self.model.base_model.get_input_embeddings() 56 | return embeddings(text_ids) 57 | 58 | 59 | def forward(self, fusion_embedding, labels): 60 | """ 61 | Args: 62 | fusion_embedding: the concatenation of the multimodal low-rank fusion output and the text embedding 63 | labels: ground truth 64 | """ 65 | 66 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # wrap the multimodal input with its special prompt tokens 67 | opt_tokens, atts_bos, atts_fusion, labels, labels_atts = self.input_processing(fusion_embedding, labels, mode = 'train') # build the fusion+prompt+answer_mask input and the matching labels 68 | 69 | attention_mask = torch.cat([atts_bos, atts_fusion, labels_atts], dim=1) 70 | 71 | 72 | with torch.cuda.amp.autocast(): 73 | output = self.model(inputs_embeds = opt_tokens, return_dict=True, labels = labels) # Models outputs are now tuples 74 | 75 | return output 76 | 77 | def generate(self, fusion_embedding): 78 | """ 79 | Args: 80 | samples (dict): A dictionary containing the following keys: 81 | use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling. 82 | num_beams (int): Number of beams for beam search. 1 means no beam search. 83 | max_new_tokens (int): The maximum length of the new tokens to be generated. 84 | top_p (float): The cumulative probability for nucleus sampling. 85 | top_k (int): The k for top-k sampling. 86 | penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty. 87 | num_captions (int): Number of captions to be generated for each image. 
88 | """ 89 | 90 | 91 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # 添加多模态输入的special prompt 92 | opt_tokens, atts_bos, atts_fusion, _, _= self.input_processing(fusion_embedding, mode = 'generate') # 创建fusion+prompt的input 93 | attention_mask = torch.cat([atts_bos, atts_fusion], dim=1) 94 | context_length = opt_tokens.size(1) 95 | all_responses =[] 96 | 97 | outputs = self.model.generate(inputs_embeds = opt_tokens, 98 | num_beams=1, 99 | do_sample = False, 100 | bos_token_id = self.tokenizer.bos_token_id, 101 | max_new_tokens = self.max_new_tokens) 102 | responses = self.tokenizer.batch_decode(outputs[:,1:], add_special_tokens=False, skip_special_tokens=True, clean_up_tokenization_spaces=False) 103 | 104 | # print(responses) 105 | for response in responses: 106 | # 处理生成结果,将一些不必要的字符转换为0 107 | if self.train_mode == 'regression': 108 | try: 109 | value = float( 110 | response.replace('–', '-').replace('一', '-').replace(':', '').replace('/', '').replace('(', '').replace( 111 | ':', '')) 112 | # value = float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?= 0 else f"{label.item():.{1}f}" for label in 164 | labels] 165 | # label_template = [ 166 | # f"+{label.item():.1f}" if label > 0 else f"{+label.item():.1f}" if label == 0 else f"{label.item():.1f}" 167 | # for label in labels] 168 | else: 169 | label_template = [f"{label.item()}" for label in labels] 170 | 171 | labels = self.tokenizer(label_template, padding=True, return_tensors="pt", add_special_tokens=False).to(self.device) 172 | labels_id = labels["input_ids"] 173 | labels_atts = labels["attention_mask"] 174 | 175 | # a = [' ','0.20','-0.2','5','2','0','1','3','4','5','6','7','8','9'] 176 | # c = [31106] 177 | # b = self.tokenizer(a, padding=True, return_tensors="pt", add_special_tokens=False) 178 | # d = self.tokenizer.decode(c) 179 | labels_embedding = self.text_embedding(labels_id) 180 | labels_matrix = torch.empty(opt_tokens.size(0), opt_tokens.size(1)).fill_(-100).long().to(self.device) # bz * seq_len 只构建和token_ids一个维度的矩阵 181 | opt_tokens = torch.cat([opt_tokens, labels_embedding], dim=1) # 将输入与labels拼接 182 | labels = torch.cat([labels_matrix, labels_id], dim=1) 183 | 184 | 185 | else: 186 | labels_atts = None 187 | 188 | return opt_tokens, labels, labels_atts 189 | 190 | def get_task_prompt(self): 191 | # get the task_specific_prompt 192 | prompt_text = self.task_specific_prompt 193 | prompt_ids = self.tokenizer(prompt_text, padding=True, return_tensors="pt", add_special_tokens=False)["input_ids"].to(self.device) 194 | 195 | return prompt_ids 196 | 197 | def multimodal_prompt_wrap(self,fusion_embeddings): 198 | """ 199 | Args: 200 | Wrap the input with a special token 201 | """ 202 | if self.language == "en": 203 | prompt = '' 204 | special_token = '' 205 | else: 206 | prompt = '<多模态>' 207 | special_token = '' 208 | 209 | batch_size = fusion_embeddings.shape[0] 210 | p_before, p_after = prompt.split(special_token) 211 | p_before_tokens = self.tokenizer( 212 | p_before, return_tensors="pt", add_special_tokens=True).to(self.device) 213 | p_after_tokens = self.tokenizer( 214 | p_after, return_tensors="pt", add_special_tokens=False).to(self.device) 215 | p_before_embeds = self.text_embedding(p_before_tokens.input_ids.expand(batch_size, -1)) 216 | p_after_embeds = self.text_embedding(p_after_tokens.input_ids.expand(batch_size, -1)) 217 | wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1) 218 | 219 | 220 | return wrapped_fusion_embeddings 
-------------------------------------------------------------------------------- /MSE-ChatGLM3-6B/data/load_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import pickle 4 | import json 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | import gzip 9 | import torch.nn.functional as F 10 | from torch.utils.data import Dataset, DataLoader 11 | from modelscope import AutoTokenizer, AutoModel 12 | from operator import itemgetter 13 | from torch.nn.utils.rnn import pad_sequence 14 | 15 | __all__ = ['MMDataLoader'] 16 | 17 | logger = logging.getLogger('MSA') 18 | 19 | class MMDataset(Dataset): 20 | def __init__(self, args, mode='train'): 21 | self.mode = mode 22 | self.args = args 23 | DATA_MAP = { 24 | 'mosi': self.__init_mosi, 25 | 'mosei': self.__init_mosei, 26 | 'sims': self.__init_sims, 27 | 'simsv2': self.__init_simsv2, 28 | 'meld': self.__init_meld, 29 | 'iemocap': self.__init_iemocap, 30 | 'cherma': self.__init_cherma, 31 | 32 | } 33 | DATA_MAP[args.datasetName]() 34 | 35 | 36 | 37 | def __init_meld(self): 38 | data_path = os.path.join(self.args.dataPath, self.args.datasetName + '_' + self.mode + '.pkl') 39 | label_index_mapping = self.args.label_index_mapping 40 | with open(data_path, 'rb') as f: 41 | data = pickle.load(f) 42 | self.vision = np.array(list(map(lambda item: item['features']['video'], data))).astype(np.float32) 43 | self.audio = np.array(list(map(lambda item: item['features']['audio'], data))).astype(np.float32) 44 | self.rawText = np.array(list(map(lambda item: item['features']['text'], data))) 45 | 46 | # self.labels = { 47 | # 'M': list(map(lambda item: item['label'], data)) 48 | # } 49 | self.labels = { 50 | 'M': list(map(lambda item: label_index_mapping.get(item['label'],-1), data)) 51 | } 52 | if self.args.use_PLM: 53 | self.text = self.PLM_tokenizer(self.rawText) 54 | 55 | # label_mapping 56 | 57 | # self.labels['M'] = [label_index_mapping.get(label, -1) for label in self.labels['M']] 58 | 59 | if not self.args.need_data_aligned: 60 | self.audio_lengths = np.array(list(map(lambda item: item['features']['audio_len'], data))) 61 | self.vision_lengths = np.array(list(map(lambda item: item['features']['video_len'], data))) 62 | 63 | def __init_iemocap(self): 64 | return self.__init_meld() 65 | 66 | def __init_cherma(self): 67 | return self.__init_meld() 68 | 69 | def __init_mosi(self): 70 | with open(self.args.dataPath, 'rb') as f: 71 | data = pickle.load(f) 72 | if self.args.use_PLM: 73 | self.text = data[self.mode]['raw_text'] 74 | self.text = self.PLM_tokenizer(self.text) 75 | 76 | self.vision = data[self.mode]['vision'].astype(np.float32) 77 | self.audio = data[self.mode]['audio'].astype(np.float32) 78 | self.rawText = data[self.mode]['raw_text'] 79 | self.ids = data[self.mode]['id'] 80 | 81 | self.labels = { 82 | 'M': data[self.mode][self.args.train_mode+'_labels'].astype(np.float32) 83 | } 84 | 85 | if self.args.need_label_prefix: 86 | labels = self.labels['M'] 87 | label_prefix = [] 88 | for i in range(len(labels)): 89 | if labels[i] < 0: 90 | label_prefix.append(f'negative,{labels[i].item():.{1}f}') 91 | elif labels[i] > 0: 92 | label_prefix.append(f'positive,{labels[i].item():.{1}f}') 93 | else: 94 | label_prefix.append(f'neutral,{labels[i].item():.{1}f}') 95 | self.labels_prefix = label_prefix 96 | 97 | if self.args.datasetName == 'sims': 98 | for m in "TAV": 99 | self.labels[m] = data[self.mode][self.args.train_mode+'_labels_'+m] 100 | 101 | 
logger.info(f"{self.mode} samples: {self.labels['M'].shape}") 102 | 103 | if not self.args.need_data_aligned: 104 | self.audio_lengths = data[self.mode]['audio_lengths'] 105 | self.vision_lengths = data[self.mode]['vision_lengths'] 106 | self.text_lengths = self.args.seq_lens[0] 107 | self.audio[self.audio == -np.inf] = 0 108 | self.vision[self.vision != self.vision] = 0 109 | 110 | if self.args.need_normalized: 111 | self.__normalize() 112 | 113 | def __init_mosei(self): 114 | return self.__init_mosi() 115 | 116 | def __init_sims(self): 117 | return self.__init_mosi() 118 | 119 | def __init_simsv2(self): 120 | return self.__init_mosi() 121 | 122 | def __truncated(self): 123 | # NOTE: Here for dataset we manually cut the input into specific length. 124 | def Truncated(modal_features, length): 125 | if length == modal_features.shape[1]: 126 | return modal_features 127 | truncated_feature = [] 128 | padding = np.array([0 for i in range(modal_features.shape[2])]) 129 | for instance in modal_features: 130 | for index in range(modal_features.shape[1]): 131 | if((instance[index] == padding).all()): 132 | if(index + length >= modal_features.shape[1]): 133 | truncated_feature.append(instance[index:index+20]) 134 | break 135 | else: 136 | truncated_feature.append(instance[index:index+20]) 137 | break 138 | truncated_feature = np.array(truncated_feature) 139 | return truncated_feature 140 | 141 | text_length, audio_length, video_length = self.args.seq_lens 142 | self.vision = Truncated(self.vision, video_length) 143 | self.text = Truncated(self.text, text_length) 144 | self.audio = Truncated(self.audio, audio_length) 145 | 146 | def __normalize(self): 147 | # (num_examples,max_len,feature_dim) -> (max_len, num_examples, feature_dim) 148 | self.vision = np.transpose(self.vision, (1, 0, 2)) 149 | self.audio = np.transpose(self.audio, (1, 0, 2)) 150 | # for visual and audio modality, we average across time 151 | # here the original data has shape (max_len, num_examples, feature_dim) 152 | # after averaging they become (1, num_examples, feature_dim) 153 | self.vision = np.mean(self.vision, axis=0, keepdims=True) 154 | self.audio = np.mean(self.audio, axis=0, keepdims=True) 155 | 156 | # remove possible NaN values 157 | self.vision[self.vision != self.vision] = 0 158 | self.audio[self.audio != self.audio] = 0 159 | 160 | self.vision = np.transpose(self.vision, (1, 0, 2)) 161 | self.audio = np.transpose(self.audio, (1, 0, 2)) 162 | 163 | def __len__(self): 164 | return len(self.labels['M']) 165 | 166 | # 这里text.shape是三维矩阵[sample_num,tokenizer_output,length] 167 | # tokenizer_output的3个维度分别是token_ids,mask(识别句子中padding的位置),segment_ids 168 | def get_seq_len(self): 169 | return (self.text.shape[2], self.audio.shape[1], self.vision.shape[1]) 170 | 171 | def get_feature_dim(self): 172 | return self.text.shape[2], self.audio.shape[2], self.vision.shape[2] 173 | 174 | def PLM_tokenizer (self, rawtexts): 175 | self.tokenizer = AutoTokenizer.from_pretrained(self.args.pretrain_LM, trust_remote_code=True) 176 | token_list = [] 177 | for text in rawtexts: 178 | text_tokenizer = self.tokenizer(text, 179 | padding='max_length', # 如果样本长度不满足最大长度则填充 180 | truncation=True, # 截断至最大长度 181 | max_length=self.args.seq_lens[0], 182 | return_tensors = 'pt', 183 | add_special_tokens=False 184 | ) 185 | 186 | token_ids = text_tokenizer['input_ids'].squeeze(0) # tensor of token ids torch.Size([max_len]) 187 | attn_masks = text_tokenizer['attention_mask'].squeeze(0) # binary tensor with "0" for padded values and "1" for the other 
values torch.Size([max_len]) 188 | token_type_ids = [0] * len(token_ids) #不区分上下句 189 | 190 | #调整维度 191 | input_ids = np.expand_dims(token_ids, 1) 192 | input_mask = np.expand_dims(attn_masks, 1) 193 | segment_ids = np.expand_dims(token_type_ids, 1) 194 | 195 | text_pretrain = np.concatenate([input_ids, input_mask, segment_ids], axis=1).T 196 | token_list.append(text_pretrain) 197 | 198 | # x_dimensions = [array.shape[1] for array in token_list] 199 | # # 计算 x 维度的平均值 200 | # average_x = np.mean(x_dimensions) 201 | # median_x = np.median(x_dimensions) 202 | token_list = np.array(token_list) 203 | return token_list 204 | 205 | 206 | def __getitem__(self, index): 207 | if self.args.train_mode == 'regression': 208 | sample = { 209 | 'raw_text': self.rawText[index], 210 | 'text': torch.Tensor(self.text[index]), 211 | 'audio': torch.Tensor(self.audio[index]), 212 | 'vision': torch.Tensor(self.vision[index]), 213 | 'index': index, 214 | 'id': self.ids[index], 215 | 'labels': {k: torch.Tensor(v[index].reshape(-1)) for k, v in self.labels.items()}, 216 | 'labels_prefix': self.labels_prefix[index] 217 | } 218 | else: 219 | sample = { 220 | 'raw_text': self.rawText[index], 221 | 'text': torch.Tensor(self.text[index]), 222 | 'audio': torch.Tensor(self.audio[index]), 223 | 'vision': torch.Tensor(self.vision[index]), 224 | 'index': index, 225 | 'labels': {k: v[index] for k, v in self.labels.items()} 226 | # 'labels': {torch.Tensor(self.labels)}, 227 | } 228 | 229 | if not self.args.need_data_aligned: 230 | sample['audio_lengths'] = self.audio_lengths[index] 231 | sample['vision_lengths'] = self.vision_lengths[index] 232 | sample['text_lengths'] = self.args.seq_lens[0] 233 | 234 | return sample 235 | 236 | 237 | 238 | def MMDataLoader(args): 239 | 240 | datasets = { 241 | 'train': MMDataset(args, mode='train'), 242 | 'valid': MMDataset(args, mode='valid'), 243 | 'test': MMDataset(args, mode='test') 244 | } 245 | 246 | if 'seq_lens' in args: 247 | args.seq_lens = datasets['train'].get_seq_len() 248 | 249 | dataLoader = { 250 | ds: DataLoader(datasets[ds], 251 | batch_size=args.batch_size, 252 | num_workers=args.num_workers, 253 | shuffle=True) 254 | for ds in datasets.keys() 255 | } 256 | 257 | return dataLoader --------------------------------------------------------------------------------
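
For downstream modules, the text tensor produced by MMDataset follows the same three-row packing used in TextPre.py: each sample is a 3 x max_len block holding token ids, attention mask, and segment ids, so a collated batch has shape (batch, 3, max_len) and get_seq_len() reads the sequence length from text.shape[2]. The sketch below illustrates only this packing and unpacking convention with toy tensors; the names are illustrative and no dataset files are required.

import torch

batch_size, max_len = 4, 50
text = torch.zeros(batch_size, 3, max_len, dtype=torch.long)
text[:, 0, :10] = torch.randint(1, 1000, (batch_size, 10))  # row 0: token ids (10 real tokens, rest padding)
text[:, 1, :10] = 1                                         # row 1: attention mask, 1 = real token
# row 2 stays zero: token_type_ids, single-segment input

input_ids = text[:, 0, :]
attention_mask = text[:, 1, :]
token_type_ids = text[:, 2, :]
print(input_ids.shape, int(attention_mask.sum()))           # torch.Size([4, 50]) 40

A text encoder consuming these batches is expected to split along dim 1 in exactly this way, which matches how the (3, max_len) blocks built by PLM_tokenizer are laid out.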