├── MSE-ChatGLM3-6B
├── data
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ └── load_data.cpython-310.pyc
│ ├── getLengths.py
│ ├── TextPre.py
│ └── load_data.py
├── utils
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ ├── functions.cpython-310.pyc
│ │ └── metricsTop.cpython-310.pyc
│ ├── functions.py
│ └── metricsTop.py
├── models
│ ├── __init__.py
│ ├── ChatGLM3
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── modeling_chatglm.cpython-310.pyc
│ │ │ ├── tokenization_chatglm.cpython-310.pyc
│ │ │ └── configuration_chatglm.cpython-310.pyc
│ │ └── configuration_chatglm.py
│ ├── subNets
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ └── Textmodel.cpython-310.pyc
│ │ └── Textmodel.py
│ ├── multiTask
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── CMCM.cpython-310.pyc
│ │ │ └── __init__.cpython-310.pyc
│ │ └── CMCM.py
│ ├── __pycache__
│ │ ├── AMIO.cpython-310.pyc
│ │ └── __init__.cpython-310.pyc
│ └── AMIO.py
├── trains
│ ├── __init__.py
│ ├── multiTask
│ │ ├── __init__.py
│ │ └── __pycache__
│ │ │ ├── CMCM.cpython-310.pyc
│ │ │ └── __init__.cpython-310.pyc
│ ├── __pycache__
│ │ ├── ATIO.cpython-310.pyc
│ │ └── __init__.cpython-310.pyc
│ └── ATIO.py
├── config
│ ├── __pycache__
│ │ ├── config_regression.cpython-310.pyc
│ │ └── config_classification.cpython-310.pyc
│ ├── config_regression.py
│ └── config_classification.py
├── LICENSE
└── run.py
├── MSE-Llama2-7B
├── data
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ └── load_data.cpython-310.pyc
│ ├── getLengths.py
│ └── TextPre.py
├── models
│ ├── __init__.py
│ ├── subNets
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ └── Textmodel.cpython-310.pyc
│ │ └── Textmodel.py
│ ├── multiTask
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── CMCM.cpython-310.pyc
│ │ │ └── __init__.cpython-310.pyc
│ │ └── CMCM.py
│ ├── __pycache__
│ │ ├── AMIO.cpython-310.pyc
│ │ └── __init__.cpython-310.pyc
│ └── AMIO.py
├── trains
│ ├── __init__.py
│ ├── multiTask
│ │ ├── __init__.py
│ │ └── __pycache__
│ │ │ ├── CMCM.cpython-310.pyc
│ │ │ └── __init__.cpython-310.pyc
│ ├── __pycache__
│ │ ├── ATIO.cpython-310.pyc
│ │ └── __init__.cpython-310.pyc
│ └── ATIO.py
├── utils
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ ├── functions.cpython-310.pyc
│ │ └── metricsTop.cpython-310.pyc
│ ├── functions.py
│ └── metricsTop.py
├── config
│ ├── __pycache__
│ │ ├── config_regression.cpython-310.pyc
│ │ └── config_classification.cpython-310.pyc
│ ├── config_regression.py
│ └── config_classification.py
├── LICENSE
└── run.py
├── MSE-Qwen-1.8B
├── data
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ └── load_data.cpython-310.pyc
│ ├── getLengths.py
│ └── TextPre.py
├── models
│ ├── __init__.py
│ ├── subNets
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ └── Textmodel.cpython-310.pyc
│ │ └── Textmodel.py
│ ├── multiTask
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── CMCM.cpython-310.pyc
│ │ │ └── __init__.cpython-310.pyc
│ │ └── CMCM.py
│ ├── __pycache__
│ │ ├── AMIO.cpython-310.pyc
│ │ └── __init__.cpython-310.pyc
│ └── AMIO.py
├── trains
│ ├── __init__.py
│ ├── multiTask
│ │ ├── __init__.py
│ │ └── __pycache__
│ │ │ ├── CMCM.cpython-310.pyc
│ │ │ └── __init__.cpython-310.pyc
│ ├── __pycache__
│ │ ├── ATIO.cpython-310.pyc
│ │ └── __init__.cpython-310.pyc
│ └── ATIO.py
├── utils
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ ├── functions.cpython-310.pyc
│ │ └── metricsTop.cpython-310.pyc
│ ├── functions.py
│ └── metricsTop.py
├── config
│ ├── __pycache__
│ │ ├── config_regression.cpython-310.pyc
│ │ └── config_classification.cpython-310.pyc
│ ├── config_regression.py
│ └── config_classification.py
├── LICENSE
└── run.py
├── Fig
└── overall.png
├── README.md
└── requirements.txt
/MSE-ChatGLM3-6B/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/subNets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/subNets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Fig/overall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/Fig/overall.png
--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | from trains.multiTask.CMCM import CMCM
2 |
3 | __all__ = ['CMCM']
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | from trains.multiTask.CMCM import CMCM
2 |
3 | __all__ = ['CMCM']
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | from trains.multiTask.CMCM import CMCM
2 |
3 | __all__ = ['CMCM']
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from models.multiTask.CMCM import CMCM
3 |
4 | __all__ = ['CMCM']
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from models.multiTask.CMCM import CMCM
3 |
4 | __all__ = ['CMCM']
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from models.multiTask.CMCM import CMCM
3 |
4 | __all__ = ['CMCM']
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/__pycache__/AMIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/__pycache__/AMIO.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/__pycache__/ATIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/__pycache__/ATIO.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/data/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/__pycache__/AMIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/__pycache__/AMIO.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/__pycache__/ATIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/__pycache__/ATIO.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/data/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/__pycache__/AMIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/__pycache__/AMIO.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/__pycache__/ATIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/__pycache__/ATIO.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/data/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/__pycache__/load_data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/data/__pycache__/load_data.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__pycache__/functions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/functions.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/__pycache__/load_data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/data/__pycache__/load_data.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__pycache__/functions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/functions.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/__pycache__/load_data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/data/__pycache__/load_data.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__pycache__/functions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/functions.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__pycache__/metricsTop.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/metricsTop.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__pycache__/metricsTop.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/metricsTop.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__pycache__/metricsTop.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/metricsTop.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/multiTask/__pycache__/CMCM.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/multiTask/__pycache__/CMCM.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/subNets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/subNets/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/multiTask/__pycache__/CMCM.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/subNets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/subNets/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/subNets/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/config/__pycache__/config_regression.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/config/__pycache__/config_regression.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/multiTask/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/subNets/__pycache__/Textmodel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/subNets/__pycache__/Textmodel.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/multiTask/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/__pycache__/config_regression.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/config/__pycache__/config_regression.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/multiTask/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/subNets/__pycache__/Textmodel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/subNets/__pycache__/Textmodel.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/multiTask/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/__pycache__/config_regression.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/config/__pycache__/config_regression.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/multiTask/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/__pycache__/Textmodel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/subNets/__pycache__/Textmodel.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/config/__pycache__/config_classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/config/__pycache__/config_classification.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/__pycache__/config_classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/config/__pycache__/config_classification.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/__pycache__/config_classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/config/__pycache__/config_classification.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/modeling_chatglm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/modeling_chatglm.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/tokenization_chatglm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/tokenization_chatglm.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/configuration_chatglm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/configuration_chatglm.cpython-310.pyc
--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/ATIO.py:
--------------------------------------------------------------------------------
1 | """
2 | ATIO -- All Trains In One
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from torch.nn.parameter import Parameter
9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 |
11 | from trains.multiTask import *
12 |
13 | __all__ = ['ATIO']
14 |
15 | class ATIO():
16 | def __init__(self):
17 | self.TRAIN_MAP = {
18 | 'cmcm': CMCM,
19 | }
20 |
21 | def getTrain(self, args):
22 | return self.TRAIN_MAP[args.modelName.lower()](args)
23 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/ATIO.py:
--------------------------------------------------------------------------------
1 | """
2 | ATIO -- All Trains In One
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from torch.nn.parameter import Parameter
9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 |
11 | from trains.multiTask import *
12 |
13 | __all__ = ['ATIO']
14 |
15 | class ATIO():
16 | def __init__(self):
17 | self.TRAIN_MAP = {
18 | 'cmcm': CMCM,
19 | }
20 |
21 | def getTrain(self, args):
22 | return self.TRAIN_MAP[args.modelName.lower()](args)
23 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/ATIO.py:
--------------------------------------------------------------------------------
1 | """
2 | ATIO -- All Trains In One
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from torch.nn.parameter import Parameter
9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 |
11 | from trains.multiTask import *
12 |
13 | __all__ = ['ATIO']
14 |
15 | class ATIO():
16 | def __init__(self):
17 | self.TRAIN_MAP = {
18 | 'cmcm': CMCM,
19 | }
20 |
21 | def getTrain(self, args):
22 | return self.TRAIN_MAP[args.modelName.lower()](args)
23 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/AMIO.py:
--------------------------------------------------------------------------------
1 | """
2 | AMIO -- All Models In One
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from torch.nn.parameter import Parameter
9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 |
11 |
12 | from models.multiTask import *
13 |
14 | __all__ = ['AMIO']
15 |
16 | MODEL_MAP = {
17 | 'cmcm': CMCM
18 | }
19 |
20 | class AMIO(nn.Module):
21 | def __init__(self, args):
22 | super(AMIO, self).__init__()
23 | lastModel = MODEL_MAP[args.modelName]
24 | self.Model = lastModel(args)
25 |
26 | def forward(self, labels_m, text_x, audio_x, video_x):
27 | return self.Model(labels_m, text_x, audio_x, video_x)
28 |
29 | def generate(self, text_x, audio_x, video_x):
30 | return self.Model.generate(text_x, audio_x, video_x)
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/AMIO.py:
--------------------------------------------------------------------------------
1 | """
2 | AMIO -- All Models In One
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from torch.nn.parameter import Parameter
9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 |
11 |
12 | from models.multiTask import *
13 |
14 | __all__ = ['AMIO']
15 |
16 | MODEL_MAP = {
17 | 'cmcm': CMCM
18 | }
19 |
20 | class AMIO(nn.Module):
21 | def __init__(self, args):
22 | super(AMIO, self).__init__()
23 | lastModel = MODEL_MAP[args.modelName]
24 | self.Model = lastModel(args)
25 |
26 | def forward(self, labels_m, text_x, audio_x, video_x):
27 | return self.Model(labels_m, text_x, audio_x, video_x)
28 |
29 | def generate(self, text_x, audio_x, video_x):
30 | return self.Model.generate(text_x, audio_x, video_x)
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/AMIO.py:
--------------------------------------------------------------------------------
1 | """
2 | AMIO -- All Models In One
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from torch.nn.parameter import Parameter
9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 |
11 |
12 | from models.multiTask import *
13 |
14 | __all__ = ['AMIO']
15 |
16 | MODEL_MAP = {
17 | 'cmcm': CMCM
18 | }
19 |
20 | class AMIO(nn.Module):
21 | def __init__(self, args):
22 | super(AMIO, self).__init__()
23 | lastModel = MODEL_MAP[args.modelName]
24 | self.Model = lastModel(args)
25 |
26 | def forward(self, labels_m, text_x, audio_x, video_x):
27 | return self.Model(labels_m, text_x, audio_x, video_x)
28 |
29 | def generate(self, text_x, audio_x, video_x):
30 | return self.Model.generate(text_x, audio_x, video_x)
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/functions.py:
--------------------------------------------------------------------------------
1 | def dict_to_str(src_dict):
2 | dst_str = ""
3 | for key in src_dict.keys():
4 | dst_str += " %s: %.4f " %(key, src_dict[key])
5 | return dst_str
6 |
7 | class Storage(dict):
8 | """
9 | A Storage object is like a dictionary, except that `obj.foo` can be used in addition to `obj['foo']`.
10 | ref: https://blog.csdn.net/a200822146085/article/details/88430450
11 | """
12 | def __getattr__(self, key):
13 | try:
14 | return self[key] if key in self else False
15 | except KeyError as k:
16 | raise AttributeError(k)
17 |
18 | def __setattr__(self, key, value):
19 | self[key] = value
20 |
21 | def __delattr__(self, key):
22 | try:
23 | del self[key]
24 | except KeyError as k:
25 | raise AttributeError(k)
26 |
27 | def __str__(self):
28 | return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
29 |
30 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/functions.py:
--------------------------------------------------------------------------------
1 | def dict_to_str(src_dict):
2 | dst_str = ""
3 | for key in src_dict.keys():
4 | dst_str += " %s: %.4f " %(key, src_dict[key])
5 | return dst_str
6 |
7 | class Storage(dict):
8 | """
9 | A Storage object is like a dictionary, except that `obj.foo` can be used in addition to `obj['foo']`.
10 | ref: https://blog.csdn.net/a200822146085/article/details/88430450
11 | """
12 | def __getattr__(self, key):
13 | try:
14 | return self[key] if key in self else False
15 | except KeyError as k:
16 | raise AttributeError(k)
17 |
18 | def __setattr__(self, key, value):
19 | self[key] = value
20 |
21 | def __delattr__(self, key):
22 | try:
23 | del self[key]
24 | except KeyError as k:
25 | raise AttributeError(k)
26 |
27 | def __str__(self):
28 | return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
29 |
30 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/functions.py:
--------------------------------------------------------------------------------
1 | def dict_to_str(src_dict):
2 | dst_str = ""
3 | for key in src_dict.keys():
4 | dst_str += " %s: %.4f " %(key, src_dict[key])
5 | return dst_str
6 |
7 | class Storage(dict):
8 | """
9 | A Storage object is like a dictionary, except that `obj.foo` can be used in addition to `obj['foo']`.
10 | ref: https://blog.csdn.net/a200822146085/article/details/88430450
11 | """
12 | def __getattr__(self, key):
13 | try:
14 | return self[key] if key in self else False
15 | except KeyError as k:
16 | raise AttributeError(k)
17 |
18 | def __setattr__(self, key, value):
19 | self[key] = value
20 |
21 | def __delattr__(self, key):
22 | try:
23 | del self[key]
24 | except KeyError as k:
25 | raise AttributeError(k)
26 |
27 | def __str__(self):
28 | return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
29 |
30 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 iyuge2
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MSE-Llama2-7B/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 iyuge2
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 iyuge2
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/getLengths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle as plk
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 |
7 | def get_lengths(mode, feature_name):
8 | fd = data[mode][feature_name]
9 | max_len = fd.shape[1]
10 |
11 | c_sum = np.sum(fd, axis=-1)
12 | lengths = []
13 | for i in tqdm(range(fd.shape[0])):
14 | null = True
15 | zeros = np.zeros([fd.shape[1], fd.shape[2]])
16 | cur_length = max_len
17 | for j in range(max_len):
18 | if c_sum[i][j] == 0:
19 | cur_length = j
20 | null = False
21 | break
22 | if cur_length == 0:
23 | cur_length = 1
24 | lengths.append(cur_length)
25 | return lengths
26 |
27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf:
28 | data = plk.load(lf)
29 |
30 | def handleData(mode):
31 | # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A')
32 | # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V')
33 | data[mode]['audio_lengths'] = get_lengths(mode, 'audio')
34 | data[mode]['vision_lengths'] = get_lengths(mode, 'vision')
35 |
36 | handleData('train')
37 | handleData('valid')
38 | handleData('test')
39 |
40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df:
41 | plk.dump(data, df, protocol = 4)
--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/getLengths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle as plk
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 |
7 | def get_lengths(mode, feature_name):
8 | fd = data[mode][feature_name]
9 | max_len = fd.shape[1]
10 |
11 | c_sum = np.sum(fd, axis=-1)
12 | lengths = []
13 | for i in tqdm(range(fd.shape[0])):
14 | null = True
15 | zeros = np.zeros([fd.shape[1], fd.shape[2]])
16 | cur_length = max_len
17 | for j in range(max_len):
18 | if c_sum[i][j] == 0:
19 | cur_length = j
20 | null = False
21 | break
22 | if cur_length == 0:
23 | cur_length = 1
24 | lengths.append(cur_length)
25 | return lengths
26 |
27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf:
28 | data = plk.load(lf)
29 |
30 | def handleData(mode):
31 | # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A')
32 | # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V')
33 | data[mode]['audio_lengths'] = get_lengths(mode, 'audio')
34 | data[mode]['vision_lengths'] = get_lengths(mode, 'vision')
35 |
36 | handleData('train')
37 | handleData('valid')
38 | handleData('test')
39 |
40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df:
41 | plk.dump(data, df, protocol = 4)
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/getLengths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle as plk
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 |
7 | def get_lengths(mode, feature_name):
8 | fd = data[mode][feature_name]
9 | max_len = fd.shape[1]
10 |
11 | c_sum = np.sum(fd, axis=-1)
12 | lengths = []
13 | for i in tqdm(range(fd.shape[0])):
14 | null = True
15 | zeros = np.zeros([fd.shape[1], fd.shape[2]])
16 | cur_length = max_len
17 | for j in range(max_len):
18 | if c_sum[i][j] == 0:
19 | cur_length = j
20 | null = False
21 | break
22 | if cur_length == 0:
23 | cur_length = 1
24 | lengths.append(cur_length)
25 | return lengths
26 |
27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf:
28 | data = plk.load(lf)
29 |
30 | def handleData(mode):
31 | # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A')
32 | # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V')
33 | data[mode]['audio_lengths'] = get_lengths(mode, 'audio')
34 | data[mode]['vision_lengths'] = get_lengths(mode, 'vision')
35 |
36 | handleData('train')
37 | handleData('valid')
38 | handleData('test')
39 |
40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df:
41 | plk.dump(data, df, protocol = 4)
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/configuration_chatglm.py:
--------------------------------------------------------------------------------
1 | from transformers import PretrainedConfig
2 |
3 |
4 | class ChatGLMConfig(PretrainedConfig):
5 | model_type = "chatglm"
6 | def __init__(
7 | self,
8 | num_layers=28,
9 | padded_vocab_size=65024,
10 | hidden_size=4096,
11 | ffn_hidden_size=13696,
12 | kv_channels=128,
13 | num_attention_heads=32,
14 | seq_length=2048,
15 | hidden_dropout=0.0,
16 | classifier_dropout=None,
17 | attention_dropout=0.0,
18 | layernorm_epsilon=1e-5,
19 | rmsnorm=True,
20 | apply_residual_connection_post_layernorm=False,
21 | post_layer_norm=True,
22 | add_bias_linear=False,
23 | add_qkv_bias=False,
24 | bias_dropout_fusion=True,
25 | multi_query_attention=False,
26 | multi_query_group_num=1,
27 | apply_query_key_layer_scaling=True,
28 | attention_softmax_in_fp32=True,
29 | fp32_residual_connection=False,
30 | quantization_bit=0,
31 | pre_seq_len=None,
32 | prefix_projection=False,
33 | **kwargs
34 | ):
35 | self.num_layers = num_layers
36 | self.vocab_size = padded_vocab_size
37 | self.padded_vocab_size = padded_vocab_size
38 | self.hidden_size = hidden_size
39 | self.ffn_hidden_size = ffn_hidden_size
40 | self.kv_channels = kv_channels
41 | self.num_attention_heads = num_attention_heads
42 | self.seq_length = seq_length
43 | self.hidden_dropout = hidden_dropout
44 | self.classifier_dropout = classifier_dropout
45 | self.attention_dropout = attention_dropout
46 | self.layernorm_epsilon = layernorm_epsilon
47 | self.rmsnorm = rmsnorm
48 | self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
49 | self.post_layer_norm = post_layer_norm
50 | self.add_bias_linear = add_bias_linear
51 | self.add_qkv_bias = add_qkv_bias
52 | self.bias_dropout_fusion = bias_dropout_fusion
53 | self.multi_query_attention = multi_query_attention
54 | self.multi_query_group_num = multi_query_group_num
55 | self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
56 | self.attention_softmax_in_fp32 = attention_softmax_in_fp32
57 | self.fp32_residual_connection = fp32_residual_connection
58 | self.quantization_bit = quantization_bit
59 | self.pre_seq_len = pre_seq_len
60 | self.prefix_projection = prefix_projection
61 | super().__init__(**kwargs)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # 😊 The Official Implementation of MSE-Adapter
4 |
5 |
6 |
7 |
8 |
9 |
10 | 🎉🎉 **Our paper has been accepted at AAAI-2025!**
11 |
12 |
13 | ---
14 | This is the official code for the paper "MSE-Adapter: A Lightweight Plugin Endowing LLMs with the Capability to Perform Multimodal Sentiment Analysis and Emotion Recognition".
15 |
16 | 
17 |
18 |
19 | *Fig. 1: The overall framework integrating the MSE-Adapter with an LLM.*
20 |
21 |
22 |
23 | ---
24 |
25 | ## 🚀 Get Started! (using MSE-ChatGLM3-6B as an example)
26 |
27 | ### 🔧 Step 1: Create the Environment
28 | ``` bash
29 | git clone https://github.com/AZYoung233/MSE-Adapter.git
30 | cd MSE-Adapter
31 | conda create --name MSE-Adapter python=3.10.13
32 | conda activate MSE-Adapter
33 | pip install -r requirements.txt
34 | ```
35 | 🚨 **Critical Notice (2025/04/29 update)**: It is **highly recommended** to create a new **virtual environment** directly using `requirements.txt`. If that's not feasible, at least ensure that the `transformers` version matches exactly. Otherwise, the training loss may decrease as expected, but the evaluation metrics could be abnormal, severely impacting the model's performance.
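After installing, you can verify the pin by running `python -c "import transformers; print(transformers.__version__)"`, which should print `4.36.1` as listed in `requirements.txt`.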
36 |
37 | ### 📂 Step 2: Download the Dataset
38 | - You can download the datasets from the links below:
39 | - [MOSEI](https://huggingface.co/datasets/AZYoung/MOSEI_processed)📦
40 | - [SIMS-V2](https://huggingface.co/datasets/AZYoung/SIMSV2_processed)📦
41 | - [MELD](https://huggingface.co/datasets/AZYoung/MELD_processed)📦
42 | - [CHERMA](https://huggingface.co/datasets/AZYoung/CHERMA0723_processed)📦
43 | - Place them all under the same folder, then set `root_dataset_dir` in `parse_args` of `run.py` to that folder's path (see the layout sketch below).
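Based on the `dataPath` entries in `config/config_regression.py`, the regression datasets are expected to sit under `root_dataset_dir` roughly like this (a sketch; the MELD and CHERMA paths are set in `config/config_classification.py`, which is not excerpted here):

```
<root_dataset_dir>/
├── MOSI/Processed/unaligned_50.pkl
├── MOSEI/Processed/unaligned_50.pkl
└── SIMS_V2/ch-simsv2s.pkl
```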
44 |
45 | ### 💾 Step 3: Download the Backbone LLM
46 | - Download the backbone LLM from [THUDM/chatglm3-6b](https://huggingface.co/THUDM/chatglm3-6b) and set `pretrain_LM` in `parse_args` of `run.py` to the path where you store the LLM (see the sketch below). If the download is too slow, try [Modelscope](https://modelscope.cn/my/overview) 🌐 or [HF-mirrors](https://hf-mirror.com/) 🌐.
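For Steps 2 and 3, the relevant part of `parse_args` in `run.py` looks roughly like the sketch below. This is only an illustration: the argument names `root_dataset_dir` and `pretrain_LM` come from this README (and `modelName`/`datasetName` from the config classes); the default values shown are placeholders, not the repository's actual settings.

``` python
import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    # model / dataset selection; config_regression.py reads args.modelName and args.datasetName
    parser.add_argument('--modelName', type=str, default='cmcm')
    parser.add_argument('--datasetName', type=str, default='mosei')
    # Step 2: folder that contains MOSEI/Processed/..., SIMS_V2/..., etc. (placeholder default)
    parser.add_argument('--root_dataset_dir', type=str, default='/path/to/datasets')
    # Step 3: local path of the downloaded chatglm3-6b weights (placeholder default)
    parser.add_argument('--pretrain_LM', type=str, default='/path/to/chatglm3-6b')
    return parser.parse_args()
```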
47 |
48 | ### ▶️ Step 4: Run!
49 | - Once you have completed the basic setup described above, run the code as follows. The script runs 5 random seeds and saves the results in `results/result`; the results reported in the paper are the average over these 5 seeds.
50 | ```bash
51 | cd MSE-ChatGLM3-6B
52 | python run.py
53 | ```
54 |
55 | ## 🙏 Acknowledgment
56 | Our code structure is based on [SELF-MM](https://github.com/thuiar/Self-MM). Thanks to the authors' open-source spirit for saving us a lot of time. 💖
57 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | addict==2.4.0
2 | aiohttp==3.9.1
3 | aiosignal==1.3.1
4 | aliyun-python-sdk-core==2.14.0
5 | aliyun-python-sdk-kms==2.16.2
6 | annotated-types==0.6.0
7 | anyio==4.8.0
8 | async-timeout==4.0.3
9 | attrs==23.1.0
10 | blis==0.7.11
11 | Brotli==1.0.9
12 | cachetools==5.3.2
13 | catalogue==2.0.10
14 | certifi==2023.11.17
15 | cffi==1.16.0
16 | charset-normalizer==2.0.4
17 | click==8.1.7
18 | cloudpathlib==0.16.0
19 | cmake==3.28.1
20 | confection==0.1.4
21 | contourpy==1.2.0
22 | crcmod==1.7
23 | cryptography==41.0.7
24 | cycler==0.12.1
25 | cymem==2.0.8
26 | datasets==2.15.0
27 | diffusers==0.31.0
28 | dill==0.3.7
29 | easydict==1.13
30 | einops==0.7.0
31 | en-core-web-sm==3.7.1
32 | exceptiongroup==1.2.2
33 | fastapi==0.115.8
34 | filelock==3.13.1
35 | fonttools==4.46.0
36 | frozenlist==1.4.1
37 | fsspec==2023.10.0
38 | gast==0.5.4
39 | gmpy2==2.1.2
40 | graphviz==0.20.3
41 | h11==0.14.0
42 | huggingface-hub==0.26.2
43 | idna==3.4
44 | importlib-metadata==7.0.0
45 | jieba==0.42.1
46 | Jinja2==3.1.2
47 | jmespath==0.10.0
48 | joblib==1.3.2
49 | kiwisolver==1.4.5
50 | langcodes==3.3.0
51 | lit==17.0.6
52 | MarkupSafe==2.1.1
53 | matplotlib==3.8.2
54 | mkl-fft==1.3.8
55 | mkl-random==1.2.4
56 | mkl-service==2.4.0
57 | modelscope==1.10.0
58 | mpmath==1.3.0
59 | multidict==6.0.4
60 | multiprocess==0.70.15
61 | murmurhash==1.0.10
62 | networkx==3.1
63 | numpy==1.26.2
64 | nvidia-cublas-cu11==11.10.3.66
65 | nvidia-cuda-cupti-cu11==11.7.101
66 | nvidia-cuda-nvrtc-cu11==11.7.99
67 | nvidia-cuda-runtime-cu11==11.7.99
68 | nvidia-cudnn-cu11==8.5.0.96
69 | nvidia-cufft-cu11==10.9.0.58
70 | nvidia-curand-cu11==10.2.10.91
71 | nvidia-cusolver-cu11==11.4.0.1
72 | nvidia-cusparse-cu11==11.7.4.91
73 | nvidia-ml-py==12.535.133
74 | nvidia-nccl-cu11==2.14.3
75 | nvidia-nvtx-cu11==11.7.91
76 | nvitop==1.3.1
77 | opencv-python==4.11.0.86
78 | oss2==2.18.3
79 | packaging==23.2
80 | pandas==2.1.4
81 | Pillow==10.0.1
82 | pip==23.3.1
83 | platformdirs==4.1.0
84 | preshed==3.0.9
85 | protobuf==4.25.3
86 | psutil==5.9.6
87 | pyarrow==14.0.1
88 | pyarrow-hotfix==0.6
89 | pycparser==2.21
90 | pycryptodome==3.19.0
91 | pydantic==2.5.3
92 | pydantic_core==2.14.6
93 | pyOpenSSL==23.2.0
94 | pyparsing==3.1.1
95 | PySocks==1.7.1
96 | python-dateutil==2.8.2
97 | pytz==2023.3.post1
98 | PyYAML==6.0.1
99 | regex==2023.10.3
100 | requests==2.31.0
101 | safetensors==0.4.1
102 | scikit-learn==1.3.2
103 | scipy==1.11.4
104 | sentencepiece==0.1.99
105 | setuptools==68.2.2
106 | simplejson==3.19.2
107 | six==1.16.0
108 | smart-open==6.4.0
109 | sniffio==1.3.1
110 | sortedcontainers==2.4.0
111 | spacy==3.7.2
112 | spacy-legacy==3.0.12
113 | spacy-loggers==1.0.5
114 | srsly==2.4.8
115 | starlette==0.45.3
116 | sympy==1.12
117 | termcolor==2.4.0
118 | thinc==8.2.2
119 | threadpoolctl==3.2.0
120 | tiktoken==0.5.2
121 | tokenizers==0.15.0
122 | tomli==2.0.1
123 | torch==2.0.1
124 | torchaudio==2.1.2
125 | torchvision==0.16.2
126 | torchviz==0.0.2
127 | tqdm==4.66.1
128 | transformers==4.36.1
129 | transformers-stream-generator==0.0.4
130 | triton==2.0.0
131 | typer==0.9.0
132 | typing_extensions==4.12.2
133 | tzdata==2023.3
134 | urllib3==1.26.18
135 | uvicorn==0.34.0
136 | wasabi==1.1.2
137 | weasel==0.3.4
138 | wheel==0.41.2
139 | xformers==0.0.21
140 | xxhash==3.4.1
141 | yapf==0.40.2
142 | yarl==1.9.4
143 | zipp==3.17.0
144 |
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/config_regression.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | from utils.functions import Storage
5 |
6 | class ConfigRegression():
7 | def __init__(self, args):
8 | # hyper parameters for models
9 | HYPER_MODEL_MAP = {
10 | 'cmcm': self.__CMCM
11 | }
12 | # hyper parameters for datasets
13 | self.root_dataset_dir = args.root_dataset_dir
14 | HYPER_DATASET_MAP = self.__datasetCommonParams()
15 | # normalize
16 | model_name = str.lower(args.modelName)
17 | dataset_name = str.lower(args.datasetName)
18 | # load params
19 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
20 | dataArgs = HYPER_DATASET_MAP[dataset_name]
21 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
22 | # integrate all parameters
23 | self.args = Storage(dict(vars(args),
24 | **dataArgs,
25 | **commonArgs,
26 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
27 | ))
28 |
29 | def __datasetCommonParams(self):
30 | root_dataset_dir = self.root_dataset_dir
31 | tmp = {
32 | 'mosi':{
33 | 'unaligned': {
34 | 'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
35 | 'seq_lens': (50, 50, 50),
36 | # (text, audio, video)
37 | 'feature_dims': (4096, 5, 20),
38 | 'train_samples': 1284,
39 | 'num_classes': 3,
40 | 'language': 'en',
41 | 'KeyEval': 'MAE'
42 | }
43 | },
44 | 'mosei':{
45 | 'unaligned': {
46 | 'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
47 | 'seq_lens': (50, 500, 375),
48 | # (text, audio, video)
49 | 'feature_dims': (4096, 74, 35),
50 | 'train_samples': 16326,
51 | 'num_classes': 3,
52 | 'language': 'en',
53 | 'KeyEval': 'MAE'
54 | }
55 | },
56 |
57 |
58 | 'simsv2': {
59 | 'unaligned': {
60 | 'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'),
61 | # (batch_size, seq_lens, feature_dim)
62 | 'seq_lens': (50, 925, 232), # (text, audio, video)
63 | 'feature_dims': (4096, 25, 177), # (text, audio, video)
64 | 'train_samples': 2722,
65 | 'num_classes': 3,
66 | 'language': 'cn',
67 | 'KeyEval': 'MAE',
68 | }
69 | }
70 | }
71 | return tmp
72 |
73 | def __CMCM(self):
74 | tmp = {
75 | 'commonParas':{
76 | 'need_data_aligned': False,
77 | 'need_model_aligned': False,
78 | 'need_label_prefix':True,
79 | 'need_normalized': False,
80 | 'use_PLM': True,
81 | 'save_labels': False,
82 | },
83 | # dataset
84 | 'datasetParas':{
85 | 'mosei':{
86 | # the effective batch size is update_epochs * batch_size
87 | 'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, 3.0]. response: The sentiment is',
88 | 'max_new_tokens': 4,
89 | 'pseudo_tokens': 4,
90 | 'batch_size': 8,
91 | 'learning_rate': 5e-5,
92 | # feature subNets
93 | 'a_lstm_hidden_size': 64,
94 | 'v_lstm_hidden_size': 32,
95 | 'a_lstm_layers': 1,
96 | 'v_lstm_layers': 1,
97 | 'a_lstm_dropout': 0.0,
98 | 'v_lstm_dropout': 0.0,
99 | 'warm_up_epochs':30,
100 | #loss weight best:1
101 | 'gamma':1,
102 | 'update_epochs': 1,
103 |                     'early_stop': 10,  # 10 and 8 make little difference
104 | # res
105 | 'H': 3.0
106 | },
107 |
108 | 'simsv2': {
109 |                     # the effective batch size is update_epochs * batch_size
110 | 'max_new_tokens': 4,
111 | 'pseudo_tokens': 4,
112 | 'task_specific_prompt': '请对上述多模态内容的情感强度进行预测,范围在[-1.0, 1.0]之间。响应: 情感为',
113 | 'batch_size': 8,
114 | 'learning_rate': 5e-5,
115 | # feature subNets
116 | 'a_lstm_hidden_size': 64,
117 | 'v_lstm_hidden_size': 64,
118 | 'a_lstm_layers': 1,
119 | 'v_lstm_layers': 1,
120 | 'a_lstm_dropout': 0.0,
121 | 'v_lstm_dropout': 0.0,
122 | 'warm_up_epochs': 80,
123 | 'update_epochs': 1,
124 | 'early_stop': 10,
125 | # loss weight best:0.25
126 | 'gamma': 1,
127 | # res
128 | 'H': 1.0
129 | },
130 | },
131 | }
132 | return tmp
133 |
134 | def get_config(self):
135 | return self.args
--------------------------------------------------------------------------------
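ConfigRegression above merges three sources into one flat parameter object: the argparse Namespace, the dataset-level entries from __datasetCommonParams, and the per-dataset model hyperparameters from __CMCM, with the parameter dicts overriding same-named CLI fields. A minimal consumption sketch, assuming it is run from the MSE-ChatGLM3-6B project root so the imports resolve, that Storage (defined in utils/functions.py, not reproduced in this dump) is dict-like with attribute access, and using a hypothetical dataset path:

import argparse
from config.config_regression import ConfigRegression

# Only the fields that ConfigRegression actually reads are set here.
cli = argparse.Namespace(
    modelName='cmcm',
    datasetName='mosei',
    root_dataset_dir='/path/to/multimodal_dataset',   # hypothetical location
)
args = ConfigRegression(cli).get_config()
print(args.feature_dims)   # (4096, 74, 35), from the 'mosei' entry above
print(args.batch_size)     # 8, from datasetParas['mosei']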
/MSE-Llama2-7B/config/config_regression.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | from utils.functions import Storage
5 |
6 | class ConfigRegression():
7 | def __init__(self, args):
8 | # hyper parameters for models
9 | HYPER_MODEL_MAP = {
10 | 'cmcm': self.__CMCM
11 | }
12 | # hyper parameters for datasets
13 | self.root_dataset_dir = args.root_dataset_dir
14 | HYPER_DATASET_MAP = self.__datasetCommonParams()
15 |
16 | # normalize
17 | model_name = str.lower(args.modelName)
18 | dataset_name = str.lower(args.datasetName)
19 | # load params
20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
21 | dataArgs = HYPER_DATASET_MAP[dataset_name]
22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
23 | # integrate all parameters
24 | self.args = Storage(dict(vars(args),
25 | **dataArgs,
26 | **commonArgs,
27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
28 | ))
29 |
30 | def __datasetCommonParams(self):
31 | root_dataset_dir = self.root_dataset_dir
32 | tmp = {
33 | 'mosi':{
34 | 'unaligned': {
35 | 'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
36 | 'seq_lens': (50, 50, 50),
37 | # (text, audio, video)
38 | 'feature_dims': (4096, 5, 20),
39 | 'train_samples': 1284,
40 | 'num_classes': 3,
41 | 'language': 'en',
42 | 'KeyEval': 'MAE'
43 | }
44 | },
45 | 'mosei':{
46 | 'unaligned': {
47 | 'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
48 | 'seq_lens': (50, 500, 375),
49 | # (text, audio, video)
50 | 'feature_dims': (4096, 74, 35),
51 | 'train_samples': 16326,
52 | 'num_classes': 3,
53 | 'language': 'en',
54 | 'KeyEval': 'MAE'
55 | }
56 | },
57 |
58 |
59 | 'simsv2': {
60 | 'unaligned': {
61 | 'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'),
62 | # (batch_size, seq_lens, feature_dim)
63 | 'seq_lens': (50, 925, 232), # (text, audio, video)
64 | 'feature_dims': (4096, 25, 177), # (text, audio, video)
65 | 'train_samples': 2722,
66 | 'num_classes': 3,
67 | 'language': 'cn',
68 | 'KeyEval': 'MAE',
69 | }
70 | }
71 | }
72 | return tmp
73 |
74 | def __CMCM(self):
75 | tmp = {
76 | 'commonParas':{
77 | 'need_data_aligned': False,
78 | 'need_model_aligned': False,
79 | 'need_label_prefix':True,
80 | 'need_normalized': False,
81 | 'use_PLM': True,
82 | 'save_labels': False,
83 | },
84 | # dataset
85 | 'datasetParas':{
86 | 'mosei':{
87 |                     # the effective batch size is update_epochs * batch_size
88 | 'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, +3.0]. Assistant: The sentiment is',
89 | 'max_new_tokens': 4,
90 | 'pseudo_tokens': 4,
91 | 'batch_size': 8,
92 | 'learning_rate': 5e-5,
93 | # feature subNets
94 | 'a_lstm_hidden_size': 64,
95 | 'v_lstm_hidden_size': 32,
96 | 'a_lstm_layers': 1,
97 | 'v_lstm_layers': 1,
98 | 'a_lstm_dropout': 0.0,
99 | 'v_lstm_dropout': 0.0,
100 | 'warm_up_epochs':30,
101 | #loss weight best:1
102 | 'gamma':1,
103 | 'update_epochs': 1,
104 |                     'early_stop': 10,  # 10 and 8 make little difference
105 | # res
106 | 'H': 3.0
107 | },
108 |
109 | 'simsv2': {
110 |                     # the effective batch size is update_epochs * batch_size
111 | 'max_new_tokens': 4,
112 | 'pseudo_tokens': 4,
113 | 'task_specific_prompt': '请对上述多模态内容的情感强度进行预测,范围在[-1.0, 1.0]之间。响应: 情感为',
114 | 'batch_size': 8,
115 | 'learning_rate': 5e-5,
116 | # feature subNets
117 | 'a_lstm_hidden_size': 64,
118 | 'v_lstm_hidden_size': 64,
119 | 'a_lstm_layers': 1,
120 | 'v_lstm_layers': 1,
121 | 'a_lstm_dropout': 0.0,
122 | 'v_lstm_dropout': 0.0,
123 | 'warm_up_epochs': 40,
124 | 'update_epochs': 1,
125 | 'early_stop': 10,
126 | # loss weight best:0.25
127 | 'gamma': 1,
128 | # res
129 | 'H': 1.0
130 | },
131 | },
132 | }
133 | return tmp
134 |
135 | def get_config(self):
136 | return self.args
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/config_regression.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | from utils.functions import Storage
5 |
6 | class ConfigRegression():
7 | def __init__(self, args):
8 | # hyper parameters for models
9 | HYPER_MODEL_MAP = {
10 | 'cmcm': self.__CMCM
11 | }
12 | # hyper parameters for datasets
13 | self.root_dataset_dir = args.root_dataset_dir
14 | HYPER_DATASET_MAP = self.__datasetCommonParams()
15 |
16 | # normalize
17 | model_name = str.lower(args.modelName)
18 | dataset_name = str.lower(args.datasetName)
19 | # load params
20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
21 | dataArgs = HYPER_DATASET_MAP[dataset_name]
22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
23 | # integrate all parameters
24 | self.args = Storage(dict(vars(args),
25 | **dataArgs,
26 | **commonArgs,
27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
28 | ))
29 |
30 | def __datasetCommonParams(self):
31 | root_dataset_dir = self.root_dataset_dir
32 | tmp = {
33 | 'mosi':{
34 | 'unaligned': {
35 | 'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
36 | 'seq_lens': (50, 50, 50),
37 | # (text, audio, video)
38 | 'feature_dims': (2048, 5, 20),
39 | 'train_samples': 1284,
40 | 'num_classes': 3,
41 | 'language': 'en',
42 | 'KeyEval': 'MAE'
43 | }
44 | },
45 | 'mosei':{
46 | 'unaligned': {
47 | 'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
48 | 'seq_lens': (50, 500, 375),
49 | # (text, audio, video)
50 | 'feature_dims': (2048, 74, 35),
51 | 'train_samples': 16326,
52 | 'num_classes': 3,
53 | 'language': 'en',
54 | 'KeyEval': 'MAE'
55 | }
56 | },
57 |
58 |
59 | 'simsv2': {
60 | 'unaligned': {
61 | 'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'),
62 | # (batch_size, seq_lens, feature_dim)
63 | 'seq_lens': (50, 925, 232), # (text, audio, video)
64 | 'feature_dims': (2048, 25, 177), # (text, audio, video)
65 | 'train_samples': 2722,
66 | 'num_classes': 3,
67 | 'language': 'cn',
68 | 'KeyEval': 'MAE',
69 | }
70 | }
71 | }
72 | return tmp
73 |
74 | def __CMCM(self):
75 | tmp = {
76 | 'commonParas':{
77 | 'need_data_aligned': False,
78 | 'need_model_aligned': False,
79 | 'need_label_prefix':True,
80 | 'need_normalized': False,
81 | 'use_PLM': True,
82 | 'save_labels': False,
83 | },
84 | # dataset
85 | 'datasetParas':{
86 | 'mosei':{
87 |                     # the effective batch size is update_epochs * batch_size
88 | 'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, +3.0]. Assistant: The sentiment is',
89 | 'max_new_tokens': 4,
90 | 'pseudo_tokens': 4,
91 | 'batch_size': 16,
92 | 'learning_rate': 5e-3,
93 | # feature subNets
94 | 'a_lstm_hidden_size': 64,
95 | 'v_lstm_hidden_size': 32,
96 | 'a_lstm_layers': 1,
97 | 'v_lstm_layers': 1,
98 | 'a_lstm_dropout': 0.0,
99 | 'v_lstm_dropout': 0.0,
100 | 'warm_up_epochs':30,
101 | #loss weight best:1
102 | 'gamma':1,
103 | 'update_epochs': 1,
104 |                     'early_stop': 10,  # 10 and 8 make little difference
105 | # res
106 | 'H': 3.0,
107 | },
108 |
109 | 'simsv2': {
110 |                     # the effective batch size is update_epochs * batch_size
111 | 'max_new_tokens': 4,
112 | 'pseudo_tokens': 4,
113 | 'task_specific_prompt': '请对上述多模态内容的情感强度进行预测,范围在[-1.0, +1.0]之间。响应: 情感为',
114 | 'batch_size': 16,
115 |                     'learning_rate': 5e-4,  # 5e-4 works better
116 | # feature subNets
117 | 'a_lstm_hidden_size': 64,
118 | 'v_lstm_hidden_size': 64,
119 | 'a_lstm_layers': 1,
120 | 'v_lstm_layers': 1,
121 | 'a_lstm_dropout': 0.0,
122 | 'v_lstm_dropout': 0.0,
123 |                     'warm_up_epochs': 30,  # not sure whether 30 or 40 is better; try 30 first
124 | 'update_epochs': 1,
125 | 'early_stop': 10,
126 | # loss weight best:0.25
127 | 'gamma': 1,
128 | # res
129 | 'H': 1.0
130 | },
131 | },
132 | }
133 | return tmp
134 |
135 | def get_config(self):
136 | return self.args
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/config_classification.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | from utils.functions import Storage
5 |
6 | class ConfigClassification():
7 | def __init__(self, args):
8 | # hyper parameters for models
9 | HYPER_MODEL_MAP = {
10 | 'cmcm': self.__CMCM
11 | }
12 | # hyper parameters for datasets
13 | self.root_dataset_dir = args.root_dataset_dir
14 | HYPER_DATASET_MAP = self.__datasetCommonParams()
15 |
16 | # normalize
17 | model_name = str.lower(args.modelName)
18 | dataset_name = str.lower(args.datasetName)
19 | # load params
20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
21 | dataArgs = HYPER_DATASET_MAP[dataset_name]
22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
23 | # integrate all parameters
24 | self.args = Storage(dict(vars(args),
25 | **dataArgs,
26 | **commonArgs,
27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
28 | ))
29 |
30 | def __datasetCommonParams(self):
31 | root_dataset_dir = self.root_dataset_dir
32 | tmp = {
33 | 'iemocap':{
34 | 'unaligned': {
35 | 'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'),
36 | 'seq_lens': (84, 157, 32),
37 | # (text, audio, video)
38 | 'feature_dims': (4096, 64, 64),
39 | 'train_samples': 5240,
40 | 'num_classes': 3,
41 | 'language': 'en',
42 | 'KeyEval': 'weight_F1'
43 | }
44 | },
45 | 'meld':{
46 | 'unaligned': {
47 | 'dataPath': os.path.join(root_dataset_dir, 'MELD'),
48 | 'seq_lens': (65, 157, 32),
49 | # (text, audio, video)
50 | 'feature_dims': (4096, 64, 64),
51 | 'train_samples': 9992,
52 | 'num_classes': 3,
53 | 'language': 'en',
54 | 'KeyEval': 'weight_F1'
55 | }
56 | },
57 | 'cherma':{
58 | 'unaligned': {
59 | 'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'),
60 | # (batch_size, seq_lens, feature_dim)
61 | 'seq_lens': (78, 543, 16), # (text, audio, video)
62 | 'feature_dims': (4096, 1024, 2048), # (text, audio, video)
63 | 'train_samples': 16326,
64 | 'num_classes': 3,
65 | 'language': 'cn',
66 | 'KeyEval': 'weight_F1',
67 | }
68 | },
69 |
70 |
71 | }
72 | return tmp
73 |
74 | def __CMCM(self):
75 | tmp = {
76 | 'commonParas':{
77 | 'need_data_aligned': False,
78 | 'need_model_aligned': False,
79 | 'need_label_prefix':True,
80 | 'need_normalized': False,
81 | 'use_PLM': True,
82 | 'save_labels': False,
83 | },
84 | # dataset
85 | 'datasetParas':{
86 | 'meld':{
87 |                     # the effective batch size is update_epochs * batch_size
88 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the target \
89 | set . response: The emotion is',
90 | 'max_new_tokens': 2,
91 | 'pseudo_tokens': 4,
92 | 'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5,
93 | 'anger': 6},
94 | 'batch_size': 8,
95 | 'learning_rate': 5e-5,
96 | # feature subNets
97 | 'a_lstm_hidden_size': 64,
98 | 'v_lstm_hidden_size': 32,
99 | 'a_lstm_layers': 1,
100 | 'v_lstm_layers': 1,
101 | 'a_lstm_dropout': 0.0,
102 | 'v_lstm_dropout': 0.0,
103 | 'warm_up_epochs': 90,
104 | #loss weight best:1
105 | 'gamma':1,
106 | 'update_epochs': 1,
107 | 'early_stop': 8,
108 | # res
109 | 'H': 3.0
110 | },
111 | 'cherma':{
112 |                     # the effective batch size is update_epochs * batch_size
113 | 'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签:<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。响应: 情绪为',
114 | 'max_new_tokens': 2,
115 | 'pseudo_tokens': 4,
116 | 'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5,
117 | '惊奇': 6},
118 | 'batch_size': 8,
119 | 'learning_rate': 5e-5,
120 | # feature subNets
121 | 'a_lstm_hidden_size': 32,
122 | 'v_lstm_hidden_size': 16,
123 | 'a_lstm_layers': 1,
124 | 'v_lstm_layers': 1,
125 | 'a_lstm_dropout': 0.0,
126 | 'v_lstm_dropout': 0.0,
127 | 'warm_up_epochs': 30,
128 | 'update_epochs': 1,
129 | 'early_stop': 8,
130 | # loss weight
131 | 'gamma': 0,
132 | # res
133 | 'H': 1.0
134 | },
135 | },
136 | }
137 | return tmp
138 |
139 | def get_config(self):
140 | return self.args
--------------------------------------------------------------------------------
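The classification configs above pair a prompt with a label_index_mapping, so whatever label word the LLM generates has to be mapped back to a class index before weighted F1 ('weight_F1') can be computed. The actual decoding lives in the training code and utils/metricsTop.py, which are not part of this dump; the snippet below is only an illustrative stand-in showing how such a mapping could be applied to a decoded generation.

# Illustrative only -- not the repository's decoding code.
label_index_mapping = {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3,
                       'joy': 4, 'disgust': 5, 'anger': 6}   # MELD labels, as configured above

def decode_label(generated_text: str, mapping: dict, fallback: int = 0) -> int:
    """Return the index of the first known label word found in the generated text."""
    text = generated_text.lower()
    for word, idx in mapping.items():
        if word in text:
            return idx
    return fallback   # generation contained no known label word

print(decode_label('The emotion is joy', label_index_mapping))   # -> 4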
/MSE-Llama2-7B/config/config_classification.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | from utils.functions import Storage
5 |
6 | class ConfigClassification():
7 | def __init__(self, args):
8 | # hyper parameters for models
9 | HYPER_MODEL_MAP = {
10 | 'cmcm': self.__CMCM
11 | }
12 | # hyper parameters for datasets
13 | self.root_dataset_dir = args.root_dataset_dir
14 | HYPER_DATASET_MAP = self.__datasetCommonParams()
15 |
16 | # normalize
17 | model_name = str.lower(args.modelName)
18 | dataset_name = str.lower(args.datasetName)
19 | # load params
20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
21 | dataArgs = HYPER_DATASET_MAP[dataset_name]
22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
23 | # integrate all parameters
24 | self.args = Storage(dict(vars(args),
25 | **dataArgs,
26 | **commonArgs,
27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
28 | ))
29 |
30 | def __datasetCommonParams(self):
31 | root_dataset_dir = self.root_dataset_dir
32 | tmp = {
33 | 'iemocap':{
34 | 'unaligned': {
35 | 'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'),
36 | 'seq_lens': (84, 157, 32),
37 | # (text, audio, video)
38 | 'feature_dims': (4096, 64, 64),
39 | 'train_samples': 5240,
40 | 'num_classes': 3,
41 | 'language': 'en',
42 | 'KeyEval': 'weight_F1'
43 | }
44 | },
45 | 'meld':{
46 | 'unaligned': {
47 | 'dataPath': os.path.join(root_dataset_dir, 'MELD'),
48 | 'seq_lens': (65, 157, 32),
49 | # (text, audio, video)
50 | 'feature_dims': (4096, 64, 64),
51 | 'train_samples': 9992,
52 | 'num_classes': 3,
53 | 'language': 'en',
54 | 'KeyEval': 'weight_F1'
55 | }
56 | },
57 | 'cherma':{
58 | 'unaligned': {
59 | 'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'),
60 | # (batch_size, seq_lens, feature_dim)
61 | 'seq_lens': (78, 543, 16), # (text, audio, video)
62 | 'feature_dims': (4096, 1024, 2048), # (text, audio, video)
63 | 'train_samples': 16326,
64 | 'num_classes': 3,
65 | 'language': 'cn',
66 | 'KeyEval': 'weight_F1',
67 | }
68 | },
69 |
70 |
71 | }
72 | return tmp
73 |
74 | def __CMCM(self):
75 | tmp = {
76 | 'commonParas':{
77 | 'need_data_aligned': False,
78 | 'need_model_aligned': False,
79 | 'need_label_prefix':True,
80 | 'need_normalized': False,
81 | 'use_PLM': True,
82 | 'save_labels': False,
83 | },
84 | # dataset
85 | 'datasetParas':{
86 | 'meld':{
87 |                     # the effective batch size is update_epochs * batch_size
88 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the \
89 | target set . Assistant: The emotion is',
90 | 'max_new_tokens': 2,
91 | 'pseudo_tokens': 4,
92 | 'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5,
93 | 'anger': 6},
94 | 'batch_size': 6,
95 | 'learning_rate': 5e-4,
96 | # feature subNets
97 | 'a_lstm_hidden_size': 64,
98 |                     'v_lstm_hidden_size': 32,  # originally 32, 16
99 | 'a_lstm_layers': 1,
100 | 'v_lstm_layers': 1,
101 | 'a_lstm_dropout': 0.0,
102 | 'v_lstm_dropout': 0.0,
103 | 'warm_up_epochs':30,
104 | #loss weight best:1
105 | 'gamma':1,
106 | 'update_epochs': 1,
107 | 'early_stop': 8,
108 | # res
109 | 'H': 3.0
110 | },
111 | 'cherma':{
112 |                     # the effective batch size is update_epochs * batch_size
113 | 'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签:<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。助手: 情绪为',
114 | 'max_new_tokens': 2,
115 | 'pseudo_tokens': 4,
116 | 'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5,
117 | '惊奇': 6},
118 | 'batch_size': 6,
119 | 'learning_rate': 5e-5,
120 | # feature subNets
121 | 'a_lstm_hidden_size': 32,
122 | 'v_lstm_hidden_size': 16,
123 | 'a_lstm_layers': 1,
124 | 'v_lstm_layers': 1,
125 | 'a_lstm_dropout': 0.0,
126 | 'v_lstm_dropout': 0.0,
127 | 'warm_up_epochs': 30,
128 | 'update_epochs': 1,
129 | 'early_stop': 8,
130 | # loss weight
131 | 'gamma': 0,
132 | # res
133 | 'H': 1.0,
134 | },
135 | },
136 | }
137 | return tmp
138 |
139 | def get_config(self):
140 | return self.args
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/CMCM.py:
--------------------------------------------------------------------------------
1 | # self supervised multimodal multi-task learning network
2 | import math
3 | import os
4 | import sys
5 | import collections
6 | from torch.cuda.amp import autocast, GradScaler
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | from torch.autograd.function import Function
11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
12 |
13 | from models.subNets.Textmodel import Language_model
14 |
15 | __all__ = ['CMCM']
16 |
17 | class CMCM(nn.Module):
18 | def __init__(self, args):
19 | super(CMCM, self).__init__()
20 |         # text encoding
21 |         self.LLM = Language_model(args)
22 | 
23 |         # audio and video encoding
24 | text_in, audio_in, video_in = args.feature_dims[:]
25 | text_len, audio_len, video_len = args.seq_lens[:]
26 |
27 | self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout)
28 | self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout)
29 |
30 | self.text_guide_mixer = Text_guide_mixer()
31 | #low_rank_fusion
32 | fusion_input_size = 256
33 | self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens)
34 |
35 |
36 | def forward(self, labels, text, audio, video):
37 | audio, audio_len = audio
38 | video, video_len = video
39 | text, text_len = text
40 | text = self.LLM.text_embedding(text[:,0,:].long())
41 |
42 | video_h = self.video_LSTM(video, video_len)
43 | audio_h = self.audio_LSTM(audio, audio_len)
44 |
45 |
46 | fusion_h= self.text_guide_mixer(audio_h, video_h, text)
47 |
48 | fusion_h= self.mutli_scale_fusion(fusion_h)
49 |
50 |
51 | LLM_input = torch.cat([fusion_h, text], dim=1)
52 |
53 | LLM_output = self.LLM(LLM_input, labels)
54 |
55 | res = {
56 | 'Loss': LLM_output.loss,
57 | 'Feature_a': audio_h,
58 | 'Feature_v': video_h,
59 | 'Feature_f': fusion_h,
60 | }
61 | return res
62 |
63 | def generate(self, text, audio, video):
64 | audio, audio_len = audio
65 | video, video_len = video
66 | text, text_len = text
67 | text = self.LLM.text_embedding(text[:,0,:].long())
68 |
69 | audio_h = self.audio_LSTM(audio, audio_len)
70 | video_h = self.video_LSTM(video, video_len)
71 |
72 |
73 | fusion_h = self.text_guide_mixer(audio_h, video_h, text)
74 |
75 | # low_rank_fusion
76 |
77 | fusion_h = self.mutli_scale_fusion(fusion_h)
78 |
79 |         # concatenate the multi-scale fusion output and the text embedding
80 |
81 | LLM_input = torch.cat([fusion_h, text], dim=1)
82 |
83 | LLM_output = self.LLM.generate(LLM_input)
84 |
85 | return LLM_output
86 |
87 |
88 |
89 | class TVA_LSTM(nn.Module):
90 | def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False):
91 | '''
92 | Args:
93 | in_size: input dimension
94 | hidden_size: hidden layer dimension
95 | num_layers: specify the number of layers of LSTMs.
96 | dropout: dropout probability
97 | bidirectional: specify usage of bidirectional LSTM
98 | Output:
99 | (return value in forward) a tensor of shape (batch_size, out_size)
100 | '''
101 | super(TVA_LSTM, self).__init__()
102 | self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True)
103 | self.dropout = nn.Dropout(dropout)
104 | self.linear = nn.Linear(hidden_size, 256)
105 |
106 | def forward(self, x, lengths):
107 | '''
108 | x: (batch_size, sequence_len, in_size)
109 | '''
110 |         packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False)  # lengths are moved to CPU here because of a PyTorch version issue
111 | # _, (final_states, _) = self.rnn(packed_sequence)
112 | # h = self.dropout(final_states[-1])
113 | _, final_states = self.rnn(packed_sequence)
114 | h = self.dropout(final_states[0].squeeze())
115 | h = self.linear(h)
116 | return h
117 |
118 | class Text_guide_mixer(nn.Module):
119 | def __init__(self):
120 | super(Text_guide_mixer, self).__init__()
121 | self.GAP = nn.AdaptiveAvgPool1d(1)
122 | self.text_mlp = nn.Linear(4096, 256)
123 | def forward(self, audio, video, text):
124 | text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze()
125 | text_knowledge = self.text_mlp(text_GAP)
126 |
127 | audio_mixed = torch.mul(audio, text_knowledge)
128 | video_mixed = torch.mul(video, text_knowledge)
129 |
130 | fusion = audio_mixed + video_mixed
131 |
132 | return fusion
133 |
134 |
135 | class mutli_scale_fusion(nn.Module):
136 | def __init__(self, input_size, output_size, pseudo_tokens = 4):
137 | super(mutli_scale_fusion, self).__init__()
138 | multi_scale_hidden = 256
139 | self.scale1 = nn.Sequential(
140 | nn.Linear(input_size, output_size // 8),
141 | nn.GELU(),
142 | nn.Linear(output_size // 8, multi_scale_hidden)
143 | )
144 | self.scale2 = nn.Sequential(
145 | nn.Linear(input_size, output_size // 32),
146 | nn.GELU(),
147 | nn.Linear(output_size // 32, multi_scale_hidden)
148 | )
149 | self.scale3 = nn.Sequential(
150 | nn.Linear(input_size, output_size // 16),
151 | nn.GELU(),
152 | nn.Linear(output_size // 16, multi_scale_hidden)
153 | )
154 |
155 | self.integrating = Integrating(scales = 3)
156 | self.multi_scale_projector = nn.Linear(multi_scale_hidden, output_size)
157 | self.projector = nn.Linear(1, pseudo_tokens)
158 |
159 | def forward(self,x):
160 |         # if the last batch contains a single sample, its batch dimension may have been squeezed away; restore it to avoid a shape error
161 |         if x.dim() == 1:
162 |             x = x.unsqueeze(0)
163 |         # compute the outputs of the different-scale experts
164 | scale1 = self.scale1(x)
165 | scale2 = self.scale2(x)
166 | scale3 = self.scale3(x)
167 |
168 |
169 |         # stack the expert outputs and integrate them across scales
170 |         multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2)
171 |         multi_scale_integrating = self.integrating(multi_scale_stack)
172 | 
173 |         multi_scale = self.multi_scale_projector(multi_scale_integrating)
174 |         output = self.projector(multi_scale.unsqueeze(2))
175 |         return output.permute(0, 2, 1)  # [batch, pseudo_tokens, hidden_size]
176 | 
177 | # Define the module that integrates the three scale outputs
178 | class Integrating(nn.Module):
179 | def __init__(self, scales):
180 | super(Integrating, self).__init__()
181 |
182 | # Layers
183 | self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1),
184 | )
185 |
186 | def forward(self, x):
187 | x = x.unsqueeze(1)
188 | x = self.Integrating_layer(x)
189 | x = x.squeeze((1, 3))
190 | return x
191 |
--------------------------------------------------------------------------------
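To make the tensor flow in CMCM easier to follow, the sketch below pushes random tensors shaped like the 'mosi' entry of the regression config through the three sub-modules defined above. It assumes it is run from the MSE-ChatGLM3-6B project root (so the models.multiTask.CMCM import and its Textmodel dependency resolve) and it replaces the LLM token embeddings with a random tensor, so no ChatGLM3 weights are needed for the shape check.

import torch
from models.multiTask.CMCM import TVA_LSTM, Text_guide_mixer, mutli_scale_fusion

batch, text_dim, pseudo_tokens = 2, 4096, 4
audio = torch.randn(batch, 50, 5)            # (batch, seq_len, audio_dim), as in the mosi config
video = torch.randn(batch, 50, 20)           # (batch, seq_len, video_dim)
text = torch.randn(batch, 60, text_dim)      # stand-in for the ChatGLM3 token embeddings
a_len, v_len = torch.tensor([50, 48]), torch.tensor([50, 47])

audio_h = TVA_LSTM(5, 64, dropout=0.0)(audio, a_len)       # -> (batch, 256)
video_h = TVA_LSTM(20, 32, dropout=0.0)(video, v_len)      # -> (batch, 256)
fusion = Text_guide_mixer()(audio_h, video_h, text)        # -> (batch, 256)
pseudo = mutli_scale_fusion(256, text_dim, pseudo_tokens)(fusion)
print(pseudo.shape)    # torch.Size([2, 4, 4096]): pseudo_tokens vectors of LLM hidden size

These pseudo-token embeddings are what forward() concatenates with the text embeddings before calling the LLM.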
/MSE-Llama2-7B/models/multiTask/CMCM.py:
--------------------------------------------------------------------------------
1 | # self supervised multimodal multi-task learning network
2 | import math
3 | import os
4 | import sys
5 | import collections
6 | from torch.cuda.amp import autocast, GradScaler
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | from torch.autograd.function import Function
11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
12 |
13 | from models.subNets.Textmodel import Language_model
14 |
15 | __all__ = ['CMCM']
16 |
17 | class CMCM(nn.Module):
18 | def __init__(self, args):
19 | super(CMCM, self).__init__()
20 |         # text encoding
21 |         self.LLM = Language_model(args)
22 | 
23 |         # audio and video encoding
24 | text_in, audio_in, video_in = args.feature_dims[:]
25 | text_len, audio_len, video_len = args.seq_lens[:]
26 |
27 | self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout)
28 | self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout)
29 |
30 | self.text_guide_mixer = Text_guide_mixer()
31 | #low_rank_fusion
32 | fusion_input_size = 256
33 | self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens)
34 |
35 |
36 | def forward(self, labels, text, audio, video):
37 | audio, audio_len = audio
38 | video, video_len = video
39 | text, text_len = text
40 | text = self.LLM.text_embedding(text[:,0,:].long())
41 |
42 | video_h = self.video_LSTM(video, video_len)
43 | audio_h = self.audio_LSTM(audio, audio_len)
44 |
45 |
46 | fusion_h= self.text_guide_mixer(audio_h, video_h, text)
47 |
48 | fusion_h= self.mutli_scale_fusion(fusion_h)
49 |
50 |
51 | LLM_input = torch.cat([fusion_h, text], dim=1)
52 |
53 | LLM_output = self.LLM(LLM_input, labels)
54 |
55 | res = {
56 | 'Loss': LLM_output.loss,
57 | 'Feature_a': audio_h,
58 | 'Feature_v': video_h,
59 | 'Feature_f': fusion_h,
60 | }
61 | return res
62 |
63 | def generate(self, text, audio, video):
64 | audio, audio_len = audio
65 | video, video_len = video
66 | text, text_len = text
67 | text = self.LLM.text_embedding(text[:,0,:].long())
68 |
69 | audio_h = self.audio_LSTM(audio, audio_len)
70 | video_h = self.video_LSTM(video, video_len)
71 |
72 |
73 | fusion_h = self.text_guide_mixer(audio_h, video_h, text)
74 |
75 | # low_rank_fusion
76 |
77 | fusion_h = self.mutli_scale_fusion(fusion_h)
78 |
79 |         # concatenate the multi-scale fusion output and the text embedding
80 |
81 | LLM_input = torch.cat([fusion_h, text], dim=1)
82 |
83 | LLM_output = self.LLM.generate(LLM_input)
84 |
85 | return LLM_output
86 |
87 |
88 |
89 | class TVA_LSTM(nn.Module):
90 | def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False):
91 | '''
92 | Args:
93 | in_size: input dimension
94 | hidden_size: hidden layer dimension
95 | num_layers: specify the number of layers of LSTMs.
96 | dropout: dropout probability
97 | bidirectional: specify usage of bidirectional LSTM
98 | Output:
99 | (return value in forward) a tensor of shape (batch_size, out_size)
100 | '''
101 | super(TVA_LSTM, self).__init__()
102 | self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True)
103 | self.dropout = nn.Dropout(dropout)
104 | self.linear = nn.Linear(hidden_size, 256)
105 |
106 | def forward(self, x, lengths):
107 | '''
108 | x: (batch_size, sequence_len, in_size)
109 | '''
110 |         packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False)  # lengths are moved to CPU here because of a PyTorch version issue
111 | # _, (final_states, _) = self.rnn(packed_sequence)
112 | # h = self.dropout(final_states[-1])
113 | _, final_states = self.rnn(packed_sequence)
114 | h = self.dropout(final_states[0].squeeze())
115 | h = self.linear(h)
116 | return h
117 |
118 | class Text_guide_mixer(nn.Module):
119 | def __init__(self):
120 | super(Text_guide_mixer, self).__init__()
121 | self.GAP = nn.AdaptiveAvgPool1d(1)
122 | self.text_mlp = nn.Linear(4096, 256)
123 | def forward(self, audio, video, text):
124 | text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze()
125 | text_knowledge = self.text_mlp(text_GAP)
126 |
127 | audio_mixed = torch.mul(audio, text_knowledge)
128 | video_mixed = torch.mul(video, text_knowledge)
129 |
130 | fusion = audio_mixed + video_mixed
131 |
132 | return fusion
133 |
134 |
135 | class mutli_scale_fusion(nn.Module):
136 | def __init__(self, input_size, output_size, pseudo_tokens = 4):
137 | super(mutli_scale_fusion, self).__init__()
138 | multi_scale_hidden = 256
139 | self.scale1 = nn.Sequential(
140 | nn.Linear(input_size, output_size // 8),
141 | nn.GELU(),
142 | nn.Linear(output_size // 8, multi_scale_hidden)
143 | )
144 | self.scale2 = nn.Sequential(
145 | nn.Linear(input_size, output_size // 32),
146 | nn.GELU(),
147 | nn.Linear(output_size // 32, multi_scale_hidden)
148 | )
149 | self.scale3 = nn.Sequential(
150 | nn.Linear(input_size, output_size // 16),
151 | nn.GELU(),
152 | nn.Linear(output_size // 16, multi_scale_hidden)
153 | )
154 |
155 | self.integrating = Integrating(scales = 3)
156 | self.multi_scale_projector = nn.Linear(multi_scale_hidden, output_size)
157 | self.projector = nn.Linear(1, pseudo_tokens)
158 |
159 | def forward(self,x):
160 |         # if the last batch contains a single sample, its batch dimension may have been squeezed away; restore it to avoid a shape error
161 |         if x.dim() == 1:
162 |             x = x.unsqueeze(0)
163 |         # compute the outputs of the different-scale experts
164 | scale1 = self.scale1(x)
165 | scale2 = self.scale2(x)
166 | scale3 = self.scale3(x)
167 |
168 |
169 |         # stack the expert outputs and integrate them across scales
170 |         multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2)
171 |         multi_scale_integrating = self.integrating(multi_scale_stack)
172 | 
173 |         multi_scale = self.multi_scale_projector(multi_scale_integrating)
174 |         output = self.projector(multi_scale.unsqueeze(2))
175 |         return output.permute(0, 2, 1)  # [batch, pseudo_tokens, hidden_size]
176 | 
177 | # Define the module that integrates the three scale outputs
178 | class Integrating(nn.Module):
179 | def __init__(self, scales):
180 | super(Integrating, self).__init__()
181 |
182 | # Layers
183 | self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1),
184 | )
185 |
186 | def forward(self, x):
187 | x = x.unsqueeze(1)
188 | x = self.Integrating_layer(x)
189 | x = x.squeeze((1, 3))
190 | return x
191 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/CMCM.py:
--------------------------------------------------------------------------------
1 | # self supervised multimodal multi-task learning network
2 | import math
3 | import os
4 | import sys
5 | import collections
6 | from torch.cuda.amp import autocast, GradScaler
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | from torch.autograd.function import Function
11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
12 |
13 | from models.subNets.Textmodel import Language_model
14 |
15 | __all__ = ['CMCM']
16 |
17 | class CMCM(nn.Module):
18 | def __init__(self, args):
19 | super(CMCM, self).__init__()
20 |         # text encoding
21 |         self.LLM = Language_model(args)
22 | 
23 |         # audio and video encoding
24 | text_in, audio_in, video_in = args.feature_dims[:]
25 | text_len, audio_len, video_len = args.seq_lens[:]
26 |
27 | self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout)
28 | self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout)
29 |
30 | self.text_guide_mixer = Text_guide_mixer()
31 | #low_rank_fusion
32 | fusion_input_size = 256
33 | self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens)
34 |
35 |
36 | def forward(self, labels, text, audio, video):
37 | audio, audio_len = audio
38 | video, video_len = video
39 | text, text_len = text
40 | text = self.LLM.text_embedding(text[:,0,:].long())
41 |
42 | video_h = self.video_LSTM(video, video_len)
43 | audio_h = self.audio_LSTM(audio, audio_len)
44 |
45 |
46 | fusion_h= self.text_guide_mixer(audio_h, video_h, text)
47 |
48 | fusion_h= self.mutli_scale_fusion(fusion_h)
49 |
50 |
51 | LLM_input = torch.cat([fusion_h, text], dim=1)
52 |
53 | LLM_output = self.LLM(LLM_input, labels)
54 |
55 | res = {
56 | 'Loss': LLM_output.loss,
57 | 'Feature_a': audio_h,
58 | 'Feature_v': video_h,
59 | 'Feature_f': fusion_h,
60 | }
61 | return res
62 |
63 | def generate(self, text, audio, video):
64 | audio, audio_len = audio
65 | video, video_len = video
66 | text, text_len = text
67 | text = self.LLM.text_embedding(text[:,0,:].long())
68 |
69 | audio_h = self.audio_LSTM(audio, audio_len)
70 | video_h = self.video_LSTM(video, video_len)
71 |
72 |
73 | fusion_h = self.text_guide_mixer(audio_h, video_h, text)
74 |
75 | # low_rank_fusion
76 |
77 | fusion_h = self.mutli_scale_fusion(fusion_h)
78 |
79 |         # concatenate the multi-scale fusion output and the text embedding
80 |
81 | LLM_input = torch.cat([fusion_h, text], dim=1)
82 |
83 | LLM_output = self.LLM.generate(LLM_input)
84 |
85 | return LLM_output
86 |
87 |
88 |
89 | class TVA_LSTM(nn.Module):
90 | def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False):
91 | '''
92 | Args:
93 | in_size: input dimension
94 | hidden_size: hidden layer dimension
95 | num_layers: specify the number of layers of LSTMs.
96 | dropout: dropout probability
97 | bidirectional: specify usage of bidirectional LSTM
98 | Output:
99 | (return value in forward) a tensor of shape (batch_size, out_size)
100 | '''
101 | super(TVA_LSTM, self).__init__()
102 | self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True)
103 | self.dropout = nn.Dropout(dropout)
104 | self.linear = nn.Linear(hidden_size, 256)
105 |
106 | def forward(self, x, lengths):
107 | '''
108 | x: (batch_size, sequence_len, in_size)
109 | '''
110 |         packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False)  # lengths are moved to CPU here because of a PyTorch version issue
111 | # _, (final_states, _) = self.rnn(packed_sequence)
112 | # h = self.dropout(final_states[-1])
113 | _, final_states = self.rnn(packed_sequence)
114 | h = self.dropout(final_states[0].squeeze())
115 | h = self.linear(h)
116 | return h
117 |
118 | class Text_guide_mixer(nn.Module):
119 | def __init__(self):
120 | super(Text_guide_mixer, self).__init__()
121 | self.GAP = nn.AdaptiveAvgPool1d(1)
122 | self.text_mlp = nn.Linear(2048, 256)
123 | def forward(self, audio, video, text):
124 | text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze()
125 | text_knowledge = self.text_mlp(text_GAP)
126 |
127 | audio_mixed = torch.mul(audio, text_knowledge)
128 | video_mixed = torch.mul(video, text_knowledge)
129 |
130 | fusion = audio_mixed + video_mixed
131 |
132 | return fusion
133 |
134 |
135 | class mutli_scale_fusion(nn.Module):
136 | def __init__(self, input_size, output_size, pseudo_tokens = 4):
137 | super(mutli_scale_fusion, self).__init__()
138 | multi_scale_hidden = 256
139 | self.scale1 = nn.Sequential(
140 | nn.Linear(input_size, output_size // 8),
141 | nn.GELU(),
142 | nn.Linear(output_size // 8, multi_scale_hidden)
143 | )
144 | self.scale2 = nn.Sequential(
145 | nn.Linear(input_size, output_size // 32),
146 | nn.GELU(),
147 | nn.Linear(output_size // 32, multi_scale_hidden)
148 | )
149 | self.scale3 = nn.Sequential(
150 | nn.Linear(input_size, output_size // 16),
151 | nn.GELU(),
152 | nn.Linear(output_size // 16, multi_scale_hidden)
153 | )
154 |
155 | self.integrating = Integrating(scales = 3)
156 | self.multi_scale_projector = nn.Linear(multi_scale_hidden, output_size)
157 | self.projector = nn.Linear(1, pseudo_tokens)
158 |
159 | def forward(self,x):
160 |         # if the last batch contains a single sample, its batch dimension may have been squeezed away; restore it to avoid a shape error
161 |         if x.dim() == 1:
162 |             x = x.unsqueeze(0)
163 |         # compute the outputs of the different-scale experts
164 | scale1 = self.scale1(x)
165 | scale2 = self.scale2(x)
166 | scale3 = self.scale3(x)
167 |
168 |
169 |         # stack the expert outputs and integrate them across scales
170 |         multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2)
171 |         multi_scale_integrating = self.integrating(multi_scale_stack)
172 | 
173 |         multi_scale = self.multi_scale_projector(multi_scale_integrating)
174 |         output = self.projector(multi_scale.unsqueeze(2))
175 |         return output.permute(0, 2, 1)  # [batch, pseudo_tokens, hidden_size]
176 | 
177 | # Define the module that integrates the three scale outputs
178 | class Integrating(nn.Module):
179 | def __init__(self, scales):
180 | super(Integrating, self).__init__()
181 |
182 | # Layers
183 | self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1),
184 | )
185 |
186 | def forward(self, x):
187 | x = x.unsqueeze(1)
188 | x = self.Integrating_layer(x)
189 | x = x.squeeze((1, 3))
190 | return x
191 |
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/config_classification.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | from utils.functions import Storage
5 |
6 | class ConfigClassification():
7 | def __init__(self, args):
8 | # hyper parameters for models
9 | HYPER_MODEL_MAP = {
10 | 'cmcm': self.__CMCM
11 | }
12 | # hyper parameters for datasets
13 | self.root_dataset_dir = args.root_dataset_dir
14 | HYPER_DATASET_MAP = self.__datasetCommonParams()
15 |
16 | # normalize
17 | model_name = str.lower(args.modelName)
18 | dataset_name = str.lower(args.datasetName)
19 | # load params
20 | commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
21 | dataArgs = HYPER_DATASET_MAP[dataset_name]
22 | dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
23 | # integrate all parameters
24 | self.args = Storage(dict(vars(args),
25 | **dataArgs,
26 | **commonArgs,
27 | **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
28 | ))
29 |
30 | def __datasetCommonParams(self):
31 | root_dataset_dir = self.root_dataset_dir
32 | tmp = {
33 | 'iemocap':{
34 | 'unaligned': {
35 | 'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'),
36 | 'seq_lens': (84, 157, 32),
37 | # (text, audio, video)
38 | 'feature_dims': (2048, 64, 64),
39 | 'train_samples': 5240,
40 | 'num_classes': 3,
41 | 'language': 'en',
42 | 'KeyEval': 'weight_F1'
43 | }
44 | },
45 | 'meld':{
46 | 'unaligned': {
47 | 'dataPath': os.path.join(root_dataset_dir, 'MELD'),
48 | 'seq_lens': (65, 157, 32),
49 | # (text, audio, video)
50 | 'feature_dims': (2048, 64, 64),
51 | 'train_samples': 9992,
52 | 'num_classes': 3,
53 | 'language': 'en',
54 | 'KeyEval': 'weight_F1'
55 | }
56 | },
57 | 'cherma':{
58 | 'unaligned': {
59 | 'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'),
60 | # (batch_size, seq_lens, feature_dim)
61 | 'seq_lens': (78, 543, 16), # (text, audio, video)
62 | 'feature_dims': (2048, 1024, 2048), # (text, audio, video)
63 | 'train_samples': 16326,
64 | 'num_classes': 3,
65 | 'language': 'cn',
66 | 'KeyEval': 'weight_F1',
67 | }
68 | },
69 |
70 |
71 | }
72 | return tmp
73 |
74 | def __CMCM(self):
75 | tmp = {
76 | 'commonParas':{
77 | 'need_data_aligned': False,
78 | 'need_model_aligned': False,
79 | 'need_label_prefix':True,
80 | 'need_normalized': False,
81 | 'use_PLM': True,
82 | 'save_labels': False,
83 | },
84 | # dataset
85 | 'datasetParas':{
86 | 'iemocap':{
87 |                     # the effective batch size is update_epochs * batch_size
88 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the label \
89 | set . Assistant: The emotion is',
90 | 'max_new_tokens': 1,
91 | 'pseudo_tokens': 4,
92 | 'label_index_mapping': {'hap': 0, 'sad': 1, 'neu': 2, 'ang': 3, 'exc': 4, 'fru': 5},
93 | 'batch_size': 4,
94 | 'learning_rate': 5e-4,
95 | # feature subNets
96 | 'a_lstm_hidden_size': 32,
97 | 'v_lstm_hidden_size': 32,
98 | 'a_lstm_layers': 1,
99 | 'v_lstm_layers': 1,
100 | 'a_lstm_dropout': 0.0,
101 | 'v_lstm_dropout': 0.0,
102 | 'warm_up_epochs': 30, #it should be low
103 | 'gamma': 1,
104 | 'update_epochs': 1,
105 | 'early_stop': 8,
106 | # res
107 | 'H': 3.0
108 | },
109 | 'meld':{
110 |                     # the effective batch size is update_epochs * batch_size
111 | 'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the \
112 | target set . Assistant: The emotion is',
113 | 'max_new_tokens': 1,
114 | 'pseudo_tokens': 2,
115 | 'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5,
116 | 'anger': 6},
117 | 'batch_size': 16,
118 | 'learning_rate': 5e-4,
119 | # feature subNets
120 | 'a_lstm_hidden_size': 32,
121 | 'v_lstm_hidden_size': 16,
122 | 'a_lstm_layers': 1,
123 | 'v_lstm_layers': 1,
124 | 'a_lstm_dropout': 0.0,
125 | 'v_lstm_dropout': 0.0,
126 | 'warm_up_epochs':50,
127 | #loss weight best:1
128 | 'gamma':1,
129 | 'update_epochs': 1,
130 | 'early_stop': 8,
131 | # res
132 | 'H': 3.0
133 | },
134 | 'cherma':{
135 |                     # the effective batch size is update_epochs * batch_size
136 | 'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签:<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。助手: 情绪为',
137 | 'max_new_tokens': 1,
138 | 'pseudo_tokens': 4,
139 | 'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5,
140 | '惊奇': 6},
141 | 'batch_size': 16,
142 | 'learning_rate': 5e-3,
143 | # feature subNets
144 | 'a_lstm_hidden_size': 32,
145 | 'v_lstm_hidden_size': 16,
146 | 'a_lstm_layers': 1,
147 | 'v_lstm_layers': 1,
148 | 'a_lstm_dropout': 0.0,
149 | 'v_lstm_dropout': 0.0,
150 | 'warm_up_epochs': 30,
151 | 'update_epochs': 1,
152 | 'early_stop': 8,
153 | # loss weight
154 | 'gamma': 0,
155 | # res
156 | 'H': 1.0
157 | },
158 | },
159 | }
160 | return tmp
161 |
162 | def get_config(self):
163 | return self.args
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/run.py:
--------------------------------------------------------------------------------
1 | import os
2 | import gc
3 | import time
4 | import random
5 | import torch
6 | import pynvml
7 | import logging
8 | import argparse
9 | import numpy as np
10 | import pandas as pd
11 | from tqdm import tqdm
12 |
13 | from models.AMIO import AMIO
14 | from trains.ATIO import ATIO
15 | from data.load_data import MMDataLoader
16 | from config.config_regression import ConfigRegression
17 | from config.config_classification import ConfigClassification
18 |
19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # synchronous CUDA launches, to debug the recurring shape-mismatch errors below
21 |
22 | def setup_seed(seed):
23 | torch.manual_seed(seed)
24 | torch.cuda.manual_seed_all(seed)
25 | np.random.seed(seed)
26 | random.seed(seed)
27 | torch.backends.cudnn.deterministic = True
28 |
29 | def run(args):
30 | if not os.path.exists(args.model_save_dir):
31 | os.makedirs(args.model_save_dir)
32 | args.model_save_path = os.path.join(args.model_save_dir,\
33 | f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth')
34 |
35 | if len(args.gpu_ids) == 0 and torch.cuda.is_available():
36 | # load free-most gpu
37 | pynvml.nvmlInit()
38 | dst_gpu_id, min_mem_used = 0, 1e16
39 | for g_id in [0, 1, 2, 3]:
40 | handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
41 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
42 | mem_used = meminfo.used
43 | if mem_used < min_mem_used:
44 | min_mem_used = mem_used
45 | dst_gpu_id = g_id
46 |         print(f'Found gpu: {dst_gpu_id}, memory in use: {min_mem_used}!')
47 |         logger.info(f'Found gpu: {dst_gpu_id}, with {min_mem_used} bytes of memory in use!')
48 |         args.gpu_ids.append(dst_gpu_id)
49 |     # device
50 |     using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
51 |     logger.info("Let's use %d GPU(s)!" % len(args.gpu_ids))
52 | device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
53 | # device = "cuda:1" if torch.cuda.is_available() else "cpu"
54 | args.device = device
55 | # data
56 | dataloader = MMDataLoader(args)
57 | model = AMIO(args).to(device)
58 |
59 | def print_trainable_parameters(model):
60 | """
61 | Prints the number of trainable parameters in the model.
62 | """
63 | trainable_params = 0
64 | all_param = 0
65 | for _, param in model.named_parameters():
66 | all_param += param.numel()
67 | if param.requires_grad:
68 | trainable_params += param.numel()
69 |
70 | logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}")
71 |
72 | print_trainable_parameters(model)
73 |
74 | # using multiple gpus
75 | # if using_cuda and len(args.gpu_ids) > 1:
76 | # model = torch.nn.DataParallel(model,
77 | # device_ids=args.gpu_ids,
78 | # output_device=args.gpu_ids[0])
79 | atio = ATIO().getTrain(args)
80 | # do train
81 | atio.do_train(model, dataloader)
82 |     # load the best checkpoint saved during training
83 |     assert os.path.exists(args.model_save_path)
84 |     # load the fine-tuned parameters
85 | checkpoint = torch.load(args.model_save_path)
86 | model.load_state_dict(checkpoint, strict=False)
87 | model.to(device)
88 |
89 | # do test
90 | if args.tune_mode:
91 | # using valid dataset to debug hyper parameters
92 | results = atio.do_test(model, dataloader['valid'], mode="VALID")
93 | else:
94 | results = atio.do_test(model, dataloader['test'], mode="TEST")
95 |
96 | del model
97 | torch.cuda.empty_cache()
98 | gc.collect()
99 |
100 | return results
101 |
102 |
103 |
104 | def run_normal(args):
105 | args.res_save_dir = os.path.join(args.res_save_dir)
106 | init_args = args
107 | model_results = []
108 | seeds = args.seeds
109 | # warm_epochs =[30,40,50,60,70,80,90,100]
110 | # for warm_up_epoch in warm_epochs:
111 | # run results
112 | for i, seed in enumerate(seeds):
113 | args = init_args
114 | # load config
115 | if args.train_mode == "regression":
116 | config = ConfigRegression(args)
117 | else :
118 | config = ConfigClassification(args)
119 | args = config.get_config()
120 |
121 | setup_seed(seed)
122 | args.seed = seed
123 | # args.warm_up_epochs = warm_up_epoch
124 | logger.info('Start running %s...' % (args.modelName))
125 | logger.info(args)
126 |         # running
127 |         args.cur_time = i + 1
128 |         test_results = run(args)  # train and evaluate
129 |         # collect the results
130 | model_results.append(test_results)
131 |
132 | criterions = list(model_results[0].keys())
133 | # load other results
134 | save_path = os.path.join(args.res_save_dir, f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv')
135 | if not os.path.exists(args.res_save_dir):
136 | os.makedirs(args.res_save_dir)
137 | if os.path.exists(save_path):
138 | df = pd.read_csv(save_path)
139 | else:
140 | # df = pd.DataFrame(columns=["Model"] + criterions)
141 | df = pd.DataFrame(columns=["Model", "Seed"] + criterions)
142 | # save results
143 | # res = [args.modelName]
144 |
145 | for k, test_results in enumerate(model_results):
146 | res = [args.modelName, f'{seed}']
147 | for c in criterions:
148 | res.append(round(test_results[c] * 100, 2))
149 | df.loc[len(df)] = res
150 |
151 | # df.loc[len(df)] = res
152 | df.to_csv(save_path, index=None)
153 | logger.info('Results are added to %s...' % (save_path))
154 |         df = df.iloc[0:0]  # clear the DataFrame after saving
155 | model_results = []
156 |
157 |
158 | def set_log(args):
159 | if not os.path.exists('logs'):
160 | os.makedirs('logs')
161 | log_file_path = f'logs/{args.modelName}-{args.datasetName}.log'
162 | # set logging
163 | logger = logging.getLogger()
164 | logger.setLevel(logging.DEBUG)
165 |
166 | for ph in logger.handlers:
167 | logger.removeHandler(ph)
168 | # add FileHandler to log file
169 | formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
170 | fh = logging.FileHandler(log_file_path)
171 | fh.setLevel(logging.DEBUG)
172 | fh.setFormatter(formatter_file)
173 | logger.addHandler(fh)
174 | # add StreamHandler to terminal outputs
175 | formatter_stream = logging.Formatter('%(message)s')
176 | ch = logging.StreamHandler()
177 | ch.setLevel(logging.DEBUG)
178 | ch.setFormatter(formatter_stream)
179 | logger.addHandler(ch)
180 | return logger
181 |
182 | def parse_args():
183 | parser = argparse.ArgumentParser()
184 | parser.add_argument('--is_tune', type=bool, default=False,
185 | help='tune parameters ?')
186 | parser.add_argument('--train_mode', type=str, default="regression",
187 | help='regression / classification')
188 | parser.add_argument('--modelName', type=str, default='cmcm',
189 | help='support CMCM')
190 | parser.add_argument('--datasetName', type=str, default='mosi',
191 | help='support mosei/simsv2/meld/cherma')
192 | parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/',
193 | help='Location of the root directory where the dataset is stored')
194 | parser.add_argument('--num_workers', type=int, default=0,
195 | help='num workers of loading data')
196 | parser.add_argument('--model_save_dir', type=str, default='results/models',
197 |                         help='path to save trained model checkpoints.')
198 | parser.add_argument('--res_save_dir', type=str, default='results/results',
199 | help='path to save results.')
200 | parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/THUDM/chatglm3-6b-base/',
201 | help='path to load pretrain LLM.')
202 | parser.add_argument('--gpu_ids', type=list, default=[],
203 |                         help='indicates which GPUs will be used. If none, the most-free GPU will be selected!')  # use GPU 1
204 | return parser.parse_args()
205 |
206 | if __name__ == '__main__':
207 | args = parse_args()
208 | logger = set_log(args)
209 | for data_name in ['mosei', 'simsv2', 'meld', 'cherma']:
210 | if data_name in ['mosei', 'simsv2']:
211 | args.train_mode = 'regression'
212 | else:
213 | args.train_mode = 'classification'
214 |
215 | args.datasetName = data_name
216 | args.seeds = [1111, 2222, 3333, 4444, 5555]
217 | # args.seeds = [1111]
218 | run_normal(args)
--------------------------------------------------------------------------------
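One practical note on the GPU auto-selection in run(): the loop above probes a hard-coded set of device indices [0, 1, 2, 3]. On a machine with a different number of GPUs, a variant such as the following (a sketch only, not part of the repository) keeps the same pynvml logic but asks the driver for the actual device count:

import pynvml

def pick_freest_gpu() -> int:
    """Return the index of the GPU with the least memory currently in use."""
    pynvml.nvmlInit()
    try:
        best_id, min_used = 0, float('inf')
        for g_id in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
            used = pynvml.nvmlDeviceGetMemoryInfo(handle).used
            if used < min_used:
                best_id, min_used = g_id, used
        return best_id
    finally:
        pynvml.nvmlShutdown()

# Drop-in replacement for the hard-coded loop:
# args.gpu_ids.append(pick_freest_gpu())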
/MSE-Llama2-7B/run.py:
--------------------------------------------------------------------------------
1 | import os
2 | import gc
3 | import time
4 | import random
5 | import torch
6 | import pynvml
7 | import logging
8 | import argparse
9 | import numpy as np
10 | import pandas as pd
11 | from tqdm import tqdm
12 |
13 | from models.AMIO import AMIO
14 | from trains.ATIO import ATIO
15 | from data.load_data import MMDataLoader
16 | from config.config_regression import ConfigRegression
17 | from config.config_classification import ConfigClassification
18 |
19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # synchronous CUDA launches, to debug the recurring shape-mismatch errors below
21 |
22 | def setup_seed(seed):
23 | torch.manual_seed(seed)
24 | torch.cuda.manual_seed_all(seed)
25 | np.random.seed(seed)
26 | random.seed(seed)
27 | torch.backends.cudnn.deterministic = True
28 |
29 | def run(args):
30 | named = 'data_percent'
31 | if not os.path.exists(args.model_save_dir):
32 | os.makedirs(args.model_save_dir)
33 | args.model_save_path = os.path.join(args.model_save_dir,\
34 | f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth')
35 |
36 | if len(args.gpu_ids) == 0 and torch.cuda.is_available():
37 | # load free-most gpu
38 | pynvml.nvmlInit()
39 | dst_gpu_id, min_mem_used = 0, 1e16
40 | for g_id in [0, 1, 2, 3]:
41 | handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
42 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
43 | mem_used = meminfo.used
44 | if mem_used < min_mem_used:
45 | min_mem_used = mem_used
46 | dst_gpu_id = g_id
47 |         print(f'Found gpu: {dst_gpu_id}, memory in use: {min_mem_used}!')
48 |         logger.info(f'Found gpu: {dst_gpu_id}, with {min_mem_used} bytes of memory in use!')
49 |         args.gpu_ids.append(dst_gpu_id)
50 |     # device
51 |     using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
52 |     logger.info("Let's use %d GPU(s)!" % len(args.gpu_ids))
53 | device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
54 | # device = "cuda:1" if torch.cuda.is_available() else "cpu"
55 | args.device = device
56 | # data
57 | dataloader = MMDataLoader(args)
58 | model = AMIO(args).to(device)
59 |
60 | def print_trainable_parameters(model):
61 | """
62 | Prints the number of trainable parameters in the model.
63 | """
64 | trainable_params = 0
65 | all_param = 0
66 | for _, param in model.named_parameters():
67 | all_param += param.numel()
68 | if param.requires_grad:
69 | trainable_params += param.numel()
70 |
71 | logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}")
72 |
73 | print_trainable_parameters(model)
74 |
75 | # using multiple gpus
76 | # if using_cuda and len(args.gpu_ids) > 1:
77 | # model = torch.nn.DataParallel(model,
78 | # device_ids=args.gpu_ids,
79 | # output_device=args.gpu_ids[0])
80 | atio = ATIO().getTrain(args)
81 | # do train
82 | atio.do_train(model, dataloader)
83 |     # load the best checkpoint saved during training
84 |     assert os.path.exists(args.model_save_path)
85 |     # load the fine-tuned parameters
86 | checkpoint = torch.load(args.model_save_path)
87 | model.load_state_dict(checkpoint, strict=False)
88 | model.to(device)
89 |
90 | # do test
91 | if args.tune_mode:
92 | # using valid dataset to debug hyper parameters
93 | results = atio.do_test(model, dataloader['valid'], mode="VALID")
94 | else:
95 | results = atio.do_test(model, dataloader['test'], mode="TEST")
96 |
97 | del model
98 | torch.cuda.empty_cache()
99 | gc.collect()
100 |
101 | return results
102 |
103 |
104 |
105 | def run_normal(args):
106 | args.res_save_dir = os.path.join(args.res_save_dir)
107 | init_args = args
108 | model_results = []
109 | seeds = args.seeds
110 |
111 | for i, seed in enumerate(seeds):
112 | args = init_args
113 | # load config
114 | if args.train_mode == "regression":
115 | config = ConfigRegression(args)
116 | else:
117 | config = ConfigClassification(args)
118 | args = config.get_config()
119 |
120 | setup_seed(seed)
121 | args.seed = seed
122 | # args.warm_up_epochs = warmup
123 | logger.info('Start running %s...' % (args.modelName))
124 | logger.info(args)
125 |         # running
126 |         args.cur_time = i + 1
127 |         test_results = run(args)  # train and evaluate
128 |         # collect the results
129 | model_results.append(test_results)
130 |
131 | criterions = list(model_results[0].keys())
132 | # load other results
133 | save_path = os.path.join(args.res_save_dir,
134 | f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv')
135 | if not os.path.exists(args.res_save_dir):
136 | os.makedirs(args.res_save_dir)
137 | if os.path.exists(save_path):
138 | df = pd.read_csv(save_path)
139 | else:
140 |
141 | df = pd.DataFrame(columns=["Model", "Seed"] + criterions)
142 | # save results
143 | # res = [args.modelName]
144 |
145 | for k, test_results in enumerate(model_results):
146 | res = [args.modelName, f'{seed}']
147 | for c in criterions:
148 | res.append(round(test_results[c] * 100, 2))
149 | df.loc[len(df)] = res
150 |
151 | # df.loc[len(df)] = res
152 | df.to_csv(save_path, index=None)
153 | logger.info('Results are added to %s...' % (save_path))
154 |         df = df.iloc[0:0]  # clear the DataFrame after saving
155 | model_results = []
156 |
157 |
158 | def set_log(args):
159 | if not os.path.exists('logs'):
160 | os.makedirs('logs')
161 | log_file_path = f'logs/{args.modelName}-{args.datasetName}.log'
162 | # set logging
163 | logger = logging.getLogger()
164 | logger.setLevel(logging.DEBUG)
165 |
166 | for ph in logger.handlers:
167 | logger.removeHandler(ph)
168 | # add FileHandler to log file
169 | formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
170 | fh = logging.FileHandler(log_file_path)
171 | fh.setLevel(logging.DEBUG)
172 | fh.setFormatter(formatter_file)
173 | logger.addHandler(fh)
174 | # add StreamHandler to terminal outputs
175 | formatter_stream = logging.Formatter('%(message)s')
176 | ch = logging.StreamHandler()
177 | ch.setLevel(logging.DEBUG)
178 | ch.setFormatter(formatter_stream)
179 | logger.addHandler(ch)
180 | return logger
181 |
182 | def parse_args():
183 | parser = argparse.ArgumentParser()
184 | parser.add_argument('--is_tune', type=bool, default=False,
185 | help='tune parameters ?')
186 | parser.add_argument('--train_mode', type=str, default="regression",
187 | help='regression / classification')
188 | parser.add_argument('--modelName', type=str, default='cmcm',
189 | help='support CMCM')
190 | parser.add_argument('--datasetName', type=str, default='sims',
191 | help='support mosi/mosei/simsv2/iemocap/meld/cherma')
192 | parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/',
193 | help='Location of the root directory where the dataset is stored')
194 | parser.add_argument('--num_workers', type=int, default=0,
195 | help='num workers of loading data')
196 | parser.add_argument('--model_save_dir', type=str, default='results/models',
197 | help='path to save trained model checkpoints.')
198 | parser.add_argument('--res_save_dir', type=str, default='results/results',
199 | help='path to save results.')
200 | parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/Meta/Llama-2-7b-hf/',
201 | help='path to load pretrain LLM.')
202 | parser.add_argument('--gpu_ids', type=list, default=[2],
203 | help='indicates which GPUs will be used. If empty, the most-free GPU will be used!') # defaults to GPU 2
204 | return parser.parse_args()
205 |
206 | if __name__ == '__main__':
207 | args = parse_args()
208 | logger = set_log(args)
209 | # for data_name in ['mosi', 'mosei', 'simsv2', 'iemocap', 'meld', 'cherma' ]:
210 | # for data_name in ['simsv2','cherma']:
211 | # for data_name in ['mosi']:
212 | for data_name in ['simsv2', 'mosei', 'meld', 'cherma']:
213 | if data_name in ['mosi', 'mosei', 'sims', 'simsv2']:
214 | args.train_mode = 'regression'
215 | else:
216 | args.train_mode = 'classification'
217 |
218 | args.datasetName = data_name
219 | args.seeds = [1111, 2222, 3333, 4444, 5555]
220 | run_normal(args)
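
Note: a minimal standalone sketch (not part of the repo) of how run_normal above turns one metrics dict into a CSV row: each metric is scaled to a percentage and appended after the model name and seed. The metric names, values, and the output file name below are placeholders, not real results.

import pandas as pd

# Hypothetical metrics dict, shaped like the output of MetricsTop.
test_results = {"Mult_acc_2": 0.7812, "F1_score": 0.7790, "MAE": 0.4123}
modelName, seed = "cmcm", 1111

criterions = list(test_results.keys())
df = pd.DataFrame(columns=["Model", "Seed"] + criterions)
# One row per run: model name, seed, then every metric scaled by 100 and rounded,
# exactly as run_normal does before appending to the CSV.
row = [modelName, f"{seed}"] + [round(test_results[c] * 100, 2) for c in criterions]
df.loc[len(df)] = row
df.to_csv("demo_results.csv", index=False)
print(df)
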
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/run.py:
--------------------------------------------------------------------------------
1 | import os
2 | import gc
3 | import time
4 | import random
5 | import torch
6 | import pynvml
7 | import logging
8 | import argparse
9 | import numpy as np
10 | import pandas as pd
11 | from tqdm import tqdm
12 |
13 | from models.AMIO import AMIO
14 | from trains.ATIO import ATIO
15 | from data.load_data import MMDataLoader
16 | from config.config_regression import ConfigRegression
17 | from config.config_classification import ConfigClassification
18 |
19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # the code below kept raising shape-mismatch errors; synchronous launches make them easier to trace
21 |
22 | def setup_seed(seed):
23 | torch.manual_seed(seed)
24 | torch.cuda.manual_seed_all(seed)
25 | np.random.seed(seed)
26 | random.seed(seed)
27 | torch.backends.cudnn.deterministic = True
28 |
29 | def run(args):
30 | if not os.path.exists(args.model_save_dir):
31 | os.makedirs(args.model_save_dir)
32 | args.model_save_path = os.path.join(args.model_save_dir,\
33 | f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth')
34 |
35 | if len(args.gpu_ids) == 0 and torch.cuda.is_available():
36 | # load free-most gpu
37 | pynvml.nvmlInit()
38 | dst_gpu_id, min_mem_used = 0, 1e16
39 | for g_id in [0, 1, 2, 3]:
40 | handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
41 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
42 | mem_used = meminfo.used
43 | if mem_used < min_mem_used:
44 | min_mem_used = mem_used
45 | dst_gpu_id = g_id
46 | print(f'Found gpu: {dst_gpu_id}, memory in use: {min_mem_used}!')
47 | logger.info(f'Found gpu: {dst_gpu_id}, memory in use: {min_mem_used}!')
48 | args.gpu_ids.append(dst_gpu_id)
49 | # device
50 | using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
51 | logger.info("Let's use the GPU %d !" % int(args.gpu_ids[0]))
52 | device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
53 | # device = "cuda:1" if torch.cuda.is_available() else "cpu"
54 | args.device = device
55 | # data
56 | dataloader = MMDataLoader(args)
57 | model = AMIO(args).to(device)
58 |
59 | def print_trainable_parameters(model):
60 | """
61 | Prints the number of trainable parameters in the model.
62 | """
63 | trainable_params = 0
64 | all_param = 0
65 | for _, param in model.named_parameters():
66 | all_param += param.numel()
67 | if param.requires_grad:
68 | trainable_params += param.numel()
69 |
70 | logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}")
71 |
72 | print_trainable_parameters(model)
73 |
74 | # using multiple gpus
75 | # if using_cuda and len(args.gpu_ids) > 1:
76 | # model = torch.nn.DataParallel(model,
77 | # device_ids=args.gpu_ids,
78 | # output_device=args.gpu_ids[0])
79 | atio = ATIO().getTrain(args)
80 | # do train
81 | atio.do_train(model, dataloader)
82 | # load pretrained model
83 | assert os.path.exists(args.model_save_path)
84 | # load finetune parameters
85 | checkpoint = torch.load(args.model_save_path)
86 | model.load_state_dict(checkpoint, strict=False)
87 | model.to(device)
88 |
89 | # do test
90 | if args.tune_mode:
91 | # using valid dataset to debug hyper parameters
92 | results = atio.do_test(model, dataloader['valid'], mode="VALID")
93 | else:
94 | results = atio.do_test(model, dataloader['test'], mode="TEST")
95 |
96 | del model
97 | torch.cuda.empty_cache()
98 | gc.collect()
99 |
100 | return results
101 |
102 |
103 |
104 | def run_normal(args):
105 | args.res_save_dir = os.path.join(args.res_save_dir)
106 | init_args = args
107 | model_results = []
108 | seeds = args.seeds
109 | # warmup_list = [30]
110 | # # run results
111 | # for warmup in warmup_list:
112 | for i, seed in enumerate(seeds):
113 | args = init_args
114 | # load config
115 | if args.train_mode == "regression":
116 | config = ConfigRegression(args)
117 | else:
118 | config = ConfigClassification(args)
119 | args = config.get_config()
120 |
121 | setup_seed(seed)
122 | args.seed = seed
123 | # args.warm_up_epochs = warmup
124 | logger.info('Start running %s...' % (args.modelName))
125 | logger.info(args)
126 | # running
127 | args.cur_time = i + 1
128 | start_time = time.time()
129 | test_results = run(args)  # train and evaluate
130 |
131 | end_time = time.time()
132 | # compute the elapsed wall-clock time
133 | elapsed_time = end_time - start_time
134 | print(f"Total run time: {elapsed_time:.6f} s")
135 |
136 | # record results
137 | model_results.append(test_results)
138 |
139 | criterions = list(model_results[0].keys())
140 | # load other results
141 | save_path = os.path.join(args.res_save_dir, f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv')
142 | if not os.path.exists(args.res_save_dir):
143 | os.makedirs(args.res_save_dir)
144 | if os.path.exists(save_path):
145 | df = pd.read_csv(save_path)
146 | else:
147 |
148 | df = pd.DataFrame(columns=["Model", "Seed"] + criterions)
149 | # save results
150 | # res = [args.modelName]
151 |
152 | for k, test_results in enumerate(model_results):
153 | res = [args.modelName, f'{seed}']
154 | for c in criterions:
155 | res.append(round(test_results[c] * 100, 2))
156 | df.loc[len(df)] = res
157 |
158 | # df.loc[len(df)] = res
159 | df.to_csv(save_path, index=None)
160 | logger.info('Results are added to %s...' % (save_path))
161 | df = df.iloc[0:0] # clear the DataFrame after saving
162 | model_results = []
163 |
164 |
165 | def set_log(args):
166 | if not os.path.exists('logs'):
167 | os.makedirs('logs')
168 | log_file_path = f'logs/{args.modelName}-{args.datasetName}.log'
169 | # set logging
170 | logger = logging.getLogger()
171 | logger.setLevel(logging.DEBUG)
172 |
173 | for ph in logger.handlers:
174 | logger.removeHandler(ph)
175 | # add FileHandler to log file
176 | formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
177 | fh = logging.FileHandler(log_file_path)
178 | fh.setLevel(logging.DEBUG)
179 | fh.setFormatter(formatter_file)
180 | logger.addHandler(fh)
181 | # add StreamHandler to terminal outputs
182 | formatter_stream = logging.Formatter('%(message)s')
183 | ch = logging.StreamHandler()
184 | ch.setLevel(logging.DEBUG)
185 | ch.setFormatter(formatter_stream)
186 | logger.addHandler(ch)
187 | return logger
188 |
189 | def parse_args():
190 | parser = argparse.ArgumentParser()
191 | parser.add_argument('--is_tune', type=bool, default=False,
192 | help='whether to tune hyper-parameters')
193 | parser.add_argument('--train_mode', type=str, default="regression",
194 | help='regression / classification')
195 | parser.add_argument('--modelName', type=str, default='cmcm',
196 | help='support CMCM')
197 | parser.add_argument('--datasetName', type=str, default='sims',
198 | help='support mosi/mosei/simsv2/iemocap/meld/cherma')
199 | parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/',
200 | help='Location of the root directory where the dataset is stored')
201 | parser.add_argument('--num_workers', type=int, default=0,
202 | help='num workers of loading data')
203 | parser.add_argument('--model_save_dir', type=str, default='results/models',
204 | help='path to save trained model checkpoints.')
205 | parser.add_argument('--res_save_dir', type=str, default='results/results',
206 | help='path to save results.')
207 | parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/Qwen/Qwen-1_8B/',
208 | help='path to load pretrain LLM.')
209 | parser.add_argument('--gpu_ids', type=list, default=[],
210 | help='indicates which GPUs will be used. If empty, the most-free GPU will be used!') # empty by default: auto-select the most-free GPU
211 | return parser.parse_args()
212 |
213 | if __name__ == '__main__':
214 | args = parse_args()
215 | logger = set_log(args)
216 | for data_name in [ 'simsv2', 'mosei', 'meld', 'cherma']:
217 | if data_name in ['mosi', 'mosei', 'sims', 'simsv2']:
218 | args.train_mode = 'regression'
219 | else:
220 | args.train_mode = 'classification'
221 |
222 | args.datasetName = data_name
223 | args.seeds = [1111, 2222, 3333, 4444, 5555]
224 | run_normal(args)
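
Note: a standalone sketch of the "most-free GPU" selection that run() performs when --gpu_ids is left empty. Assumptions: pynvml is installed, an NVIDIA driver is present, and GPU indices 0-3 exist; the function name pick_freest_gpu is ours, not from the repo.

import pynvml

def pick_freest_gpu(candidate_ids=(0, 1, 2, 3)):
    """Return the candidate GPU index with the least memory currently in use."""
    pynvml.nvmlInit()
    best_id, min_used = candidate_ids[0], float("inf")
    for g_id in candidate_ids:
        handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
        used = pynvml.nvmlDeviceGetMemoryInfo(handle).used   # bytes currently allocated on that GPU
        if used < min_used:
            best_id, min_used = g_id, used
    pynvml.nvmlShutdown()
    return best_id

# e.g. device = torch.device(f"cuda:{pick_freest_gpu()}")
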
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/metricsTop.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.metrics import classification_report
4 | from sklearn.metrics import confusion_matrix
5 | from sklearn.metrics import precision_recall_fscore_support
6 | from sklearn.metrics import accuracy_score, f1_score
7 | from sklearn.metrics import r2_score
8 | from itertools import chain
9 | __all__ = ['MetricsTop']
10 |
11 | class MetricsTop():
12 | def __init__(self, args):
13 | if args.train_mode == "regression":
14 | self.metrics_dict = {
15 | 'MOSI': self.__eval_mosi_regression,
16 | 'MOSEI': self.__eval_mosei_regression,
17 | 'SIMS': self.__eval_sims_regression,
18 | 'SIMSV2': self.__eval_simsv2_regression
19 | }
20 | else:
21 | self.metrics_dict = {
22 | 'IEMOCAP': self.__eval_iemocap_classification,
23 | 'MELD': self.__eval_meld_classification,
24 | 'CHERMA': self.__eval_cherma_classification
25 | }
26 | self.label_index_mapping = args.label_index_mapping
27 |
28 | def __eval_iemocap_classification(self, results, truths):
29 | # label_index_mapping = self.label_index_mapping
30 | # # mainly computed via the confusion matrix
31 | # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results]
32 | # truths_indices = [label_index_mapping.get(label, -1) for label in truths]
33 | # acc = accuracy_score(truths_indices, results_indices)
34 | # weight_F1 = f1_score(truths_indices, results_indices, average='weighted')
35 | acc = accuracy_score(truths, results)
36 | weight_F1 = f1_score(truths, results, average='weighted')
37 |
38 | eval_result = {
39 | 'acc': acc,
40 | 'weight_F1': weight_F1
41 | }
42 | return eval_result
43 |
44 | def __eval_cherma_classification(self, results, truths):
45 | acc = accuracy_score(truths, results)
46 | weight_F1 = f1_score(truths, results, average='weighted')
47 | eval_result = {
48 | 'acc': acc,
49 | 'weight_F1': weight_F1
50 | }
51 | return eval_result
52 |
53 | def __eval_meld_classification(self, results, truths):
54 | acc = accuracy_score(truths, results)
55 | weight_F1 = f1_score(truths, results, average='weighted')
56 |
57 |
58 | eval_result = {
59 | 'acc': acc,
60 | 'weight_F1': weight_F1
61 | }
62 | return eval_result
63 |
64 |
65 |
66 |
67 | def __multiclass_acc(self, y_pred, y_true):
68 | """
69 | Compute the multiclass accuracy w.r.t. groundtruth
70 |
71 | :param preds: Float array representing the predictions, dimension (N,)
72 | :param truths: Float/int array representing the groundtruth classes, dimension (N,)
73 | :return: Classification accuracy
74 | """
75 | return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true))
76 |
77 |
78 | def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False):
79 | test_preds = y_pred.view(-1).cpu().detach().numpy()
80 | test_truth = y_true.view(-1).cpu().detach().numpy()
81 |
82 | test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.)
83 | test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.)
84 | test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.)
85 | test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.)
86 | test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.)
87 | test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.)
88 |
89 |
90 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths
91 | corr = np.corrcoef(test_preds, test_truth)[0][1]
92 | mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7)
93 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
94 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
95 |
96 | non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0])
97 | non_zeros_binary_truth = (test_truth[non_zeros] > 0)
98 | non_zeros_binary_preds = (test_preds[non_zeros] > 0)
99 |
100 | non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth)
101 | non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted')
102 |
103 | binary_truth = (test_truth >= 0)
104 | binary_preds = (test_preds >= 0)
105 | acc2 = accuracy_score(binary_preds, binary_truth)
106 | f_score = f1_score(binary_truth, binary_preds, average='weighted')
107 |
108 | eval_results = {
109 | "Has0_acc_2": round(acc2, 4),
110 | "Has0_F1_score": round(f_score, 4),
111 | "Non0_acc_2": round(non_zeros_acc2, 4),
112 | "Non0_F1_score": round(non_zeros_f1_score, 4),
113 | "Mult_acc_5": round(mult_a5, 4),
114 | "Mult_acc_7": round(mult_a7, 4),
115 | "MAE": round(mae, 4),
116 | "Corr": round(corr, 4)
117 | }
118 | return eval_results
119 |
120 |
121 | def __eval_mosi_regression(self, y_pred, y_true):
122 | return self.__eval_mosei_regression(y_pred, y_true)
123 |
124 | def __eval_sims_regression(self, y_pred, y_true):
125 | test_preds = y_pred.view(-1).cpu().detach().numpy()
126 | test_truth = y_true.view(-1).cpu().detach().numpy()
127 | test_preds = np.clip(test_preds, a_min=-1., a_max=1.)
128 | test_truth = np.clip(test_truth, a_min=-1., a_max=1.)
129 |
130 | # weak sentiment, two classes {[-0.4, 0.0], (0.0, 0.4]} (samples with |label| <= 0.4)
131 | ms_2 = [-1.01, 0.0, 1.01]
132 | weak_index_l = np.where(test_truth >= -0.4)[0]
133 | weak_index_r = np.where(test_truth <= 0.4)[0]
134 | weak_index = [x for x in weak_index_l if x in weak_index_r]
135 | test_preds_weak = test_preds[weak_index]
136 | test_truth_weak = test_truth[weak_index]
137 | test_preds_a2_weak = test_preds_weak.copy()
138 | test_truth_a2_weak = test_truth_weak.copy()
139 | for i in range(2):
140 | test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i
141 | for i in range(2):
142 | test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i
143 |
144 | # two classes{[-1.0, 0.0], (0.0, 1.0]}
145 | ms_2 = [-1.01, 0.0, 1.01]
146 | test_preds_a2 = test_preds.copy()
147 | test_truth_a2 = test_truth.copy()
148 | for i in range(2):
149 | test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i
150 | for i in range(2):
151 | test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i
152 |
153 | # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]}
154 | ms_3 = [-1.01, -0.1, 0.1, 1.01]
155 | test_preds_a3 = test_preds.copy()
156 | test_truth_a3 = test_truth.copy()
157 | for i in range(3):
158 | test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i
159 | for i in range(3):
160 | test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i
161 |
162 | # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]}
163 | ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01]
164 | test_preds_a5 = test_preds.copy()
165 | test_truth_a5 = test_truth.copy()
166 | for i in range(5):
167 | test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i
168 | for i in range(5):
169 | test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i
170 |
171 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths
172 | corr = np.corrcoef(test_preds, test_truth)[0][1]
173 | mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2)
174 | mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak)
175 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
176 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
177 | f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted')
178 | r2 = r2_score(test_truth, test_preds)
179 | eval_results = {
180 | "Mult_acc_2": mult_a2,
181 | "Mult_acc_2_weak": mult_a2_weak,
182 | "Mult_acc_3": mult_a3,
183 | "Mult_acc_5": mult_a5,
184 | "F1_score": f_score,
185 | "MAE": mae,
186 | "Corr": corr, # Correlation Coefficient
187 | "R_squre": r2
188 | }
189 | return eval_results
190 |
191 | def __eval_simsv2_regression(self, y_pred, y_true):
192 | return self.__eval_sims_regression(y_pred, y_true)
193 | def getMetics(self, datasetName):
194 | return self.metrics_dict[datasetName.upper()]
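
Note: to make the Has0 / Non0 distinction in __eval_mosei_regression concrete, a small self-contained example with toy numbers (not from any dataset): Has0 keeps every sample and treats label 0 as non-negative, while Non0 drops samples whose ground truth is exactly 0 before computing binary accuracy.

import numpy as np
from sklearn.metrics import accuracy_score

preds = np.array([0.3, -1.2, 0.1, 2.1, -0.4])
truth = np.array([1.0, -2.0, 0.0, 1.5, 0.2])

# Has0: every sample kept, zero labels fall on the non-negative side.
has0_acc = accuracy_score(truth >= 0, preds >= 0)

# Non0: samples with ground-truth 0 are excluded, then strict positive/negative.
nz = truth != 0
non0_acc = accuracy_score(truth[nz] > 0, preds[nz] > 0)

print(has0_acc, non0_acc)  # 0.8 0.75
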
--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/metricsTop.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.metrics import classification_report
4 | from sklearn.metrics import confusion_matrix
5 | from sklearn.metrics import precision_recall_fscore_support
6 | from sklearn.metrics import accuracy_score, f1_score
7 | from sklearn.metrics import r2_score
8 | from itertools import chain
9 | __all__ = ['MetricsTop']
10 |
11 | class MetricsTop():
12 | def __init__(self, args):
13 | if args.train_mode == "regression":
14 | self.metrics_dict = {
15 | 'MOSI': self.__eval_mosi_regression,
16 | 'MOSEI': self.__eval_mosei_regression,
17 | 'SIMS': self.__eval_sims_regression,
18 | 'SIMSV2': self.__eval_simsv2_regression
19 | }
20 | else:
21 | self.metrics_dict = {
22 | 'IEMOCAP': self.__eval_iemocap_classification,
23 | 'MELD': self.__eval_meld_classification,
24 | 'CHERMA': self.__eval_cherma_classification
25 | }
26 | self.label_index_mapping = args.label_index_mapping
27 |
28 | def __eval_iemocap_classification(self, results, truths):
29 | # label_index_mapping = self.label_index_mapping
30 | # # mainly computed via the confusion matrix
31 | # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results]
32 | # truths_indices = [label_index_mapping.get(label, -1) for label in truths]
33 | # acc = accuracy_score(truths_indices, results_indices)
34 | # weight_F1 = f1_score(truths_indices, results_indices, average='weighted')
35 | acc = accuracy_score(truths, results)
36 | weight_F1 = f1_score(truths, results, average='weighted')
37 |
38 | eval_result = {
39 | 'acc': acc,
40 | 'weight_F1': weight_F1
41 | }
42 | return eval_result
43 |
44 | def __eval_cherma_classification(self, results, truths):
45 | acc = accuracy_score(truths, results)
46 | weight_F1 = f1_score(truths, results, average='weighted')
47 | eval_result = {
48 | 'acc': acc,
49 | 'weight_F1': weight_F1
50 | }
51 | return eval_result
52 |
53 | def __eval_meld_classification(self, results, truths):
54 | acc = accuracy_score(truths, results)
55 | weight_F1 = f1_score(truths, results, average='weighted')
56 |
57 |
58 | eval_result = {
59 | 'acc': acc,
60 | 'weight_F1': weight_F1
61 | }
62 | return eval_result
63 |
64 |
65 |
66 |
67 | def __multiclass_acc(self, y_pred, y_true):
68 | """
69 | Compute the multiclass accuracy w.r.t. groundtruth
70 |
71 | :param preds: Float array representing the predictions, dimension (N,)
72 | :param truths: Float/int array representing the groundtruth classes, dimension (N,)
73 | :return: Classification accuracy
74 | """
75 | return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true))
76 |
77 |
78 | def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False):
79 | test_preds = y_pred.view(-1).cpu().detach().numpy()
80 | test_truth = y_true.view(-1).cpu().detach().numpy()
81 |
82 | test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.)
83 | test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.)
84 | test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.)
85 | test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.)
86 | test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.)
87 | test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.)
88 |
89 |
90 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths
91 | corr = np.corrcoef(test_preds, test_truth)[0][1]
92 | mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7)
93 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
94 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
95 |
96 | non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0])
97 | non_zeros_binary_truth = (test_truth[non_zeros] > 0)
98 | non_zeros_binary_preds = (test_preds[non_zeros] > 0)
99 |
100 | non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth)
101 | non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted')
102 |
103 | binary_truth = (test_truth >= 0)
104 | binary_preds = (test_preds >= 0)
105 | acc2 = accuracy_score(binary_preds, binary_truth)
106 | f_score = f1_score(binary_truth, binary_preds, average='weighted')
107 |
108 | eval_results = {
109 | "Has0_acc_2": round(acc2, 4),
110 | "Has0_F1_score": round(f_score, 4),
111 | "Non0_acc_2": round(non_zeros_acc2, 4),
112 | "Non0_F1_score": round(non_zeros_f1_score, 4),
113 | "Mult_acc_5": round(mult_a5, 4),
114 | "Mult_acc_7": round(mult_a7, 4),
115 | "MAE": round(mae, 4),
116 | "Corr": round(corr, 4)
117 | }
118 | return eval_results
119 |
120 |
121 | def __eval_mosi_regression(self, y_pred, y_true):
122 | return self.__eval_mosei_regression(y_pred, y_true)
123 |
124 | def __eval_sims_regression(self, y_pred, y_true):
125 | test_preds = y_pred.view(-1).cpu().detach().numpy()
126 | test_truth = y_true.view(-1).cpu().detach().numpy()
127 | test_preds = np.clip(test_preds, a_min=-1., a_max=1.)
128 | test_truth = np.clip(test_truth, a_min=-1., a_max=1.)
129 |
130 | # weak sentiment, two classes {[-0.4, 0.0], (0.0, 0.4]} (samples with |label| <= 0.4)
131 | ms_2 = [-1.01, 0.0, 1.01]
132 | weak_index_l = np.where(test_truth >= -0.4)[0]
133 | weak_index_r = np.where(test_truth <= 0.4)[0]
134 | weak_index = [x for x in weak_index_l if x in weak_index_r]
135 | test_preds_weak = test_preds[weak_index]
136 | test_truth_weak = test_truth[weak_index]
137 | test_preds_a2_weak = test_preds_weak.copy()
138 | test_truth_a2_weak = test_truth_weak.copy()
139 | for i in range(2):
140 | test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i
141 | for i in range(2):
142 | test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i
143 |
144 | # two classes{[-1.0, 0.0], (0.0, 1.0]}
145 | ms_2 = [-1.01, 0.0, 1.01]
146 | test_preds_a2 = test_preds.copy()
147 | test_truth_a2 = test_truth.copy()
148 | for i in range(2):
149 | test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i
150 | for i in range(2):
151 | test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i
152 |
153 | # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]}
154 | ms_3 = [-1.01, -0.1, 0.1, 1.01]
155 | test_preds_a3 = test_preds.copy()
156 | test_truth_a3 = test_truth.copy()
157 | for i in range(3):
158 | test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i
159 | for i in range(3):
160 | test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i
161 |
162 | # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]}
163 | ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01]
164 | test_preds_a5 = test_preds.copy()
165 | test_truth_a5 = test_truth.copy()
166 | for i in range(5):
167 | test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i
168 | for i in range(5):
169 | test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i
170 |
171 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths
172 | corr = np.corrcoef(test_preds, test_truth)[0][1]
173 | mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2)
174 | mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak)
175 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
176 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
177 | f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted')
178 | r2 = r2_score(test_truth, test_preds)
179 | eval_results = {
180 | "Mult_acc_2": mult_a2,
181 | "Mult_acc_2_weak": mult_a2_weak,
182 | "Mult_acc_3": mult_a3,
183 | "Mult_acc_5": mult_a5,
184 | "F1_score": f_score,
185 | "MAE": mae,
186 | "Corr": corr, # Correlation Coefficient
187 | "R_squre": r2
188 | }
189 | return eval_results
190 |
191 | def __eval_simsv2_regression(self, y_pred, y_true):
192 | return self.__eval_sims_regression(y_pred, y_true)
193 | def getMetics(self, datasetName):
194 | return self.metrics_dict[datasetName.upper()]
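
Note: the repeated np.logical_and loops in __eval_sims_regression implement a simple bucketing of scores into classes by cut points. As a side note, the same mapping can apparently be written with np.digitize; the sketch below (toy scores, checked only for the 5-class cut points used above) shows both forms giving the same classes.

import numpy as np

ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01]
scores = np.array([-0.9, -0.3, 0.0, 0.4, 0.8])

# Loop version, as in __eval_sims_regression.
classes_loop = scores.copy()
for i in range(5):
    classes_loop[np.logical_and(scores > ms_5[i], scores <= ms_5[i + 1])] = i

# Vectorised alternative: right-closed bins at the interior cut points.
classes_digitize = np.digitize(scores, ms_5[1:-1], right=True)

print(classes_loop)      # [0. 1. 2. 3. 4.]
print(classes_digitize)  # [0 1 2 3 4]
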
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/metricsTop.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from sklearn.metrics import classification_report
4 | from sklearn.metrics import confusion_matrix
5 | from sklearn.metrics import precision_recall_fscore_support
6 | from sklearn.metrics import accuracy_score, f1_score
7 | from sklearn.metrics import r2_score
8 | from itertools import chain
9 | __all__ = ['MetricsTop']
10 |
11 | class MetricsTop():
12 | def __init__(self, args):
13 | if args.train_mode == "regression":
14 | self.metrics_dict = {
15 | 'MOSI': self.__eval_mosi_regression,
16 | 'MOSEI': self.__eval_mosei_regression,
17 | 'SIMS': self.__eval_sims_regression,
18 | 'SIMSV2': self.__eval_simsv2_regression
19 | }
20 | else:
21 | self.metrics_dict = {
22 | 'IEMOCAP': self.__eval_iemocap_classification,
23 | 'MELD': self.__eval_meld_classification,
24 | 'CHERMA': self.__eval_cherma_classification
25 | }
26 | self.label_index_mapping = args.label_index_mapping
27 |
28 | def __eval_iemocap_classification(self, results, truths):
29 | # label_index_mapping = self.label_index_mapping
30 | # # mainly computed via the confusion matrix
31 | # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results]
32 | # truths_indices = [label_index_mapping.get(label, -1) for label in truths]
33 | # acc = accuracy_score(truths_indices, results_indices)
34 | # weight_F1 = f1_score(truths_indices, results_indices, average='weighted')
35 | acc = accuracy_score(truths, results)
36 | weight_F1 = f1_score(truths, results, average='weighted')
37 |
38 | eval_result = {
39 | 'acc': acc,
40 | 'weight_F1': weight_F1
41 | }
42 | return eval_result
43 |
44 | def __eval_cherma_classification(self, results, truths):
45 | acc = accuracy_score(truths, results)
46 | weight_F1 = f1_score(truths, results, average='weighted')
47 | eval_result = {
48 | 'acc': acc,
49 | 'weight_F1': weight_F1
50 | }
51 | return eval_result
52 |
53 | def __eval_meld_classification(self, results, truths):
54 | acc = accuracy_score(truths, results)
55 | weight_F1 = f1_score(truths, results, average='weighted')
56 |
57 |
58 | eval_result = {
59 | 'acc': acc,
60 | 'weight_F1': weight_F1
61 | }
62 | return eval_result
63 |
64 |
65 |
66 |
67 | def __multiclass_acc(self, y_pred, y_true):
68 | """
69 | Compute the multiclass accuracy w.r.t. groundtruth
70 |
71 | :param preds: Float array representing the predictions, dimension (N,)
72 | :param truths: Float/int array representing the groundtruth classes, dimension (N,)
73 | :return: Classification accuracy
74 | """
75 | return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true))
76 |
77 |
78 | def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False):
79 | test_preds = y_pred.view(-1).cpu().detach().numpy()
80 | test_truth = y_true.view(-1).cpu().detach().numpy()
81 |
82 | test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.)
83 | test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.)
84 | test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.)
85 | test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.)
86 | test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.)
87 | test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.)
88 |
89 |
90 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths
91 | corr = np.corrcoef(test_preds, test_truth)[0][1]
92 | mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7)
93 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
94 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
95 |
96 | non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0])
97 | non_zeros_binary_truth = (test_truth[non_zeros] > 0)
98 | non_zeros_binary_preds = (test_preds[non_zeros] > 0)
99 |
100 | non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth)
101 | non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted')
102 |
103 | binary_truth = (test_truth >= 0)
104 | binary_preds = (test_preds >= 0)
105 | acc2 = accuracy_score(binary_preds, binary_truth)
106 | f_score = f1_score(binary_truth, binary_preds, average='weighted')
107 |
108 | eval_results = {
109 | "Has0_acc_2": round(acc2, 4),
110 | "Has0_F1_score": round(f_score, 4),
111 | "Non0_acc_2": round(non_zeros_acc2, 4),
112 | "Non0_F1_score": round(non_zeros_f1_score, 4),
113 | "Mult_acc_5": round(mult_a5, 4),
114 | "Mult_acc_7": round(mult_a7, 4),
115 | "MAE": round(mae, 4),
116 | "Corr": round(corr, 4)
117 | }
118 | return eval_results
119 |
120 |
121 | def __eval_mosi_regression(self, y_pred, y_true):
122 | return self.__eval_mosei_regression(y_pred, y_true)
123 |
124 | def __eval_sims_regression(self, y_pred, y_true):
125 | test_preds = y_pred.view(-1).cpu().detach().numpy()
126 | test_truth = y_true.view(-1).cpu().detach().numpy()
127 | test_preds = np.clip(test_preds, a_min=-1., a_max=1.)
128 | test_truth = np.clip(test_truth, a_min=-1., a_max=1.)
129 |
130 | # weak sentiment, two classes {[-0.4, 0.0], (0.0, 0.4]} (samples with |label| <= 0.4)
131 | ms_2 = [-1.01, 0.0, 1.01]
132 | weak_index_l = np.where(test_truth >= -0.4)[0]
133 | weak_index_r = np.where(test_truth <= 0.4)[0]
134 | weak_index = [x for x in weak_index_l if x in weak_index_r]
135 | test_preds_weak = test_preds[weak_index]
136 | test_truth_weak = test_truth[weak_index]
137 | test_preds_a2_weak = test_preds_weak.copy()
138 | test_truth_a2_weak = test_truth_weak.copy()
139 | for i in range(2):
140 | test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i
141 | for i in range(2):
142 | test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i
143 |
144 | # two classes{[-1.0, 0.0], (0.0, 1.0]}
145 | ms_2 = [-1.01, 0.0, 1.01]
146 | test_preds_a2 = test_preds.copy()
147 | test_truth_a2 = test_truth.copy()
148 | for i in range(2):
149 | test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i
150 | for i in range(2):
151 | test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i
152 |
153 | # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]}
154 | ms_3 = [-1.01, -0.1, 0.1, 1.01]
155 | test_preds_a3 = test_preds.copy()
156 | test_truth_a3 = test_truth.copy()
157 | for i in range(3):
158 | test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i
159 | for i in range(3):
160 | test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i
161 |
162 | # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]}
163 | ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01]
164 | test_preds_a5 = test_preds.copy()
165 | test_truth_a5 = test_truth.copy()
166 | for i in range(5):
167 | test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i
168 | for i in range(5):
169 | test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i
170 |
171 | mae = np.mean(np.absolute(test_preds - test_truth)) # Average L1 distance between preds and truths
172 | corr = np.corrcoef(test_preds, test_truth)[0][1]
173 | mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2)
174 | mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak)
175 | mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
176 | mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
177 | f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted')
178 | r2 = r2_score(test_truth, test_preds)
179 | eval_results = {
180 | "Mult_acc_2": mult_a2,
181 | "F1_score": f_score,
182 | "Mult_acc_2_weak": mult_a2_weak,
183 | "MAE": mae,
184 | "Corr": corr, # Correlation Coefficient
185 | "Mult_acc_3": mult_a3,
186 | "Mult_acc_5": mult_a5,
187 | "R_squre": r2
188 | }
189 | return eval_results
190 |
191 | def __eval_simsv2_regression(self, y_pred, y_true):
192 | return self.__eval_sims_regression(y_pred, y_true)
193 | def getMetics(self, datasetName):
194 | return self.metrics_dict[datasetName.upper()]
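
Note: a minimal usage sketch of MetricsTop. The args object below is hypothetical (only the two fields the class reads); in the repo the real args come from the config classes and the trainer passes model outputs as tensors.

from argparse import Namespace
import torch
from utils.metricsTop import MetricsTop

args = Namespace(train_mode="regression", label_index_mapping=None)
metrics = MetricsTop(args).getMetics("sims")   # the method name is spelled getMetics in this repo

y_pred = torch.tensor([0.2, -0.5, 0.7, 0.0])
y_true = torch.tensor([0.4, -0.6, 0.9, -0.1])
print(metrics(y_pred, y_true))   # dict with Mult_acc_*, F1_score, MAE, Corr, R_squre
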
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/Textmodel.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import collections
4 | import re
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from models.ChatGLM3.modeling_chatglm import ChatGLMForConditionalGeneration
10 | from models.ChatGLM3.tokenization_chatglm import ChatGLMTokenizer
11 |
12 | __all__ = ['Language_model']
13 |
14 | class Language_model(nn.Module):
15 | def __init__(self, args, use_PLM = True):
16 | """
17 | language: en / cn
18 | """
19 | super(Language_model, self).__init__()
20 |
21 | if use_PLM:
22 | pretrained_model = args.pretrain_LM  # select the pretrained model
23 | self.model = ChatGLMForConditionalGeneration.from_pretrained(pretrained_model, trust_remote_code=True, torch_dtype=torch.bfloat16).half()
24 | self.tokenizer = ChatGLMTokenizer.from_pretrained(pretrained_model, trust_remote_code=True)
25 | self.device = args.device
26 | self.language = args.language
27 | self.max_new_tokens = args.max_new_tokens
28 | self.datasetName = args.datasetName
29 | self.train_mode = args.train_mode
30 | self.task_specific_prompt = args.task_specific_prompt
31 | # freeze parameter
32 | for param in self.model.parameters():
33 | param.requires_grad = False
34 | else:
35 | print('please use PLM')
36 |
37 | def text_embedding(self,text_ids):
38 | embeddings = self.model.base_model.get_input_embeddings()
39 | return embeddings(text_ids)
40 |
41 |
42 | def forward(self, fusion_embedding, labels):
43 | """
44 | Args:
45 | fusion_embedding: the "concatenate" result of multimodal low rank fusion and text embedding
46 | label: ground_truth
47 | """
48 |
49 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  # wrap the multimodal input with the special prompt
50 | opt_tokens, labels = self.input_processing(fusion_embedding, labels, mode = 'train')  # build the fusion + prompt + answer_mask input and labels
51 |
52 | with torch.cuda.amp.autocast():
53 | output = self.model(input_ids = opt_tokens, input_fusion=fusion_embedding, labels = labels) # Models outputs are now tuples
54 |
55 | return output
56 |
57 | def generate(self, fusion_embedding):
58 | """
59 | Args:
60 | samples (dict): A dictionary containing the following keys:
61 | use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling.
62 | num_beams (int): Number of beams for beam search. 1 means no beam search.
63 | max_new_tokens (int): The maximum length of the new tokens to be generated.
64 | top_p (float): The cumulative probability for nucleus sampling.
65 | top_k (int): The k for top-k sampling.
66 | penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty.
67 | num_captions (int): Number of captions to be generated for each image.
68 | """
69 | if self.train_mode == 'regression':
70 | # gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "penalty_alpha": 0.6, "top_p": 0.01, "temperature": 0.01}
71 | gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "top_k": 10}
72 | else:
73 | gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "top_k": 10 }
74 |
75 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  # wrap the multimodal input with the special prompt
76 | opt_tokens, _ = self.input_processing(fusion_embedding, mode = 'generate')  # build the fusion + prompt input
77 |
78 | context_length = opt_tokens.size(1)
79 | all_responses =[]
80 |
81 | for outputs in self.model.stream_generate(opt_tokens, **gen_kwargs, input_fusion=fusion_embedding):
82 | outputs = outputs[:, context_length:].tolist()
83 | response = self.tokenizer.batch_decode(outputs)
84 | # all_responses = list(map(float, response))
85 | # all_responses = list(map(lambda x: float(x.replace('–', '-')), response))
86 | # all_responses = list(map(lambda x: float(x.replace('–', '-').replace('一', '-').replace(':', '').replace('/', '').replace('(', '').replace(':', '')), response))
87 | # all_responses = [float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?'
173 | special_token = ''
174 | else:
175 | prompt = '{问题}\n\n <多模态>多模态>'
176 | special_token = ''
177 |
178 | batch_size = fusion_embeddings.shape[0]
179 | p_before, p_after = prompt.split(special_token)
180 | p_before_tokens = self.tokenizer(
181 | p_before, return_tensors="pt", add_special_tokens=True).to(self.device)
182 | p_after_tokens = self.tokenizer(
183 | p_after, return_tensors="pt", add_special_tokens=False).to(self.device)
184 | p_before_embeds = self.text_embedding(p_before_tokens.input_ids).expand(batch_size, -1, -1)
185 | p_after_embeds = self.text_embedding(p_after_tokens.input_ids).expand(batch_size, -1, -1)
186 | wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1)
187 |
188 | return wrapped_fusion_embeddings
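
Note: the tail of multimodal_prompt_wrap above splices the fusion embeddings between the embedded halves of the text prompt. A self-contained toy sketch of that splicing, using a generic nn.Embedding and made-up token ids instead of the ChatGLM embeddings and tokenizer:

import torch
import torch.nn as nn

vocab, hidden = 100, 16
embed = nn.Embedding(vocab, hidden)            # toy stand-in for the LLM input embeddings

p_before_ids = torch.tensor([[1, 2, 3]])       # tokens before the placeholder
p_after_ids = torch.tensor([[4, 5]])           # tokens after the placeholder
fusion = torch.randn(2, 6, hidden)             # batch of 2, 6 fusion "tokens"

batch = fusion.shape[0]
p_before = embed(p_before_ids).expand(batch, -1, -1)
p_after = embed(p_after_ids).expand(batch, -1, -1)
wrapped = torch.cat([p_before, fusion, p_after], dim=1)
print(wrapped.shape)   # torch.Size([2, 11, 16])
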
--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/TextPre.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import h5py
4 | import pickle
5 | import argparse
6 | import numpy as np
7 | from tqdm import tqdm
8 |
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification
13 | # from pytorch_transformers.amir_tokenization import BertTokenizer
14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule
15 |
16 | # from transformers.tokenization import BertTokenizer
17 | from models.subNets.BertTextEncoder import BertTextEncoder
18 |
19 | class TextPre(object):
20 | """A single set of features of data."""
21 |
22 | def __init__(self, args):
23 | self.device = torch.device('cuda:0')
24 | self.args = args
25 | self.loadTextMap = {
26 | 'mosi': self.__load_data_mosi,
27 | 'mosei': self.__load_data_mosei
28 | }
29 | self.bert = BertTextEncoder(language=args.language).to(self.device)
30 |
31 | def textConvertID(self, data, tokenizer):
32 | features = {}
33 | Input_ids, Input_mask, Segment_ids = [], [], []
34 | Raw_text, Visual, Audio = [], [], []
35 | Label, ids = [], []
36 | max_seq_length = self.args.max_seq_length
37 | for i in tqdm(range(len(data['raw_text']))):
38 | raw_text = data['raw_text'][i]
39 | visual = data['vision'][i]
40 | audio = data['audio'][i]
41 | tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True)
42 |
43 | if len(tokens_a) > max_seq_length - 2:
44 | tokens_a = tokens_a[:max_seq_length - 2]
45 | inversions_a = inversions_a[:max_seq_length - 2]
46 |
47 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
48 |
49 | segment_ids = [0] * len(tokens)
50 |
51 | input_ids = tokenizer.convert_tokens_to_ids(tokens)
52 |
53 | input_mask = [1] * len(input_ids)
54 | padding = [0] * (max_seq_length - len(input_ids))
55 |
56 |
57 | if self.args.aligned:
58 | text_len = min(len(raw_text.split()), max_seq_length)
59 | new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a]
60 | new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a]
61 |
62 | visual = np.array(new_visual)
63 | audio = np.array(new_audio)
64 |
65 | # add "start" and "end" for audio and vision
66 | audio_zero = np.zeros((1,audio.shape[1]))
67 | audio = np.concatenate((audio_zero,audio,audio_zero))
68 |
69 | visual_zero = np.zeros((1,visual.shape[1]))
70 | visual = np.concatenate((visual_zero,visual,visual_zero))
71 |
72 | audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1]))
73 | audio = np.concatenate((audio,audio_padding))
74 |
75 | video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1]))
76 | visual = np.concatenate((visual,video_padding))
77 |
78 | assert audio.shape[0] == max_seq_length
79 | assert visual.shape[0] == max_seq_length
80 |
81 | input_ids += padding
82 | input_mask += padding
83 | segment_ids += padding
84 |
85 | assert len(input_ids) == max_seq_length
86 | assert len(input_mask) == max_seq_length
87 | assert len(segment_ids) == max_seq_length
88 |
89 | label = float(data['labels'][i])
90 |
91 | Input_ids.append(input_ids)
92 | Visual.append(visual)
93 | Audio.append(audio)
94 | Input_mask.append(input_mask)
95 | Segment_ids.append(segment_ids)
96 | Label.append(label)
97 | Raw_text.append(raw_text)
98 | ids.append(data['id'][i])
99 |
100 | features['raw_text'] = np.array(Raw_text)
101 | features['audio'] = np.array(Audio)
102 | features['vision'] = np.array(Visual)
103 | features['labels'] = np.array(Label)
104 | features['id'] = np.array(ids)
105 | Input_ids = np.expand_dims(Input_ids, 1)
106 | Input_mask = np.expand_dims(Input_mask, 1)
107 | Segment_ids = np.expand_dims(Segment_ids, 1)
108 | text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1)
109 | features['text_bert'] = text_bert
110 | features['text'] = self.__convertID2Vector(text_bert)
111 | return features
112 |
113 | def __convertID2Vector(self, ids, batch_size=64):
114 | results = []
115 | left = 0
116 | ids = torch.Tensor(ids)
117 | for left in tqdm(range(0, ids.size(0), batch_size)):
118 | right = min(left + batch_size, ids.size(0))
119 | c_ids = ids[left:right].to(self.device)
120 | c_vector = self.bert(c_ids).detach().cpu().numpy()
121 | results.append(c_vector)
122 | results = np.concatenate(results, axis=0)
123 | return results
124 |
125 | def __load_data_mosi(self):
126 | # get text data
127 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
128 | text_data = {}
129 | for file in os.listdir(link):
130 | name = file.split('.')[0]
131 | for line in open(os.path.join(link, file), "r"):
132 | num_id, cur_t = line.split('_DELIM_')
133 | name_id = name + '_' + num_id.strip()
134 | text_data[name_id] = cur_t.strip()
135 | # get data
136 | def matchData(mode='train'):
137 | r_text = []
138 | for cur_id in data[mode]['id']:
139 | r_text.append(text_data[cur_id[0]])
140 | data[mode]['raw_text'] = r_text
141 |
142 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
143 | data = pickle.load(lf)
144 |
145 | matchData(mode='train')
146 | matchData(mode='valid')
147 | matchData(mode='test')
148 |
149 | return data
150 |
151 | def __load_data_mosei(self):
152 | def convert0(s):
153 | if s == '0':
154 | return '0.0'
155 | return s
156 | # get text data
157 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
158 | text_data = {}
159 | for file in os.listdir(link):
160 | name = file.split('.')[0]
161 | for line in open(os.path.join(link, file), "r"):
162 | items = line.split('___')
163 | name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3])
164 | text_data[name_id.strip()] = items[-1].strip()
165 | # get data
166 | def matchData(mode='train'):
167 | r_text = []
168 | for cur_id in data[mode]['id']:
169 | name = '_'.join(cur_id)
170 | r_text.append(text_data[name])
171 | data[mode]['raw_text'] = r_text
172 |
173 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
174 | data = pickle.load(lf)
175 |
176 | matchData(mode='train')
177 | matchData(mode='valid')
178 | matchData(mode='test')
179 |
180 | return data
181 |
182 | def run(self):
183 | data = self.loadTextMap[self.args.datasetName]()
184 |
185 | train_list = data['train']
186 | valid_list = data['valid']
187 | test_list = data['test']
188 |
189 | tokenizer = self.bert.get_tokenizer()
190 |
191 | save_data = {}
192 | save_data['train'] = self.textConvertID(train_list, tokenizer)
193 | save_data['valid'] = self.textConvertID(valid_list, tokenizer)
194 | save_data['test'] = self.textConvertID(test_list, tokenizer)
195 |
196 | if self.args.aligned:
197 | saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl')
198 | else:
199 | saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl')
200 |
201 | if not os.path.exists(os.path.dirname(saved_path)):
202 | os.makedirs(os.path.dirname(saved_path))
203 |
204 | with open(saved_path, 'wb') as file:
205 | pickle.dump(save_data, file, protocol=4)
206 | print('Save Successful!')
207 |
208 | def parse_args():
209 | parser = argparse.ArgumentParser()
210 | parser.add_argument('--datasetName', type=str, default='mosei',
211 | help='dataset name (support mosi / mosei)')
212 | parser.add_argument('--language', type=str, default='cn',
213 | help='data language')
214 | parser.add_argument('--aligned', type=bool, default=True,
215 | help='need aligned data')
216 | parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI',
217 | help='path to MOSI / MOSEI')
218 | parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw',
219 | help='path to saved directory')
220 | parser.add_argument('--max_seq_length', type=int, default = 50,
221 | help='maximum text sequence length')
222 | return parser.parse_args()
223 |
224 | if __name__ == "__main__":
225 | args = parse_args()
226 | tp = TextPre(args)
227 | tp.run()
228 | # tp.convertID2Vector()
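
Note: a small sketch of the text_bert array that textConvertID above builds: input ids, attention mask and segment ids are stacked along axis 1, giving shape (num_samples, 3, max_seq_length). The ids below are toy values, not real tokenizer output.

import numpy as np

max_seq_length = 6
input_ids   = [[101, 7592, 2088, 102, 0, 0]]   # toy BERT ids, padded to length 6
input_mask  = [[1, 1, 1, 1, 0, 0]]
segment_ids = [[0, 0, 0, 0, 0, 0]]

text_bert = np.concatenate(
    (np.expand_dims(input_ids, 1),
     np.expand_dims(input_mask, 1),
     np.expand_dims(segment_ids, 1)),
    axis=1)
print(text_bert.shape)   # (1, 3, 6)
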
--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/TextPre.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import h5py
4 | import pickle
5 | import argparse
6 | import numpy as np
7 | from tqdm import tqdm
8 |
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification
13 | # from pytorch_transformers.amir_tokenization import BertTokenizer
14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule
15 |
16 | # from transformers.tokenization import BertTokenizer
17 | from models.subNets.BertTextEncoder import BertTextEncoder
18 |
19 | class TextPre(object):
20 | """A single set of features of data."""
21 |
22 | def __init__(self, args):
23 | self.device = torch.device('cuda:0')
24 | self.args = args
25 | self.loadTextMap = {
26 | 'mosi': self.__load_data_mosi,
27 | 'mosei': self.__load_data_mosei
28 | }
29 | self.bert = BertTextEncoder(language=args.language).to(self.device)
30 |
31 | def textConvertID(self, data, tokenizer):
32 | features = {}
33 | Input_ids, Input_mask, Segment_ids = [], [], []
34 | Raw_text, Visual, Audio = [], [], []
35 | Label, ids = [], []
36 | max_seq_length = self.args.max_seq_length
37 | for i in tqdm(range(len(data['raw_text']))):
38 | raw_text = data['raw_text'][i]
39 | visual = data['vision'][i]
40 | audio = data['audio'][i]
41 | tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True)
42 |
43 | if len(tokens_a) > max_seq_length - 2:
44 | tokens_a = tokens_a[:max_seq_length - 2]
45 | inversions_a = inversions_a[:max_seq_length - 2]
46 |
47 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
48 |
49 | segment_ids = [0] * len(tokens)
50 |
51 | input_ids = tokenizer.convert_tokens_to_ids(tokens)
52 |
53 | input_mask = [1] * len(input_ids)
54 | padding = [0] * (max_seq_length - len(input_ids))
55 |
56 |
57 | if self.args.aligned:
58 | text_len = min(len(raw_text.split()), max_seq_length)
59 | new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a]
60 | new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a]
61 |
62 | visual = np.array(new_visual)
63 | audio = np.array(new_audio)
64 |
65 | # add "start" and "end" for audio and vision
66 | audio_zero = np.zeros((1,audio.shape[1]))
67 | audio = np.concatenate((audio_zero,audio,audio_zero))
68 |
69 | visual_zero = np.zeros((1,visual.shape[1]))
70 | visual = np.concatenate((visual_zero,visual,visual_zero))
71 |
72 | audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1]))
73 | audio = np.concatenate((audio,audio_padding))
74 |
75 | video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1]))
76 | visual = np.concatenate((visual,video_padding))
77 |
78 | assert audio.shape[0] == max_seq_length
79 | assert visual.shape[0] == max_seq_length
80 |
81 | input_ids += padding
82 | input_mask += padding
83 | segment_ids += padding
84 |
85 | assert len(input_ids) == max_seq_length
86 | assert len(input_mask) == max_seq_length
87 | assert len(segment_ids) == max_seq_length
88 |
89 | label = float(data['labels'][i])
90 |
91 | Input_ids.append(input_ids)
92 | Visual.append(visual)
93 | Audio.append(audio)
94 | Input_mask.append(input_mask)
95 | Segment_ids.append(segment_ids)
96 | Label.append(label)
97 | Raw_text.append(raw_text)
98 | ids.append(data['id'][i])
99 |
100 | features['raw_text'] = np.array(Raw_text)
101 | features['audio'] = np.array(Audio)
102 | features['vision'] = np.array(Visual)
103 | features['labels'] = np.array(Label)
104 | features['id'] = np.array(ids)
105 | Input_ids = np.expand_dims(Input_ids, 1)
106 | Input_mask = np.expand_dims(Input_mask, 1)
107 | Segment_ids = np.expand_dims(Segment_ids, 1)
108 | text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1)
109 | features['text_bert'] = text_bert
110 | features['text'] = self.__convertID2Vector(text_bert)
111 | return features
112 |
113 | def __convertID2Vector(self, ids, batch_size=64):
114 | results = []
115 | left = 0
116 | ids = torch.Tensor(ids)
117 | for left in tqdm(range(0, ids.size(0), batch_size)):
118 | right = min(left + batch_size, ids.size(0))
119 | c_ids = ids[left:right].to(self.device)
120 | c_vector = self.bert(c_ids).detach().cpu().numpy()
121 | results.append(c_vector)
122 | results = np.concatenate(results, axis=0)
123 | return results
124 |
125 | def __load_data_mosi(self):
126 | # get text data
127 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
128 | text_data = {}
129 | for file in os.listdir(link):
130 | name = file.split('.')[0]
131 | for line in open(os.path.join(link, file), "r"):
132 | num_id, cur_t = line.split('_DELIM_')
133 | name_id = name + '_' + num_id.strip()
134 | text_data[name_id] = cur_t.strip()
135 | # get data
136 | def matchData(mode='train'):
137 | r_text = []
138 | for cur_id in data[mode]['id']:
139 | r_text.append(text_data[cur_id[0]])
140 | data[mode]['raw_text'] = r_text
141 |
142 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
143 | data = pickle.load(lf)
144 |
145 | matchData(mode='train')
146 | matchData(mode='valid')
147 | matchData(mode='test')
148 |
149 | return data
150 |
151 | def __load_data_mosei(self):
152 | def convert0(s):
153 | if s == '0':
154 | return '0.0'
155 | return s
156 | # get text data
157 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
158 | text_data = {}
159 | for file in os.listdir(link):
160 | name = file.split('.')[0]
161 | for line in open(os.path.join(link, file), "r"):
162 | items = line.split('___')
163 | name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3])
164 | text_data[name_id.strip()] = items[-1].strip()
165 | # get data
166 | def matchData(mode='train'):
167 | r_text = []
168 | for cur_id in data[mode]['id']:
169 | name = '_'.join(cur_id)
170 | r_text.append(text_data[name])
171 | data[mode]['raw_text'] = r_text
172 |
173 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
174 | data = pickle.load(lf)
175 |
176 | matchData(mode='train')
177 | matchData(mode='valid')
178 | matchData(mode='test')
179 |
180 | return data
181 |
182 | def run(self):
183 | data = self.loadTextMap[self.args.datasetName]()
184 |
185 | train_list = data['train']
186 | valid_list = data['valid']
187 | test_list = data['test']
188 |
189 | tokenizer = self.bert.get_tokenizer()
190 |
191 | save_data = {}
192 | save_data['train'] = self.textConvertID(train_list, tokenizer)
193 | save_data['valid'] = self.textConvertID(valid_list, tokenizer)
194 | save_data['test'] = self.textConvertID(test_list, tokenizer)
195 |
196 | if self.args.aligned:
197 | saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl')
198 | else:
199 | saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl')
200 |
201 | if not os.path.exists(os.path.dirname(saved_path)):
202 | os.makedirs(os.path.dirname(saved_path))
203 |
204 | with open(saved_path, 'wb') as file:
205 | pickle.dump(save_data, file, protocol=4)
206 | print('Save Successful!')
207 |
208 | def parse_args():
209 | parser = argparse.ArgumentParser()
210 | parser.add_argument('--datasetName', type=str, default='mosei',
211 | help='dataset name (support mosi / mosei)')
212 | parser.add_argument('--language', type=str, default='cn',
213 | help='data language')
214 | parser.add_argument('--aligned', type=bool, default=True,
215 | help='need aligned data')
216 | parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI',
217 | help='path to MOSI / MOSEI')
218 | parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw',
219 | help='path to saved directory')
220 | parser.add_argument('--max_seq_length', type=int, default = 50,
221 | help='maximum text sequence length')
222 | return parser.parse_args()
223 |
224 | if __name__ == "__main__":
225 | args = parse_args()
226 | tp = TextPre(args)
227 | tp.run()
228 | # tp.convertID2Vector()
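
Note: a sketch of the audio/visual alignment padding in textConvertID above (aligned mode): a zero frame is prepended and appended to mirror [CLS]/[SEP], then the sequence is zero-padded to max_seq_length. The shapes are toy values, and n_text_tokens stands in for len(input_ids) in the real code.

import numpy as np

max_seq_length = 8
n_text_tokens = 5                       # [CLS] + 3 word tokens + [SEP]
audio = np.random.rand(3, 4)            # 3 word-level frames, 4 acoustic features

zero = np.zeros((1, audio.shape[1]))
audio = np.concatenate((zero, audio, zero))            # add "start"/"end" frames -> 5 rows
pad = np.zeros((max_seq_length - n_text_tokens, audio.shape[1]))
audio = np.concatenate((audio, pad))                   # pad to max_seq_length
assert audio.shape[0] == max_seq_length
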
--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/TextPre.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import h5py
4 | import pickle
5 | import argparse
6 | import numpy as np
7 | from tqdm import tqdm
8 |
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification
13 | # from pytorch_transformers.amir_tokenization import BertTokenizer
14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule
15 |
16 | # from transformers.tokenization import BertTokenizer
17 | from models.subNets.BertTextEncoder import BertTextEncoder
18 |
19 | class TextPre(object):
20 | """A single set of features of data."""
21 |
22 | def __init__(self, args):
23 | self.device = torch.device('cuda:0')
24 | self.args = args
25 | self.loadTextMap = {
26 | 'mosi': self.__load_data_mosi,
27 | 'mosei': self.__load_data_mosei
28 | }
29 | self.bert = BertTextEncoder(language=args.language).to(self.device)
30 |
31 | def textConvertID(self, data, tokenizer):
32 | features = {}
33 | Input_ids, Input_mask, Segment_ids = [], [], []
34 | Raw_text, Visual, Audio = [], [], []
35 | Label, ids = [], []
36 | max_seq_length = self.args.max_seq_length
37 | for i in tqdm(range(len(data['raw_text']))):
38 | raw_text = data['raw_text'][i]
39 | visual = data['vision'][i]
40 | audio = data['audio'][i]
41 | tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True)
42 |
43 | if len(tokens_a) > max_seq_length - 2:
44 | tokens_a = tokens_a[:max_seq_length - 2]
45 | inversions_a = inversions_a[:max_seq_length - 2]
46 |
47 | tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
48 |
49 | segment_ids = [0] * len(tokens)
50 |
51 | input_ids = tokenizer.convert_tokens_to_ids(tokens)
52 |
53 | input_mask = [1] * len(input_ids)
54 | padding = [0] * (max_seq_length - len(input_ids))
55 |
56 |
57 | if self.args.aligned:
58 | text_len = min(len(raw_text.split()), max_seq_length)
59 | new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a]
60 | new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a]
61 |
62 | visual = np.array(new_visual)
63 | audio = np.array(new_audio)
64 |
65 | # add "start" and "end" for audio and vision
66 | audio_zero = np.zeros((1,audio.shape[1]))
67 | audio = np.concatenate((audio_zero,audio,audio_zero))
68 |
69 | visual_zero = np.zeros((1,visual.shape[1]))
70 | visual = np.concatenate((visual_zero,visual,visual_zero))
71 |
72 | audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1]))
73 | audio = np.concatenate((audio,audio_padding))
74 |
75 | video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1]))
76 | visual = np.concatenate((visual,video_padding))
77 |
78 | assert audio.shape[0] == max_seq_length
79 | assert visual.shape[0] == max_seq_length
80 |
81 | input_ids += padding
82 | input_mask += padding
83 | segment_ids += padding
84 |
85 | assert len(input_ids) == max_seq_length
86 | assert len(input_mask) == max_seq_length
87 | assert len(segment_ids) == max_seq_length
88 |
89 | label = float(data['labels'][i])
90 |
91 | Input_ids.append(input_ids)
92 | Visual.append(visual)
93 | Audio.append(audio)
94 | Input_mask.append(input_mask)
95 | Segment_ids.append(segment_ids)
96 | Label.append(label)
97 | Raw_text.append(raw_text)
98 | ids.append(data['id'][i])
99 |
100 | features['raw_text'] = np.array(Raw_text)
101 | features['audio'] = np.array(Audio)
102 | features['vision'] = np.array(Visual)
103 | features['labels'] = np.array(Label)
104 | features['id'] = np.array(ids)
105 | Input_ids = np.expand_dims(Input_ids, 1)
106 | Input_mask = np.expand_dims(Input_mask, 1)
107 | Segment_ids = np.expand_dims(Segment_ids, 1)
108 | text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1)
109 | features['text_bert'] = text_bert
110 | features['text'] = self.__convertID2Vector(text_bert)
111 | return features
112 |
113 | def __convertID2Vector(self, ids, batch_size=64):
114 | results = []
115 | left = 0
116 | ids = torch.Tensor(ids)
117 | for left in tqdm(range(0, ids.size(0), batch_size)):
118 | right = min(left + batch_size, ids.size(0))
119 | c_ids = ids[left:right].to(self.device)
120 | c_vector = self.bert(c_ids).detach().cpu().numpy()
121 | results.append(c_vector)
122 | results = np.concatenate(results, axis=0)
123 | return results
124 |
125 | def __load_data_mosi(self):
126 | # get text data
127 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
128 | text_data = {}
129 | for file in os.listdir(link):
130 | name = file.split('.')[0]
131 | for line in open(os.path.join(link, file), "r"):
132 | num_id, cur_t = line.split('_DELIM_')
133 | name_id = name + '_' + num_id.strip()
134 | text_data[name_id] = cur_t.strip()
135 | # get data
136 | def matchData(mode='train'):
137 | r_text = []
138 | for cur_id in data[mode]['id']:
139 | r_text.append(text_data[cur_id[0]])
140 | data[mode]['raw_text'] = r_text
141 |
142 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
143 | data = pickle.load(lf)
144 |
145 | matchData(mode='train')
146 | matchData(mode='valid')
147 | matchData(mode='test')
148 |
149 | return data
150 |
151 | def __load_data_mosei(self):
152 | def convert0(s):
153 | if s == '0':
154 | return '0.0'
155 | return s
156 | # get text data
157 | link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
158 | text_data = {}
159 | for file in os.listdir(link):
160 | name = file.split('.')[0]
161 | for line in open(os.path.join(link, file), "r"):
162 | items = line.split('___')
163 | name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3])
164 | text_data[name_id.strip()] = items[-1].strip()
165 | # get data
166 | def matchData(mode='train'):
167 | r_text = []
168 | for cur_id in data[mode]['id']:
169 | name = '_'.join(cur_id)
170 | r_text.append(text_data[name])
171 | data[mode]['raw_text'] = r_text
172 |
173 | with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
174 | data = pickle.load(lf)
175 |
176 | matchData(mode='train')
177 | matchData(mode='valid')
178 | matchData(mode='test')
179 |
180 | return data
181 |
182 | def run(self):
183 | data = self.loadTextMap[self.args.datasetName]()
184 |
185 | train_list = data['train']
186 | valid_list = data['valid']
187 | test_list = data['test']
188 |
189 | tokenizer = self.bert.get_tokenizer()
190 |
191 | save_data = {}
192 | save_data['train'] = self.textConvertID(train_list, tokenizer)
193 | save_data['valid'] = self.textConvertID(valid_list, tokenizer)
194 | save_data['test'] = self.textConvertID(test_list, tokenizer)
195 |
196 | if self.args.aligned:
197 | saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl')
198 | else:
199 | saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl')
200 |
201 | if not os.path.exists(os.path.dirname(saved_path)):
202 | os.makedirs(os.path.dirname(saved_path))
203 |
204 | with open(saved_path, 'wb') as file:
205 | pickle.dump(save_data, file, protocol=4)
206 | print('Save Successful!')
207 |
208 | def parse_args():
209 | parser = argparse.ArgumentParser()
210 | parser.add_argument('--datasetName', type=str, default='mosei',
211 | help='dataset name (support mosi / mosei)')
212 | parser.add_argument('--language', type=str, default='cn',
213 | help='data language')
214 | parser.add_argument('--aligned', type=bool, default=True,
215 | help='need aligned data')
216 | parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI',
217 | help='path to MOSI / MOSEI')
218 | parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw',
219 | help='path to saved directory')
220 | parser.add_argument('--max_seq_length', type=int, default = 50,
221 | help='maximum text sequence length')
222 | return parser.parse_args()
223 |
224 | if __name__ == "__main__":
225 | args = parse_args()
226 | tp = TextPre(args)
227 | tp.run()
228 | # tp.convertID2Vector()
--------------------------------------------------------------------------------
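For reference, a minimal sketch of consuming the pickle written by TextPre.run() above. The file name is illustrative; the keys mirror what textConvertID() stores, with 'text_bert' stacking (input_ids, input_mask, segment_ids) along axis 1 and 'text' holding the pre-computed text-encoder features.

import pickle

with open('aligned_50.pkl', 'rb') as f:      # or 'unaligned_50.pkl', depending on --aligned
    data = pickle.load(f)

train = data['train']
print(train['text_bert'].shape)   # (num_samples, 3, max_seq_length)
print(train['vision'].shape)      # (num_samples, max_seq_length, visual_dim) in the aligned case
print(train['labels'].shape)      # (num_samples,)
print(train['raw_text'][0], train['id'][0])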
/MSE-Llama2-7B/models/subNets/Textmodel.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import collections
4 | import re
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from modelscope import AutoTokenizer, AutoModel, AutoModelForCausalLM
10 |
11 |
12 | __all__ = ['Language_model']
13 |
14 | class Language_model (nn.Module):
15 | def __init__(self, args, use_PLM = True):
16 | """
17 | language: en / cn
18 | """
19 | super(Language_model, self).__init__()
20 |
21 | if use_PLM:
22 | pretrained_model = args.pretrain_LM #pretrained model select
23 | self.tokenizer = AutoTokenizer.from_pretrained(
24 | pretrained_model,
25 | padding_side='left',
26 | trust_remote_code=True
27 | )
28 | self.model = AutoModelForCausalLM.from_pretrained(
29 | pretrained_model,
30 | trust_remote_code=True,
31 | torch_dtype=torch.bfloat16
32 | ).half()
33 | # self.pad_token_id = self.tokenizer.convert_tokens_to_ids('<|extra_0|>')
34 | # self.tokenizer.pad_token_id = self.pad_token_id
35 | self.tokenizer.pad_token_id = 0
36 | self.eos_token_id = self.tokenizer.convert_tokens_to_ids('<|endoftext|>')
37 |
38 | self.device = args.device
39 | self.language = args.language
40 | self.max_new_tokens = args.max_new_tokens
41 | self.datasetName = args.datasetName
42 | self.train_mode = args.train_mode
43 | self.task_specific_prompt = args.task_specific_prompt
44 | # freeze parameter
45 | for param in self.model.parameters():
46 | param.requires_grad = False
47 | else:
48 | print('please use PLM')
49 |
50 | def text_embedding(self,text_ids):
51 | embeddings = self.model.base_model.get_input_embeddings()
52 | return embeddings(text_ids)
53 |
54 |
55 | def forward(self, fusion_embedding, labels):
56 | """
57 | Args:
58 | fusion_embedding: the concatenation of the low-rank multimodal fusion features and the text embeddings
59 | labels: the ground-truth labels
60 | """
61 |
62 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # wrap the multimodal input with its special prompt
63 | opt_tokens, atts_bos, atts_fusion, labels, labels_atts = self.input_processing(fusion_embedding, labels, mode = 'train') # build the fusion+prompt+answer-mask inputs and labels
64 |
65 | attention_mask = torch.cat([atts_bos, atts_fusion, labels_atts], dim=1)
66 |
67 |
68 | with torch.cuda.amp.autocast():
69 | output = self.model(inputs_embeds = opt_tokens, return_dict=True, labels = labels) # with return_dict=True the output is a ModelOutput carrying the loss over the label positions
70 |
71 | return output
72 |
73 | def generate(self, fusion_embedding):
74 | """
75 | Args:
76 | samples (dict): A dictionary containing the following keys:
77 | use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling.
78 | num_beams (int): Number of beams for beam search. 1 means no beam search.
79 | max_new_tokens (int): The maximum length of the new tokens to be generated.
80 | top_p (float): The cumulative probability for nucleus sampling.
81 | top_k (int): The k for top-k sampling.
82 | penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty.
83 | num_captions (int): Number of captions to be generated for each image.
84 | """
85 |
86 |
87 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # wrap the multimodal input with its special prompt
88 | opt_tokens, _, _, _, _= self.input_processing(fusion_embedding, mode = 'generate') # build the fusion+prompt input
89 | # attention_mask = torch.cat([atts_bos, atts_fusion], dim=1)
90 | context_length = opt_tokens.size(1)
91 | all_responses =[]
92 |
93 | outputs = self.model.generate(inputs_embeds = opt_tokens,
94 | num_beams=1,
95 | do_sample = False,
96 | top_p = None,
97 | max_new_tokens = self.max_new_tokens)
98 | responses = self.tokenizer.batch_decode(outputs[:,1:], add_special_tokens=False, skip_special_tokens=True, clean_up_tokenization_spaces=False)
99 |
100 | # print(responses)
101 | for response in responses:
102 | # post-process the generated results, converting unnecessary characters to 0
103 | if self.train_mode == 'regression':
104 | try:
105 | value = float(
106 | response.replace('–', '-').replace('一', '-').replace(':', '').replace('/', '').replace('(', '').replace(
107 | ':', ''))
108 | # value = float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?...   [line truncated; lines 109-158 of this file are missing from the dump]
159 | # ... if label > 0 else f"{label.item():.{1}f}" for label in
160 | # labels]
161 | # label_template = [
162 | # f"+{label.item():.1f}" if label > 0 else f"{+label.item():.1f}" if label == 0 else f"{label.item():.1f}"
163 | # for label in labels]
164 | else:
165 | label_template = [f"{label.item()}" for label in labels]
166 |
167 | labels = self.tokenizer(label_template, padding=True, return_tensors="pt", add_special_tokens=False).to(self.device)
168 | labels_id = labels["input_ids"]
169 | labels_atts = labels["attention_mask"]
170 |
171 | labels_embedding = self.text_embedding(labels_id)
172 | labels_matrix = torch.empty(opt_tokens.size(0), opt_tokens.size(1)).fill_(-100).long().to(self.device) # bz * seq_len; build a -100 matrix matching only the prefix token dimensions
173 | opt_tokens = torch.cat([opt_tokens, labels_embedding], dim=1) # concatenate the inputs with the label embeddings
174 | labels = torch.cat([labels_matrix, labels_id], dim=1)
175 |
176 |
177 | else:
178 | labels_atts = None
179 |
180 | return opt_tokens, labels, labels_atts
181 |
182 | def get_task_prompt(self):
183 | # get the task_specific_prompt
184 | prompt_text = self.task_specific_prompt
185 | prompt_ids = self.tokenizer(prompt_text, padding=True, return_tensors="pt", add_special_tokens=False)["input_ids"].to(self.device)
186 |
187 | return prompt_ids
188 |
189 | def multimodal_prompt_wrap(self,fusion_embeddings):
190 | """
191 | Args:
192 | Wrap the input with a special token
193 | """
194 | if self.language == "en":
195 | prompt = ''
196 | special_token = ''
197 | else:
198 | prompt = '<多模态>多模态>'
199 | special_token = ''
200 |
201 | batch_size = fusion_embeddings.shape[0]
202 | p_before, p_after = prompt.split(special_token)
203 | p_before_tokens = self.tokenizer(
204 | p_before, return_tensors="pt", add_special_tokens=True).to(self.device)
205 | p_after_tokens = self.tokenizer(
206 | p_after, return_tensors="pt", add_special_tokens=False).to(self.device)
207 | p_before_embeds = self.text_embedding(p_before_tokens.input_ids.expand(batch_size, -1))
208 | p_after_embeds = self.text_embedding(p_after_tokens.input_ids.expand(batch_size, -1))
209 | wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1)
210 |
211 |
212 | return wrapped_fusion_embeddings
--------------------------------------------------------------------------------
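The input_processing()/forward() pair above trains against the frozen LLM with a prefix of multimodal embeddings and supervises only the answer tokens by filling the prefix positions of the label tensor with -100. A minimal, self-contained sketch of that masking pattern follows; the shapes and names are illustrative, not taken from the repository.

import torch

batch_size, prefix_len, answer_len = 2, 5, 3
answer_ids = torch.randint(0, 100, (batch_size, answer_len))   # tokenized label text

prefix_mask = torch.full((batch_size, prefix_len), -100)       # -100 is ignored by the LM loss
labels = torch.cat([prefix_mask, answer_ids], dim=1)           # (batch, prefix_len + answer_len)

# Passed as labels= alongside inputs_embeds of matching length, only the answer
# positions contribute to the cross-entropy loss.
print(labels.shape)   # torch.Size([2, 8])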
/MSE-Qwen-1.8B/models/subNets/Textmodel.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import collections
4 | import re
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from modelscope import AutoTokenizer, AutoModel, AutoModelForCausalLM
10 |
11 |
12 | __all__ = ['Language_model']
13 |
14 | class Language_model (nn.Module):
15 | def __init__(self, args, use_PLM = True):
16 | """
17 | language: en / cn
18 | """
19 | super(Language_model, self).__init__()
20 |
21 | if use_PLM:
22 | pretrained_model = args.pretrain_LM #pretrained model select
23 | self.tokenizer = AutoTokenizer.from_pretrained(
24 | pretrained_model,
25 | padding_side='left',
26 | trust_remote_code=True
27 | )
28 | self.model = AutoModelForCausalLM.from_pretrained(
29 | pretrained_model,
30 | trust_remote_code=True,
31 | torch_dtype=torch.bfloat16
32 | ).half()
33 | # self.pad_token_id = self.tokenizer.convert_tokens_to_ids('<|extra_0|>')
34 | # self.tokenizer.pad_token_id = self.pad_token_id
35 | # self.tokenizer.pad_token_id = 0
36 | self.eos_token_id = self.tokenizer.convert_tokens_to_ids('<|endoftext|>')
37 | self.tokenizer.pad_token_id = self.eos_token_id
38 |
39 | self.bos_token_id = self.tokenizer.convert_tokens_to_ids('<|im_start|>')
40 | self.tokenizer.bos_token_id = self.bos_token_id
41 |
42 | self.device = args.device
43 | self.language = args.language
44 | self.max_new_tokens = args.max_new_tokens
45 | self.datasetName = args.datasetName
46 | self.train_mode = args.train_mode
47 | self.task_specific_prompt = args.task_specific_prompt
48 | # freeze parameter
49 | for param in self.model.parameters():
50 | param.requires_grad = False
51 | else:
52 | print('please use PLM')
53 |
54 | def text_embedding(self,text_ids):
55 | embeddings = self.model.base_model.get_input_embeddings()
56 | return embeddings(text_ids)
57 |
58 |
59 | def forward(self, fusion_embedding, labels):
60 | """
61 | Args:
62 | fusion_embedding: the concatenation of the low-rank multimodal fusion features and the text embeddings
63 | labels: the ground-truth labels
64 | """
65 |
66 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # wrap the multimodal input with its special prompt
67 | opt_tokens, atts_bos, atts_fusion, labels, labels_atts = self.input_processing(fusion_embedding, labels, mode = 'train') # build the fusion+prompt+answer-mask inputs and labels
68 |
69 | attention_mask = torch.cat([atts_bos, atts_fusion, labels_atts], dim=1)
70 |
71 |
72 | with torch.cuda.amp.autocast():
73 | output = self.model(inputs_embeds = opt_tokens, return_dict=True, labels = labels) # with return_dict=True the output is a ModelOutput carrying the loss over the label positions
74 |
75 | return output
76 |
77 | def generate(self, fusion_embedding):
78 | """
79 | Args:
80 | samples (dict): A dictionary containing the following keys:
81 | use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling.
82 | num_beams (int): Number of beams for beam search. 1 means no beam search.
83 | max_new_tokens (int): The maximum length of the new tokens to be generated.
84 | top_p (float): The cumulative probability for nucleus sampling.
85 | top_k (int): The k for top-k sampling.
86 | penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty.
87 | num_captions (int): Number of captions to be generated for each image.
88 | """
89 |
90 |
91 | fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding) # wrap the multimodal input with its special prompt
92 | opt_tokens, atts_bos, atts_fusion, _, _= self.input_processing(fusion_embedding, mode = 'generate') # build the fusion+prompt input
93 | attention_mask = torch.cat([atts_bos, atts_fusion], dim=1)
94 | context_length = opt_tokens.size(1)
95 | all_responses =[]
96 |
97 | outputs = self.model.generate(inputs_embeds = opt_tokens,
98 | num_beams=1,
99 | do_sample = False,
100 | bos_token_id = self.tokenizer.bos_token_id,
101 | max_new_tokens = self.max_new_tokens)
102 | responses = self.tokenizer.batch_decode(outputs[:,1:], add_special_tokens=False, skip_special_tokens=True, clean_up_tokenization_spaces=False)
103 |
104 | # print(responses)
105 | for response in responses:
106 | # post-process the generated results, converting unnecessary characters to 0
107 | if self.train_mode == 'regression':
108 | try:
109 | value = float(
110 | response.replace('–', '-').replace('一', '-').replace(':', '').replace('/', '').replace('(', '').replace(
111 | ':', ''))
112 | # value = float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?...   [line truncated; lines 113-162 of this file are missing from the dump]
163 | ... if label >= 0 else f"{label.item():.{1}f}" for label in
164 | labels]
165 | # label_template = [
166 | # f"+{label.item():.1f}" if label > 0 else f"{+label.item():.1f}" if label == 0 else f"{label.item():.1f}"
167 | # for label in labels]
168 | else:
169 | label_template = [f"{label.item()}" for label in labels]
170 |
171 | labels = self.tokenizer(label_template, padding=True, return_tensors="pt", add_special_tokens=False).to(self.device)
172 | labels_id = labels["input_ids"]
173 | labels_atts = labels["attention_mask"]
174 |
175 | # a = [' ','0.20','-0.2','5','2','0','1','3','4','5','6','7','8','9']
176 | # c = [31106]
177 | # b = self.tokenizer(a, padding=True, return_tensors="pt", add_special_tokens=False)
178 | # d = self.tokenizer.decode(c)
179 | labels_embedding = self.text_embedding(labels_id)
180 | labels_matrix = torch.empty(opt_tokens.size(0), opt_tokens.size(1)).fill_(-100).long().to(self.device) # bz * seq_len; build a -100 matrix matching only the prefix token dimensions
181 | opt_tokens = torch.cat([opt_tokens, labels_embedding], dim=1) # concatenate the inputs with the label embeddings
182 | labels = torch.cat([labels_matrix, labels_id], dim=1)
183 |
184 |
185 | else:
186 | labels_atts = None
187 |
188 | return opt_tokens, labels, labels_atts
189 |
190 | def get_task_prompt(self):
191 | # get the task_specific_prompt
192 | prompt_text = self.task_specific_prompt
193 | prompt_ids = self.tokenizer(prompt_text, padding=True, return_tensors="pt", add_special_tokens=False)["input_ids"].to(self.device)
194 |
195 | return prompt_ids
196 |
197 | def multimodal_prompt_wrap(self,fusion_embeddings):
198 | """
199 | Args:
200 | Wrap the input with a special token
201 | """
202 | if self.language == "en":
203 | prompt = ''
204 | special_token = ''
205 | else:
206 | prompt = '<多模态>多模态>'
207 | special_token = ''
208 |
209 | batch_size = fusion_embeddings.shape[0]
210 | p_before, p_after = prompt.split(special_token)
211 | p_before_tokens = self.tokenizer(
212 | p_before, return_tensors="pt", add_special_tokens=True).to(self.device)
213 | p_after_tokens = self.tokenizer(
214 | p_after, return_tensors="pt", add_special_tokens=False).to(self.device)
215 | p_before_embeds = self.text_embedding(p_before_tokens.input_ids.expand(batch_size, -1))
216 | p_after_embeds = self.text_embedding(p_after_tokens.input_ids.expand(batch_size, -1))
217 | wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1)
218 |
219 |
220 | return wrapped_fusion_embeddings
--------------------------------------------------------------------------------
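generate() above recovers a sentiment score from free-form model output by chaining str.replace() calls inside a try/except. Below is a hedged alternative sketch using a regular expression; parse_score is a hypothetical helper, not part of the repository.

import re

def parse_score(response: str, default: float = 0.0) -> float:
    # Normalize look-alike dashes, then take the first signed decimal number found.
    text = response.replace('–', '-').replace('一', '-')
    match = re.search(r'-?\d+(?:\.\d+)?', text)
    return float(match.group()) if match else default

print(parse_score('the sentiment is +0.8 overall'))   # 0.8
print(parse_score('negative, -1.4'))                  # -1.4
print(parse_score('no number here'))                  # 0.0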
/MSE-ChatGLM3-6B/data/load_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import pickle
4 | import json
5 | import numpy as np
6 | import pandas as pd
7 | import torch
8 | import gzip
9 | import torch.nn.functional as F
10 | from torch.utils.data import Dataset, DataLoader
11 | from modelscope import AutoTokenizer, AutoModel
12 | from operator import itemgetter
13 | from torch.nn.utils.rnn import pad_sequence
14 |
15 | __all__ = ['MMDataLoader']
16 |
17 | logger = logging.getLogger('MSA')
18 |
19 | class MMDataset(Dataset):
20 | def __init__(self, args, mode='train'):
21 | self.mode = mode
22 | self.args = args
23 | DATA_MAP = {
24 | 'mosi': self.__init_mosi,
25 | 'mosei': self.__init_mosei,
26 | 'sims': self.__init_sims,
27 | 'simsv2': self.__init_simsv2,
28 | 'meld': self.__init_meld,
29 | 'iemocap': self.__init_iemocap,
30 | 'cherma': self.__init_cherma,
31 |
32 | }
33 | DATA_MAP[args.datasetName]()
34 |
35 |
36 |
37 | def __init_meld(self):
38 | data_path = os.path.join(self.args.dataPath, self.args.datasetName + '_' + self.mode + '.pkl')
39 | label_index_mapping = self.args.label_index_mapping
40 | with open(data_path, 'rb') as f:
41 | data = pickle.load(f)
42 | self.vision = np.array(list(map(lambda item: item['features']['video'], data))).astype(np.float32)
43 | self.audio = np.array(list(map(lambda item: item['features']['audio'], data))).astype(np.float32)
44 | self.rawText = np.array(list(map(lambda item: item['features']['text'], data)))
45 |
46 | # self.labels = {
47 | # 'M': list(map(lambda item: item['label'], data))
48 | # }
49 | self.labels = {
50 | 'M': list(map(lambda item: label_index_mapping.get(item['label'],-1), data))
51 | }
52 | if self.args.use_PLM:
53 | self.text = self.PLM_tokenizer(self.rawText)
54 |
55 | # label_mapping
56 |
57 | # self.labels['M'] = [label_index_mapping.get(label, -1) for label in self.labels['M']]
58 |
59 | if not self.args.need_data_aligned:
60 | self.audio_lengths = np.array(list(map(lambda item: item['features']['audio_len'], data)))
61 | self.vision_lengths = np.array(list(map(lambda item: item['features']['video_len'], data)))
62 |
63 | def __init_iemocap(self):
64 | return self.__init_meld()
65 |
66 | def __init_cherma(self):
67 | return self.__init_meld()
68 |
69 | def __init_mosi(self):
70 | with open(self.args.dataPath, 'rb') as f:
71 | data = pickle.load(f)
72 | if self.args.use_PLM:
73 | self.text = data[self.mode]['raw_text']
74 | self.text = self.PLM_tokenizer(self.text)
75 |
76 | self.vision = data[self.mode]['vision'].astype(np.float32)
77 | self.audio = data[self.mode]['audio'].astype(np.float32)
78 | self.rawText = data[self.mode]['raw_text']
79 | self.ids = data[self.mode]['id']
80 |
81 | self.labels = {
82 | 'M': data[self.mode][self.args.train_mode+'_labels'].astype(np.float32)
83 | }
84 |
85 | if self.args.need_label_prefix:
86 | labels = self.labels['M']
87 | label_prefix = []
88 | for i in range(len(labels)):
89 | if labels[i] < 0:
90 | label_prefix.append(f'negative,{labels[i].item():.{1}f}')
91 | elif labels[i] > 0:
92 | label_prefix.append(f'positive,{labels[i].item():.{1}f}')
93 | else:
94 | label_prefix.append(f'neutral,{labels[i].item():.{1}f}')
95 | self.labels_prefix = label_prefix
96 |
97 | if self.args.datasetName == 'sims':
98 | for m in "TAV":
99 | self.labels[m] = data[self.mode][self.args.train_mode+'_labels_'+m]
100 |
101 | logger.info(f"{self.mode} samples: {self.labels['M'].shape}")
102 |
103 | if not self.args.need_data_aligned:
104 | self.audio_lengths = data[self.mode]['audio_lengths']
105 | self.vision_lengths = data[self.mode]['vision_lengths']
106 | self.text_lengths = self.args.seq_lens[0]
107 | self.audio[self.audio == -np.inf] = 0
108 | self.vision[self.vision != self.vision] = 0
109 |
110 | if self.args.need_normalized:
111 | self.__normalize()
112 |
113 | def __init_mosei(self):
114 | return self.__init_mosi()
115 |
116 | def __init_sims(self):
117 | return self.__init_mosi()
118 |
119 | def __init_simsv2(self):
120 | return self.__init_mosi()
121 |
122 | def __truncated(self):
123 | # NOTE: Here for dataset we manually cut the input into specific length.
124 | def Truncated(modal_features, length):
125 | if length == modal_features.shape[1]:
126 | return modal_features
127 | truncated_feature = []
128 | padding = np.array([0 for i in range(modal_features.shape[2])])
129 | for instance in modal_features:
130 | for index in range(modal_features.shape[1]):
131 | if((instance[index] == padding).all()):
132 | if(index + length >= modal_features.shape[1]):
133 | truncated_feature.append(instance[index:index+length])  # use the requested length rather than a hard-coded 20
134 | break
135 | else:
136 | truncated_feature.append(instance[index:index+length])
137 | break
138 | truncated_feature = np.array(truncated_feature)
139 | return truncated_feature
140 |
141 | text_length, audio_length, video_length = self.args.seq_lens
142 | self.vision = Truncated(self.vision, video_length)
143 | self.text = Truncated(self.text, text_length)
144 | self.audio = Truncated(self.audio, audio_length)
145 |
146 | def __normalize(self):
147 | # (num_examples,max_len,feature_dim) -> (max_len, num_examples, feature_dim)
148 | self.vision = np.transpose(self.vision, (1, 0, 2))
149 | self.audio = np.transpose(self.audio, (1, 0, 2))
150 | # for visual and audio modality, we average across time
151 | # here the original data has shape (max_len, num_examples, feature_dim)
152 | # after averaging they become (1, num_examples, feature_dim)
153 | self.vision = np.mean(self.vision, axis=0, keepdims=True)
154 | self.audio = np.mean(self.audio, axis=0, keepdims=True)
155 |
156 | # remove possible NaN values
157 | self.vision[self.vision != self.vision] = 0
158 | self.audio[self.audio != self.audio] = 0
159 |
160 | self.vision = np.transpose(self.vision, (1, 0, 2))
161 | self.audio = np.transpose(self.audio, (1, 0, 2))
162 |
163 | def __len__(self):
164 | return len(self.labels['M'])
165 |
166 | # here text.shape is a 3-D array [sample_num, tokenizer_output, length]
167 | # the 3 slices of tokenizer_output are token_ids, mask (marking the padded positions), and segment_ids
168 | def get_seq_len(self):
169 | return (self.text.shape[2], self.audio.shape[1], self.vision.shape[1])
170 |
171 | def get_feature_dim(self):
172 | return self.text.shape[2], self.audio.shape[2], self.vision.shape[2]
173 |
174 | def PLM_tokenizer (self, rawtexts):
175 | self.tokenizer = AutoTokenizer.from_pretrained(self.args.pretrain_LM, trust_remote_code=True)
176 | token_list = []
177 | for text in rawtexts:
178 | text_tokenizer = self.tokenizer(text,
179 | padding='max_length', # pad to max_length when the sample is shorter
180 | truncation=True, # truncate to max_length
181 | max_length=self.args.seq_lens[0],
182 | return_tensors = 'pt',
183 | add_special_tokens=False
184 | )
185 |
186 | token_ids = text_tokenizer['input_ids'].squeeze(0) # tensor of token ids torch.Size([max_len])
187 | attn_masks = text_tokenizer['attention_mask'].squeeze(0) # binary tensor with "0" for padded values and "1" for the other values torch.Size([max_len])
188 | token_type_ids = [0] * len(token_ids) # no sentence-pair distinction
189 |
190 | # adjust the dimensions
191 | input_ids = np.expand_dims(token_ids, 1)
192 | input_mask = np.expand_dims(attn_masks, 1)
193 | segment_ids = np.expand_dims(token_type_ids, 1)
194 |
195 | text_pretrain = np.concatenate([input_ids, input_mask, segment_ids], axis=1).T
196 | token_list.append(text_pretrain)
197 |
198 | # x_dimensions = [array.shape[1] for array in token_list]
199 | # # compute the mean of the x dimension
200 | # average_x = np.mean(x_dimensions)
201 | # median_x = np.median(x_dimensions)
202 | token_list = np.array(token_list)
203 | return token_list
204 |
205 |
206 | def __getitem__(self, index):
207 | if self.args.train_mode == 'regression':
208 | sample = {
209 | 'raw_text': self.rawText[index],
210 | 'text': torch.Tensor(self.text[index]),
211 | 'audio': torch.Tensor(self.audio[index]),
212 | 'vision': torch.Tensor(self.vision[index]),
213 | 'index': index,
214 | 'id': self.ids[index],
215 | 'labels': {k: torch.Tensor(v[index].reshape(-1)) for k, v in self.labels.items()},
216 | 'labels_prefix': self.labels_prefix[index]
217 | }
218 | else:
219 | sample = {
220 | 'raw_text': self.rawText[index],
221 | 'text': torch.Tensor(self.text[index]),
222 | 'audio': torch.Tensor(self.audio[index]),
223 | 'vision': torch.Tensor(self.vision[index]),
224 | 'index': index,
225 | 'labels': {k: v[index] for k, v in self.labels.items()}
226 | # 'labels': {torch.Tensor(self.labels)},
227 | }
228 |
229 | if not self.args.need_data_aligned:
230 | sample['audio_lengths'] = self.audio_lengths[index]
231 | sample['vision_lengths'] = self.vision_lengths[index]
232 | sample['text_lengths'] = self.args.seq_lens[0]
233 |
234 | return sample
235 |
236 |
237 |
238 | def MMDataLoader(args):
239 |
240 | datasets = {
241 | 'train': MMDataset(args, mode='train'),
242 | 'valid': MMDataset(args, mode='valid'),
243 | 'test': MMDataset(args, mode='test')
244 | }
245 |
246 | if 'seq_lens' in args:
247 | args.seq_lens = datasets['train'].get_seq_len()
248 |
249 | dataLoader = {
250 | ds: DataLoader(datasets[ds],
251 | batch_size=args.batch_size,
252 | num_workers=args.num_workers,
253 | shuffle=True)
254 | for ds in datasets.keys()
255 | }
256 |
257 | return dataLoader
--------------------------------------------------------------------------------
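A minimal sketch of driving MMDataLoader() above. The args object here is a stand-in (a small dict with attribute access covering only the fields the mosi branch touches); dataPath and pretrain_LM are illustrative, and the pickle is expected to follow the MOSI/MOSEI layout the loader reads ('raw_text', 'vision', 'audio', 'id', '<train_mode>_labels').

class Args(dict):
    # Tiny attribute-access dict so that both `args.x` and `'x' in args` work.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__

args = Args(
    datasetName='mosi', dataPath='aligned_50.pkl', train_mode='regression',
    use_PLM=True, pretrain_LM='ZhipuAI/chatglm3-6b', seq_lens=(50, 50, 50),
    need_data_aligned=True, need_normalized=False, need_label_prefix=True,
    batch_size=16, num_workers=0,
)

loaders = MMDataLoader(args)
print(args.seq_lens)                    # refreshed from the train split by MMDataLoader
print(len(loaders['train'].dataset))    # number of training samples
# Each batch exposes 'raw_text', 'text', 'audio', 'vision' and batch['labels']['M'].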