├── MSE-ChatGLM3-6B
    ├── data
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   └── load_data.cpython-310.pyc
    │   ├── getLengths.py
    │   ├── TextPre.py
    │   └── load_data.py
    ├── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── functions.cpython-310.pyc
    │   │   └── metricsTop.cpython-310.pyc
    │   ├── functions.py
    │   └── metricsTop.py
    ├── models
    │   ├── __init__.py
    │   ├── ChatGLM3
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── modeling_chatglm.cpython-310.pyc
    │   │   │   ├── tokenization_chatglm.cpython-310.pyc
    │   │   │   └── configuration_chatglm.cpython-310.pyc
    │   │   └── configuration_chatglm.py
    │   ├── subNets
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── Textmodel.cpython-310.pyc
    │   │   └── Textmodel.py
    │   ├── multiTask
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── CMCM.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   │   └── CMCM.py
    │   ├── __pycache__
    │   │   ├── AMIO.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    │   └── AMIO.py
    ├── trains
    │   ├── __init__.py
    │   ├── multiTask
    │   │   ├── __init__.py
    │   │   └── __pycache__
    │   │   │   ├── CMCM.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   ├── __pycache__
    │   │   ├── ATIO.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    │   └── ATIO.py
    ├── config
    │   ├── __pycache__
    │   │   ├── config_regression.cpython-310.pyc
    │   │   └── config_classification.cpython-310.pyc
    │   ├── config_regression.py
    │   └── config_classification.py
    ├── LICENSE
    └── run.py
├── MSE-Llama2-7B
    ├── data
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   └── load_data.cpython-310.pyc
    │   ├── getLengths.py
    │   └── TextPre.py
    ├── models
    │   ├── __init__.py
    │   ├── subNets
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── Textmodel.cpython-310.pyc
    │   │   └── Textmodel.py
    │   ├── multiTask
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── CMCM.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   │   └── CMCM.py
    │   ├── __pycache__
    │   │   ├── AMIO.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    │   └── AMIO.py
    ├── trains
    │   ├── __init__.py
    │   ├── multiTask
    │   │   ├── __init__.py
    │   │   └── __pycache__
    │   │   │   ├── CMCM.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   ├── __pycache__
    │   │   ├── ATIO.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    │   └── ATIO.py
    ├── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── functions.cpython-310.pyc
    │   │   └── metricsTop.cpython-310.pyc
    │   ├── functions.py
    │   └── metricsTop.py
    ├── config
    │   ├── __pycache__
    │   │   ├── config_regression.cpython-310.pyc
    │   │   └── config_classification.cpython-310.pyc
    │   ├── config_regression.py
    │   └── config_classification.py
    ├── LICENSE
    └── run.py
├── MSE-Qwen-1.8B
    ├── data
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   └── load_data.cpython-310.pyc
    │   ├── getLengths.py
    │   └── TextPre.py
    ├── models
    │   ├── __init__.py
    │   ├── subNets
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── Textmodel.cpython-310.pyc
    │   │   └── Textmodel.py
    │   ├── multiTask
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── CMCM.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   │   └── CMCM.py
    │   ├── __pycache__
    │   │   ├── AMIO.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    │   └── AMIO.py
    ├── trains
    │   ├── __init__.py
    │   ├── multiTask
    │   │   ├── __init__.py
    │   │   └── __pycache__
    │   │   │   ├── CMCM.cpython-310.pyc
    │   │   │   └── __init__.cpython-310.pyc
    │   ├── __pycache__
    │   │   ├── ATIO.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    │   └── ATIO.py
    ├── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── functions.cpython-310.pyc
    │   │   └── metricsTop.cpython-310.pyc
    │   ├── functions.py
    │   └── metricsTop.py
    ├── config
    │   ├── __pycache__
    │   │   ├── config_regression.cpython-310.pyc
    │   │   └── config_classification.cpython-310.pyc
    │   ├── config_regression.py
    │   └── config_classification.py
    ├── LICENSE
    └── run.py
├── Fig
    └── overall.png
├── README.md
└── requirements.txt


/MSE-ChatGLM3-6B/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/subNets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/subNets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Fig/overall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/Fig/overall.png


--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | from trains.multiTask.CMCM import CMCM
2 | 
3 | __all__ = ['CMCM']


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | from trains.multiTask.CMCM import CMCM
2 | 
3 | __all__ = ['CMCM']


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | from trains.multiTask.CMCM import CMCM
2 | 
3 | __all__ = ['CMCM']


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | from models.multiTask.CMCM import CMCM
3 | 
4 | __all__ = ['CMCM']


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | from models.multiTask.CMCM import CMCM
3 | 
4 | __all__ = ['CMCM']


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | from models.multiTask.CMCM import CMCM
3 | 
4 | __all__ = ['CMCM']


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/__pycache__/AMIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/__pycache__/AMIO.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/__pycache__/ATIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/__pycache__/ATIO.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/data/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/__pycache__/AMIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/__pycache__/AMIO.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/__pycache__/ATIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/__pycache__/ATIO.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/data/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/__pycache__/AMIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/__pycache__/AMIO.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/__pycache__/ATIO.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/__pycache__/ATIO.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/data/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/__pycache__/load_data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/data/__pycache__/load_data.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__pycache__/functions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/functions.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/__pycache__/load_data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/data/__pycache__/load_data.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__pycache__/functions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/functions.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/__pycache__/load_data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/data/__pycache__/load_data.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__pycache__/functions.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/functions.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/__pycache__/metricsTop.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/utils/__pycache__/metricsTop.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/__pycache__/metricsTop.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/utils/__pycache__/metricsTop.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/__pycache__/metricsTop.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/utils/__pycache__/metricsTop.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/multiTask/__pycache__/CMCM.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/multiTask/__pycache__/CMCM.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/subNets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/subNets/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/multiTask/__pycache__/CMCM.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/subNets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/subNets/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/multiTask/__pycache__/CMCM.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/subNets/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/config/__pycache__/config_regression.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/config/__pycache__/config_regression.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/multiTask/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/subNets/__pycache__/Textmodel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/models/subNets/__pycache__/Textmodel.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/trains/multiTask/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/__pycache__/config_regression.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/config/__pycache__/config_regression.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/multiTask/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/subNets/__pycache__/Textmodel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/models/subNets/__pycache__/Textmodel.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/trains/multiTask/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/__pycache__/config_regression.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/config/__pycache__/config_regression.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/multiTask/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/__pycache__/Textmodel.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/subNets/__pycache__/Textmodel.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/trains/multiTask/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/config/__pycache__/config_classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Llama2-7B/config/__pycache__/config_classification.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/__pycache__/config_classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-Qwen-1.8B/config/__pycache__/config_classification.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/__pycache__/config_classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/config/__pycache__/config_classification.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/modeling_chatglm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/modeling_chatglm.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/tokenization_chatglm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/tokenization_chatglm.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/configuration_chatglm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AZYoung233/MSE-Adapter/HEAD/MSE-ChatGLM3-6B/models/ChatGLM3/__pycache__/configuration_chatglm.cpython-310.pyc


--------------------------------------------------------------------------------
/MSE-Llama2-7B/trains/ATIO.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AIO -- All Trains in One
 3 | """
 4 | import torch
 5 | import torch.nn as nn
 6 | import torch.nn.functional as F
 7 | from torch.autograd import Variable
 8 | from torch.nn.parameter import Parameter
 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 | 
11 | from trains.multiTask import *
12 | 
13 | __all__ = ['ATIO']
14 | 
15 | class ATIO():
16 |     def __init__(self):
17 |         self.TRAIN_MAP = {
18 |            'cmcm': CMCM,
19 |         }
20 |     
21 |     def getTrain(self, args):
22 |         return self.TRAIN_MAP[args.modelName.lower()](args)
23 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/trains/ATIO.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AIO -- All Trains in One
 3 | """
 4 | import torch
 5 | import torch.nn as nn
 6 | import torch.nn.functional as F
 7 | from torch.autograd import Variable
 8 | from torch.nn.parameter import Parameter
 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 | 
11 | from trains.multiTask import *
12 | 
13 | __all__ = ['ATIO']
14 | 
15 | class ATIO():
16 |     def __init__(self):
17 |         self.TRAIN_MAP = {
18 |            'cmcm': CMCM,
19 |         }
20 |     
21 |     def getTrain(self, args):
22 |         return self.TRAIN_MAP[args.modelName.lower()](args)
23 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/trains/ATIO.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AIO -- All Trains in One
 3 | """
 4 | import torch
 5 | import torch.nn as nn
 6 | import torch.nn.functional as F
 7 | from torch.autograd import Variable
 8 | from torch.nn.parameter import Parameter
 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 | 
11 | from trains.multiTask import *
12 | 
13 | __all__ = ['ATIO']
14 | 
15 | class ATIO():
16 |     def __init__(self):
17 |         self.TRAIN_MAP = {
18 |            'cmcm': CMCM,
19 |         }
20 |     
21 |     def getTrain(self, args):
22 |         return self.TRAIN_MAP[args.modelName.lower()](args)
23 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/AMIO.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AIO -- All Model in One
 3 | """
 4 | import torch
 5 | import torch.nn as nn
 6 | import torch.nn.functional as F
 7 | from torch.autograd import Variable
 8 | from torch.nn.parameter import Parameter
 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 | 
11 | 
12 | from models.multiTask import *
13 | 
14 | __all__ = ['AMIO']
15 | 
16 | MODEL_MAP = {
17 |     'cmcm': CMCM
18 | }
19 | 
20 | class AMIO(nn.Module):
21 |     def __init__(self, args):
22 |         super(AMIO, self).__init__()
23 |         lastModel = MODEL_MAP[args.modelName]
24 |         self.Model = lastModel(args)
25 | 
26 |     def forward(self, labels_m, text_x, audio_x, video_x):
27 |         return self.Model(labels_m, text_x, audio_x, video_x)
28 | 
29 |     def generate(self, text_x, audio_x, video_x):
30 |         return self.Model.generate(text_x, audio_x, video_x)


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/AMIO.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AIO -- All Model in One
 3 | """
 4 | import torch
 5 | import torch.nn as nn
 6 | import torch.nn.functional as F
 7 | from torch.autograd import Variable
 8 | from torch.nn.parameter import Parameter
 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 | 
11 | 
12 | from models.multiTask import *
13 | 
14 | __all__ = ['AMIO']
15 | 
16 | MODEL_MAP = {
17 |     'cmcm': CMCM
18 | }
19 | 
20 | class AMIO(nn.Module):
21 |     def __init__(self, args):
22 |         super(AMIO, self).__init__()
23 |         lastModel = MODEL_MAP[args.modelName]
24 |         self.Model = lastModel(args)
25 | 
26 |     def forward(self, labels_m, text_x, audio_x, video_x):
27 |         return self.Model(labels_m, text_x, audio_x, video_x)
28 | 
29 |     def generate(self, text_x, audio_x, video_x):
30 |         return self.Model.generate(text_x, audio_x, video_x)


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/AMIO.py:
--------------------------------------------------------------------------------
 1 | """
 2 | AIO -- All Model in One
 3 | """
 4 | import torch
 5 | import torch.nn as nn
 6 | import torch.nn.functional as F
 7 | from torch.autograd import Variable
 8 | from torch.nn.parameter import Parameter
 9 | from torch.nn.init import xavier_uniform, xavier_normal, orthogonal
10 | 
11 | 
12 | from models.multiTask import *
13 | 
14 | __all__ = ['AMIO']
15 | 
16 | MODEL_MAP = {
17 |     'cmcm': CMCM
18 | }
19 | 
20 | class AMIO(nn.Module):
21 |     def __init__(self, args):
22 |         super(AMIO, self).__init__()
23 |         lastModel = MODEL_MAP[args.modelName]
24 |         self.Model = lastModel(args)
25 | 
26 |     def forward(self, labels_m, text_x, audio_x, video_x):
27 |         return self.Model(labels_m, text_x, audio_x, video_x)
28 | 
29 |     def generate(self, text_x, audio_x, video_x):
30 |         return self.Model.generate(text_x, audio_x, video_x)


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/functions.py:
--------------------------------------------------------------------------------
 1 | def dict_to_str(src_dict):
 2 |     dst_str = ""
 3 |     for key in src_dict.keys():
 4 |         dst_str += " %s: %.4f " %(key, src_dict[key]) 
 5 |     return dst_str
 6 | 
 7 | class Storage(dict):
 8 |     """
 9 |     A Storage object is like a dictionary except `obj.foo` can be used inadition to `obj['foo']`
10 |     ref: https://blog.csdn.net/a200822146085/article/details/88430450
11 |     """
12 |     def __getattr__(self, key):
13 |         try:
14 |             return self[key] if key in self else False
15 |         except KeyError as k:
16 |             raise AttributeError(k)
17 | 
18 |     def __setattr__(self, key, value):
19 |         self[key] = value
20 | 
21 |     def __delattr__(self, key):
22 |         try:
23 |             del self[key]
24 |         except KeyError as k:
25 |             raise AttributeError(k)
26 | 
27 |     def __str__(self):
28 |         return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
29 | 
30 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/functions.py:
--------------------------------------------------------------------------------
 1 | def dict_to_str(src_dict):
 2 |     dst_str = ""
 3 |     for key in src_dict.keys():
 4 |         dst_str += " %s: %.4f " %(key, src_dict[key]) 
 5 |     return dst_str
 6 | 
 7 | class Storage(dict):
 8 |     """
 9 |     A Storage object is like a dictionary except `obj.foo` can be used inadition to `obj['foo']`
10 |     ref: https://blog.csdn.net/a200822146085/article/details/88430450
11 |     """
12 |     def __getattr__(self, key):
13 |         try:
14 |             return self[key] if key in self else False
15 |         except KeyError as k:
16 |             raise AttributeError(k)
17 | 
18 |     def __setattr__(self, key, value):
19 |         self[key] = value
20 | 
21 |     def __delattr__(self, key):
22 |         try:
23 |             del self[key]
24 |         except KeyError as k:
25 |             raise AttributeError(k)
26 | 
27 |     def __str__(self):
28 |         return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
29 | 
30 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/functions.py:
--------------------------------------------------------------------------------
 1 | def dict_to_str(src_dict):
 2 |     dst_str = ""
 3 |     for key in src_dict.keys():
 4 |         dst_str += " %s: %.4f " %(key, src_dict[key]) 
 5 |     return dst_str
 6 | 
 7 | class Storage(dict):
 8 |     """
 9 |     A Storage object is like a dictionary except `obj.foo` can be used inadition to `obj['foo']`
10 |     ref: https://blog.csdn.net/a200822146085/article/details/88430450
11 |     """
12 |     def __getattr__(self, key):
13 |         try:
14 |             return self[key] if key in self else False
15 |         except KeyError as k:
16 |             raise AttributeError(k)
17 | 
18 |     def __setattr__(self, key, value):
19 |         self[key] = value
20 | 
21 |     def __delattr__(self, key):
22 |         try:
23 |             del self[key]
24 |         except KeyError as k:
25 |             raise AttributeError(k)
26 | 
27 |     def __str__(self):
28 |         return "<" + self.__class__.__name__ + dict.__repr__(self) + ">"
29 | 
30 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 iyuge2
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 iyuge2
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 iyuge2
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/getLengths.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pickle as plk
 3 | import numpy as np
 4 | 
 5 | from tqdm import tqdm
 6 | 
 7 | def get_lengths(mode, feature_name):
 8 |     fd = data[mode][feature_name]
 9 |     max_len = fd.shape[1]
10 |     
11 |     c_sum = np.sum(fd, axis=-1)
12 |     lengths = []
13 |     for i in tqdm(range(fd.shape[0])):
14 |         null = True
15 |         zeros = np.zeros([fd.shape[1], fd.shape[2]])
16 |         cur_length = max_len
17 |         for j in range(max_len):
18 |             if c_sum[i][j] == 0:
19 |                 cur_length = j
20 |                 null = False
21 |                 break
22 |         if cur_length == 0:
23 |             cur_length = 1
24 |         lengths.append(cur_length)
25 |     return lengths
26 | 
27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf:
28 |     data = plk.load(lf)
29 | 
30 | def handleData(mode):
31 |     # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A')
32 |     # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V')
33 |     data[mode]['audio_lengths'] = get_lengths(mode, 'audio')
34 |     data[mode]['vision_lengths'] = get_lengths(mode, 'vision')
35 | 
36 | handleData('train')
37 | handleData('valid')
38 | handleData('test')
39 | 
40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df:
41 |     plk.dump(data, df, protocol = 4)


--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/getLengths.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pickle as plk
 3 | import numpy as np
 4 | 
 5 | from tqdm import tqdm
 6 | 
 7 | def get_lengths(mode, feature_name):
 8 |     fd = data[mode][feature_name]
 9 |     max_len = fd.shape[1]
10 |     
11 |     c_sum = np.sum(fd, axis=-1)
12 |     lengths = []
13 |     for i in tqdm(range(fd.shape[0])):
14 |         null = True
15 |         zeros = np.zeros([fd.shape[1], fd.shape[2]])
16 |         cur_length = max_len
17 |         for j in range(max_len):
18 |             if c_sum[i][j] == 0:
19 |                 cur_length = j
20 |                 null = False
21 |                 break
22 |         if cur_length == 0:
23 |             cur_length = 1
24 |         lengths.append(cur_length)
25 |     return lengths
26 | 
27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf:
28 |     data = plk.load(lf)
29 | 
30 | def handleData(mode):
31 |     # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A')
32 |     # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V')
33 |     data[mode]['audio_lengths'] = get_lengths(mode, 'audio')
34 |     data[mode]['vision_lengths'] = get_lengths(mode, 'vision')
35 | 
36 | handleData('train')
37 | handleData('valid')
38 | handleData('test')
39 | 
40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df:
41 |     plk.dump(data, df, protocol = 4)


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/getLengths.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pickle as plk
 3 | import numpy as np
 4 | 
 5 | from tqdm import tqdm
 6 | 
 7 | def get_lengths(mode, feature_name):
 8 |     fd = data[mode][feature_name]
 9 |     max_len = fd.shape[1]
10 |     
11 |     c_sum = np.sum(fd, axis=-1)
12 |     lengths = []
13 |     for i in tqdm(range(fd.shape[0])):
14 |         null = True
15 |         zeros = np.zeros([fd.shape[1], fd.shape[2]])
16 |         cur_length = max_len
17 |         for j in range(max_len):
18 |             if c_sum[i][j] == 0:
19 |                 cur_length = j
20 |                 null = False
21 |                 break
22 |         if cur_length == 0:
23 |             cur_length = 1
24 |         lengths.append(cur_length)
25 |     return lengths
26 | 
27 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'rb') as lf:
28 |     data = plk.load(lf)
29 | 
30 | def handleData(mode):
31 |     # data[mode]['audio_lengths'], _ = get_lengths(mode, 'feature_A')
32 |     # data[mode]['vision_lengths'], _ = get_lengths(mode, 'feature_V')
33 |     data[mode]['audio_lengths'] = get_lengths(mode, 'audio')
34 |     data[mode]['vision_lengths'] = get_lengths(mode, 'vision')
35 | 
36 | handleData('train')
37 | handleData('valid')
38 | handleData('test')
39 | 
40 | with open('/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/unaligned_50.pkl', 'wb') as df:
41 |     plk.dump(data, df, protocol = 4)


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/ChatGLM3/configuration_chatglm.py:
--------------------------------------------------------------------------------
 1 | from transformers import PretrainedConfig
 2 | 
 3 | 
 4 | class ChatGLMConfig(PretrainedConfig):
 5 |     model_type = "chatglm"
 6 |     def __init__(
 7 |         self,
 8 |         num_layers=28,
 9 |         padded_vocab_size=65024,
10 |         hidden_size=4096,
11 |         ffn_hidden_size=13696,
12 |         kv_channels=128,
13 |         num_attention_heads=32,
14 |         seq_length=2048,
15 |         hidden_dropout=0.0,
16 |         classifier_dropout=None,
17 |         attention_dropout=0.0,
18 |         layernorm_epsilon=1e-5,
19 |         rmsnorm=True,
20 |         apply_residual_connection_post_layernorm=False,
21 |         post_layer_norm=True,
22 |         add_bias_linear=False,
23 |         add_qkv_bias=False,
24 |         bias_dropout_fusion=True,
25 |         multi_query_attention=False,
26 |         multi_query_group_num=1,
27 |         apply_query_key_layer_scaling=True,
28 |         attention_softmax_in_fp32=True,
29 |         fp32_residual_connection=False,
30 |         quantization_bit=0,
31 |         pre_seq_len=None,
32 |         prefix_projection=False,
33 |         **kwargs
34 |     ):
35 |         self.num_layers = num_layers
36 |         self.vocab_size = padded_vocab_size
37 |         self.padded_vocab_size = padded_vocab_size
38 |         self.hidden_size = hidden_size
39 |         self.ffn_hidden_size = ffn_hidden_size
40 |         self.kv_channels = kv_channels
41 |         self.num_attention_heads = num_attention_heads
42 |         self.seq_length = seq_length
43 |         self.hidden_dropout = hidden_dropout
44 |         self.classifier_dropout = classifier_dropout
45 |         self.attention_dropout = attention_dropout
46 |         self.layernorm_epsilon = layernorm_epsilon
47 |         self.rmsnorm = rmsnorm
48 |         self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
49 |         self.post_layer_norm = post_layer_norm
50 |         self.add_bias_linear = add_bias_linear
51 |         self.add_qkv_bias = add_qkv_bias
52 |         self.bias_dropout_fusion = bias_dropout_fusion
53 |         self.multi_query_attention = multi_query_attention
54 |         self.multi_query_group_num = multi_query_group_num
55 |         self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
56 |         self.attention_softmax_in_fp32 = attention_softmax_in_fp32
57 |         self.fp32_residual_connection = fp32_residual_connection
58 |         self.quantization_bit = quantization_bit
59 |         self.pre_seq_len = pre_seq_len
60 |         self.prefix_projection = prefix_projection
61 |         super().__init__(**kwargs)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <div align="center">
 2 | 
 3 | # 😊 The Official Implementation of MSE-Adapter
 4 | 
 5 | <p align="center">
 6 |     <a href="https://arxiv.org/abs/2502.12478"><img src="https://img.shields.io/badge/arXiv-2502.12478-b31b1b?style=for-the-badge" alt="arXiv"></a>
 7 |     <a href="https://ojs.aaai.org/index.php/AAAI/article/view/34755"><img src="https://img.shields.io/badge/AAAI-2025-003973?style=for-the-badge" alt="AAAI 2025"></a>
 8 | </p>
 9 | 
10 | 🎉🎉 **We have been accepted at AAAI-2025!**
11 | </div>
12 | 
13 | ---
14 | This is the official code for the 《MSE-Adapter: A Lightweight Plugin Endowing LLMs with the Capability to Perform Multimodal Sentiment Analysis and Emotion Recognition》. 
15 | 
16 | ![Overall](Fig/overall.png)
17 | <div align="center">
18 |     
19 | *Fig1: The comprehensive framework integrating MSE-Adapter with LLM.*
20 | 
21 | </div>
22 | 
23 | ---
24 | 
25 | ## 🚀 Get Started! (Take MSE-ChatGLM3-6B as an example.)
26 | 
27 | ### 🔧 Step 1: Create the Environment
28 | ``` bash
29 | git clone https://github.com/AZYoung233/MSE-Adapter.git
30 | cd MSE-Adapter
31 | conda create --name MSE-Adapter python=3.10.13
32 | conda activate MSE-Adapter
33 | pip install -r requirements.txt
34 | ```
35 | 🚨 **Critical Notice (2025/04/29 update)**: It is **highly recommended** to create a new **virtual environment** directly using `requirements.txt`. If that's not feasible, at least ensure that the `transformers` version matches exactly. Otherwise, the training loss may decrease as expected, but the evaluation metrics could be abnormal, severely impacting the model's performance.
36 | 
37 | ### 📂 Step 2: Download the Dataset
38 | - You can download the dataset at the link below：
39 |    - [MOSEI](https://huggingface.co/datasets/AZYoung/MOSEI_processed)📦
40 |    - [SIMS-V2](https://huggingface.co/datasets/AZYoung/SIMSV2_processed)📦
41 |    - [MELD](https://huggingface.co/datasets/AZYoung/MELD_processed)📦
42 |    - [CHERMA](https://huggingface.co/datasets/AZYoung/CHERMA0723_processed)📦
43 | - Place them under the same folder, and set `root_dataset_dir` in `parse_args` of `run.py` to the path where you store your dataset.
44 | 
45 | ### 💾 Step 3: Download the Backbone LLM
46 | - Download backbone LLM from the [THUDM/chatglm3-6b](https://huggingface.co/THUDM/chatglm3-6b) and set `pretrain_LM` in `parse_args` of `run.py` to the path where you store your LLM. If for any particular reason your download is too slow, try using [Modelscope](https://modelscope.cn/my/overview) 🌐 or [HF-mirrors](https://hf-mirror.com/) 🌐.
47 | 
48 | ### ▶️ Step 4: Run!
49 | - Once you have completed the basic setup as described above, you can run the code using the following steps. The code will run 5 random seeds and the results will be saved in `results/result`. The results presented in the paper are the average of 5 random seeds.
50 | ```bash
51 | cd MSE-ChatGLM3-6B
52 | python run.py
53 | ```
54 | 
55 | ## 🙏 Acknowledgment
56 | Our code is structurally referenced to [SELF-MM](https://github.com/thuiar/Self-MM). Thanks to their open-source spirit for saving us a lot of time. 💖
57 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | addict==2.4.0
  2 | aiohttp==3.9.1
  3 | aiosignal==1.3.1
  4 | aliyun-python-sdk-core==2.14.0
  5 | aliyun-python-sdk-kms==2.16.2
  6 | annotated-types==0.6.0
  7 | anyio==4.8.0
  8 | async-timeout==4.0.3
  9 | attrs==23.1.0
 10 | blis==0.7.11
 11 | Brotli==1.0.9
 12 | cachetools==5.3.2
 13 | catalogue==2.0.10
 14 | certifi==2023.11.17
 15 | cffi==1.16.0
 16 | charset-normalizer==2.0.4
 17 | click==8.1.7
 18 | cloudpathlib==0.16.0
 19 | cmake==3.28.1
 20 | confection==0.1.4
 21 | contourpy==1.2.0
 22 | crcmod==1.7
 23 | cryptography==41.0.7
 24 | cycler==0.12.1
 25 | cymem==2.0.8
 26 | datasets==2.15.0
 27 | diffusers==0.31.0
 28 | dill==0.3.7
 29 | easydict==1.13
 30 | einops==0.7.0
 31 | en-core-web-sm==3.7.1
 32 | exceptiongroup==1.2.2
 33 | fastapi==0.115.8
 34 | filelock==3.13.1
 35 | fonttools==4.46.0
 36 | frozenlist==1.4.1
 37 | fsspec==2023.10.0
 38 | gast==0.5.4
 39 | gmpy2==2.1.2
 40 | graphviz==0.20.3
 41 | h11==0.14.0
 42 | huggingface-hub==0.26.2
 43 | idna==3.4
 44 | importlib-metadata==7.0.0
 45 | jieba==0.42.1
 46 | Jinja2==3.1.2
 47 | jmespath==0.10.0
 48 | joblib==1.3.2
 49 | kiwisolver==1.4.5
 50 | langcodes==3.3.0
 51 | lit==17.0.6
 52 | MarkupSafe==2.1.1
 53 | matplotlib==3.8.2
 54 | mkl-fft==1.3.8
 55 | mkl-random==1.2.4
 56 | mkl-service==2.4.0
 57 | modelscope==1.10.0
 58 | mpmath==1.3.0
 59 | multidict==6.0.4
 60 | multiprocess==0.70.15
 61 | murmurhash==1.0.10
 62 | networkx==3.1
 63 | numpy==1.26.2
 64 | nvidia-cublas-cu11==11.10.3.66
 65 | nvidia-cuda-cupti-cu11==11.7.101
 66 | nvidia-cuda-nvrtc-cu11==11.7.99
 67 | nvidia-cuda-runtime-cu11==11.7.99
 68 | nvidia-cudnn-cu11==8.5.0.96
 69 | nvidia-cufft-cu11==10.9.0.58
 70 | nvidia-curand-cu11==10.2.10.91
 71 | nvidia-cusolver-cu11==11.4.0.1
 72 | nvidia-cusparse-cu11==11.7.4.91
 73 | nvidia-ml-py==12.535.133
 74 | nvidia-nccl-cu11==2.14.3
 75 | nvidia-nvtx-cu11==11.7.91
 76 | nvitop==1.3.1
 77 | opencv-python==4.11.0.86
 78 | oss2==2.18.3
 79 | packaging==23.2
 80 | pandas==2.1.4
 81 | Pillow==10.0.1
 82 | pip==23.3.1
 83 | platformdirs==4.1.0
 84 | preshed==3.0.9
 85 | protobuf==4.25.3
 86 | psutil==5.9.6
 87 | pyarrow==14.0.1
 88 | pyarrow-hotfix==0.6
 89 | pycparser==2.21
 90 | pycryptodome==3.19.0
 91 | pydantic==2.5.3
 92 | pydantic_core==2.14.6
 93 | pyOpenSSL==23.2.0
 94 | pyparsing==3.1.1
 95 | PySocks==1.7.1
 96 | python-dateutil==2.8.2
 97 | pytz==2023.3.post1
 98 | PyYAML==6.0.1
 99 | regex==2023.10.3
100 | requests==2.31.0
101 | safetensors==0.4.1
102 | scikit-learn==1.3.2
103 | scipy==1.11.4
104 | sentencepiece==0.1.99
105 | setuptools==68.2.2
106 | simplejson==3.19.2
107 | six==1.16.0
108 | smart-open==6.4.0
109 | sniffio==1.3.1
110 | sortedcontainers==2.4.0
111 | spacy==3.7.2
112 | spacy-legacy==3.0.12
113 | spacy-loggers==1.0.5
114 | srsly==2.4.8
115 | starlette==0.45.3
116 | sympy==1.12
117 | termcolor==2.4.0
118 | thinc==8.2.2
119 | threadpoolctl==3.2.0
120 | tiktoken==0.5.2
121 | tokenizers==0.15.0
122 | tomli==2.0.1
123 | torch==2.0.1
124 | torchaudio==2.1.2
125 | torchvision==0.16.2
126 | torchviz==0.0.2
127 | tqdm==4.66.1
128 | transformers==4.36.1
129 | transformers-stream-generator==0.0.4
130 | triton==2.0.0
131 | typer==0.9.0
132 | typing_extensions==4.12.2
133 | tzdata==2023.3
134 | urllib3==1.26.18
135 | uvicorn==0.34.0
136 | wasabi==1.1.2
137 | weasel==0.3.4
138 | wheel==0.41.2
139 | xformers==0.0.21
140 | xxhash==3.4.1
141 | yapf==0.40.2
142 | yarl==1.9.4
143 | zipp==3.17.0
144 | 


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/config_regression.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | 
  4 | from utils.functions import Storage
  5 | 
  6 | class ConfigRegression():
  7 |     def __init__(self, args):
  8 |         # hyper parameters for models
  9 |         HYPER_MODEL_MAP = {
 10 |             'cmcm': self.__CMCM
 11 |         }
 12 |         # hyper parameters for datasets
 13 |         self.root_dataset_dir = args.root_dataset_dir
 14 |         HYPER_DATASET_MAP = self.__datasetCommonParams()
 15 |         # normalize
 16 |         model_name = str.lower(args.modelName)
 17 |         dataset_name = str.lower(args.datasetName)
 18 |         # load params
 19 |         commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
 20 |         dataArgs = HYPER_DATASET_MAP[dataset_name]
 21 |         dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
 22 |         # integrate all parameters
 23 |         self.args = Storage(dict(vars(args),
 24 |                             **dataArgs,
 25 |                             **commonArgs,
 26 |                             **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
 27 |                             ))
 28 |     
 29 |     def __datasetCommonParams(self):
 30 |         root_dataset_dir = self.root_dataset_dir
 31 |         tmp = {
 32 |             'mosi':{
 33 |                 'unaligned': {
 34 |                     'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
 35 |                     'seq_lens': (50, 50, 50),
 36 |                     # (text, audio, video)
 37 |                     'feature_dims': (4096, 5, 20),
 38 |                     'train_samples': 1284,
 39 |                     'num_classes': 3,
 40 |                     'language': 'en',
 41 |                     'KeyEval': 'MAE'
 42 |                 }
 43 |             },
 44 |             'mosei':{
 45 |                 'unaligned': {
 46 |                     'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
 47 |                     'seq_lens': (50, 500, 375),
 48 |                     # (text, audio, video)
 49 |                     'feature_dims': (4096, 74, 35),
 50 |                     'train_samples': 16326,
 51 |                     'num_classes': 3,
 52 |                     'language': 'en',
 53 |                     'KeyEval': 'MAE'
 54 |                 }
 55 |             },
 56 | 
 57 | 
 58 |             'simsv2': {
 59 |                 'unaligned': {
 60 |                     'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'),
 61 |                     # (batch_size, seq_lens, feature_dim)
 62 |                     'seq_lens': (50, 925, 232),  # (text, audio, video)
 63 |                     'feature_dims': (4096, 25, 177),  # (text, audio, video)
 64 |                     'train_samples': 2722,
 65 |                     'num_classes': 3,
 66 |                     'language': 'cn',
 67 |                     'KeyEval': 'MAE',
 68 |                 }
 69 |             }
 70 |         }
 71 |         return tmp
 72 | 
 73 |     def __CMCM(self):
 74 |         tmp = {
 75 |             'commonParas':{
 76 |                 'need_data_aligned': False,
 77 |                 'need_model_aligned': False,
 78 |                 'need_label_prefix':True,
 79 |                 'need_normalized': False,
 80 |                 'use_PLM': True,
 81 |                 'save_labels': False,
 82 |             },
 83 |             # dataset
 84 |             'datasetParas':{
 85 |                 'mosei':{
 86 |                     # the batch_size of each epoch is update_epochs * batch_size
 87 |                     'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, 3.0]. response: The sentiment is',
 88 |                     'max_new_tokens': 4,
 89 |                     'pseudo_tokens': 4,
 90 |                     'batch_size': 8,
 91 |                     'learning_rate': 5e-5,
 92 |                     # feature subNets
 93 |                     'a_lstm_hidden_size': 64,
 94 |                     'v_lstm_hidden_size': 32,
 95 |                     'a_lstm_layers': 1,
 96 |                     'v_lstm_layers': 1,
 97 |                     'a_lstm_dropout': 0.0,
 98 |                     'v_lstm_dropout': 0.0,
 99 |                     'warm_up_epochs':30,
100 |                     #loss weight   best：1
101 |                     'gamma':1,
102 |                     'update_epochs': 1,
103 |                     'early_stop': 10,     #10和8没啥区别
104 |                     # res
105 |                     'H': 3.0
106 |                 },
107 | 
108 |                 'simsv2': {
109 |                     # the batch_size of each epoch is update_epochs * batch_size
110 |                     'max_new_tokens': 4,
111 |                     'pseudo_tokens': 4,
112 |                     'task_specific_prompt': '请对上述多模态内容的情感强度进行预测，范围在[-1.0, 1.0]之间。响应: 情感为',
113 |                     'batch_size': 8,
114 |                     'learning_rate': 5e-5,
115 |                     # feature subNets
116 |                     'a_lstm_hidden_size': 64,
117 |                     'v_lstm_hidden_size': 64,
118 |                     'a_lstm_layers': 1,
119 |                     'v_lstm_layers': 1,
120 |                     'a_lstm_dropout': 0.0,
121 |                     'v_lstm_dropout': 0.0,
122 |                     'warm_up_epochs': 80,
123 |                     'update_epochs': 1,
124 |                     'early_stop': 10,
125 |                     # loss weight  best：0.25
126 |                     'gamma': 1,
127 |                     # res
128 |                     'H': 1.0
129 |                 },
130 |             },
131 |         }
132 |         return tmp
133 | 
134 |     def get_config(self):
135 |         return self.args


--------------------------------------------------------------------------------
/MSE-Llama2-7B/config/config_regression.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | 
  4 | from utils.functions import Storage
  5 | 
  6 | class ConfigRegression():
  7 |     def __init__(self, args):
  8 |         # hyper parameters for models
  9 |         HYPER_MODEL_MAP = {
 10 |             'cmcm': self.__CMCM
 11 |         }
 12 |         # hyper parameters for datasets
 13 |         self.root_dataset_dir = args.root_dataset_dir
 14 |         HYPER_DATASET_MAP = self.__datasetCommonParams()
 15 | 
 16 |         # normalize
 17 |         model_name = str.lower(args.modelName)
 18 |         dataset_name = str.lower(args.datasetName)
 19 |         # load params
 20 |         commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
 21 |         dataArgs = HYPER_DATASET_MAP[dataset_name]
 22 |         dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
 23 |         # integrate all parameters
 24 |         self.args = Storage(dict(vars(args),
 25 |                             **dataArgs,
 26 |                             **commonArgs,
 27 |                             **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
 28 |                             ))
 29 |     
 30 |     def __datasetCommonParams(self):
 31 |         root_dataset_dir = self.root_dataset_dir
 32 |         tmp = {
 33 |             'mosi':{
 34 |                 'unaligned': {
 35 |                     'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
 36 |                     'seq_lens': (50, 50, 50),
 37 |                     # (text, audio, video)
 38 |                     'feature_dims': (4096, 5, 20),
 39 |                     'train_samples': 1284,
 40 |                     'num_classes': 3,
 41 |                     'language': 'en',
 42 |                     'KeyEval': 'MAE'
 43 |                 }
 44 |             },
 45 |             'mosei':{
 46 |                 'unaligned': {
 47 |                     'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
 48 |                     'seq_lens': (50, 500, 375),
 49 |                     # (text, audio, video)
 50 |                     'feature_dims': (4096, 74, 35),
 51 |                     'train_samples': 16326,
 52 |                     'num_classes': 3,
 53 |                     'language': 'en',
 54 |                     'KeyEval': 'MAE'
 55 |                 }
 56 |             },
 57 | 
 58 | 
 59 |             'simsv2': {
 60 |                 'unaligned': {
 61 |                     'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'),
 62 |                     # (batch_size, seq_lens, feature_dim)
 63 |                     'seq_lens': (50, 925, 232),  # (text, audio, video)
 64 |                     'feature_dims': (4096, 25, 177),  # (text, audio, video)
 65 |                     'train_samples': 2722,
 66 |                     'num_classes': 3,
 67 |                     'language': 'cn',
 68 |                     'KeyEval': 'MAE',
 69 |                 }
 70 |             }
 71 |         }
 72 |         return tmp
 73 | 
 74 |     def __CMCM(self):
 75 |         tmp = {
 76 |             'commonParas':{
 77 |                 'need_data_aligned': False,
 78 |                 'need_model_aligned': False,
 79 |                 'need_label_prefix':True,
 80 |                 'need_normalized': False,
 81 |                 'use_PLM': True,
 82 |                 'save_labels': False,
 83 |             },
 84 |             # dataset
 85 |             'datasetParas':{
 86 |                 'mosei':{
 87 |                     # the batch_size of each epoch is update_epochs * batch_size
 88 |                     'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, +3.0]. Assistant: The sentiment is',
 89 |                     'max_new_tokens': 4,
 90 |                     'pseudo_tokens': 4,
 91 |                     'batch_size': 8,
 92 |                     'learning_rate': 5e-5,
 93 |                     # feature subNets
 94 |                     'a_lstm_hidden_size': 64,
 95 |                     'v_lstm_hidden_size': 32,
 96 |                     'a_lstm_layers': 1,
 97 |                     'v_lstm_layers': 1,
 98 |                     'a_lstm_dropout': 0.0,
 99 |                     'v_lstm_dropout': 0.0,
100 |                     'warm_up_epochs':30,
101 |                     #loss weight   best：1
102 |                     'gamma':1,
103 |                     'update_epochs': 1,
104 |                     'early_stop': 10,     #10和8没啥区别
105 |                     # res
106 |                     'H': 3.0
107 |                 },
108 | 
109 |                 'simsv2': {
110 |                     # the batch_size of each epoch is update_epochs * batch_size
111 |                     'max_new_tokens': 4,
112 |                     'pseudo_tokens': 4,
113 |                     'task_specific_prompt': '请对上述多模态内容的情感强度进行预测，范围在[-1.0, 1.0]之间。响应: 情感为',
114 |                     'batch_size': 8,
115 |                     'learning_rate': 5e-5,
116 |                     # feature subNets
117 |                     'a_lstm_hidden_size': 64,
118 |                     'v_lstm_hidden_size': 64,
119 |                     'a_lstm_layers': 1,
120 |                     'v_lstm_layers': 1,
121 |                     'a_lstm_dropout': 0.0,
122 |                     'v_lstm_dropout': 0.0,
123 |                     'warm_up_epochs': 40,
124 |                     'update_epochs': 1,
125 |                     'early_stop': 10,
126 |                     # loss weight  best：0.25
127 |                     'gamma': 1,
128 |                     # res
129 |                     'H': 1.0
130 |                 },
131 |             },
132 |         }
133 |         return tmp
134 | 
135 |     def get_config(self):
136 |         return self.args


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/config_regression.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | 
  4 | from utils.functions import Storage
  5 | 
  6 | class ConfigRegression():
  7 |     def __init__(self, args):
  8 |         # hyper parameters for models
  9 |         HYPER_MODEL_MAP = {
 10 |             'cmcm': self.__CMCM
 11 |         }
 12 |         # hyper parameters for datasets
 13 |         self.root_dataset_dir = args.root_dataset_dir
 14 |         HYPER_DATASET_MAP = self.__datasetCommonParams()
 15 | 
 16 |         # normalize
 17 |         model_name = str.lower(args.modelName)
 18 |         dataset_name = str.lower(args.datasetName)
 19 |         # load params
 20 |         commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
 21 |         dataArgs = HYPER_DATASET_MAP[dataset_name]
 22 |         dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
 23 |         # integrate all parameters
 24 |         self.args = Storage(dict(vars(args),
 25 |                             **dataArgs,
 26 |                             **commonArgs,
 27 |                             **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
 28 |                             ))
 29 |     
 30 |     def __datasetCommonParams(self):
 31 |         root_dataset_dir = self.root_dataset_dir
 32 |         tmp = {
 33 |             'mosi':{
 34 |                 'unaligned': {
 35 |                     'dataPath': os.path.join(root_dataset_dir, 'MOSI/Processed/unaligned_50.pkl'),
 36 |                     'seq_lens': (50, 50, 50),
 37 |                     # (text, audio, video)
 38 |                     'feature_dims': (2048, 5, 20),
 39 |                     'train_samples': 1284,
 40 |                     'num_classes': 3,
 41 |                     'language': 'en',
 42 |                     'KeyEval': 'MAE'
 43 |                 }
 44 |             },
 45 |             'mosei':{
 46 |                 'unaligned': {
 47 |                     'dataPath': os.path.join(root_dataset_dir, 'MOSEI/Processed/unaligned_50.pkl'),
 48 |                     'seq_lens': (50, 500, 375),
 49 |                     # (text, audio, video)
 50 |                     'feature_dims': (2048, 74, 35),
 51 |                     'train_samples': 16326,
 52 |                     'num_classes': 3,
 53 |                     'language': 'en',
 54 |                     'KeyEval': 'MAE'
 55 |                 }
 56 |             },
 57 | 
 58 | 
 59 |             'simsv2': {
 60 |                 'unaligned': {
 61 |                     'dataPath': os.path.join(root_dataset_dir, 'SIMS_V2/ch-simsv2s.pkl'),
 62 |                     # (batch_size, seq_lens, feature_dim)
 63 |                     'seq_lens': (50, 925, 232),  # (text, audio, video)
 64 |                     'feature_dims': (2048, 25, 177),  # (text, audio, video)
 65 |                     'train_samples': 2722,
 66 |                     'num_classes': 3,
 67 |                     'language': 'cn',
 68 |                     'KeyEval': 'MAE',
 69 |                 }
 70 |             }
 71 |         }
 72 |         return tmp
 73 | 
 74 |     def __CMCM(self):
 75 |         tmp = {
 76 |             'commonParas':{
 77 |                 'need_data_aligned': False,
 78 |                 'need_model_aligned': False,
 79 |                 'need_label_prefix':True,
 80 |                 'need_normalized': False,
 81 |                 'use_PLM': True,
 82 |                 'save_labels': False,
 83 |             },
 84 |             # dataset
 85 |             'datasetParas':{
 86 |                 'mosei':{
 87 |                     # the batch_size of each epoch is update_epochs * batch_size
 88 |                     'task_specific_prompt': 'Please predict the sentiment intensity of the above multimodal content in the range [-3.0, +3.0]. Assistant: The sentiment is',
 89 |                     'max_new_tokens': 4,
 90 |                     'pseudo_tokens': 4,
 91 |                     'batch_size': 16,
 92 |                     'learning_rate': 5e-3,
 93 |                     # feature subNets
 94 |                     'a_lstm_hidden_size': 64,
 95 |                     'v_lstm_hidden_size': 32,
 96 |                     'a_lstm_layers': 1,
 97 |                     'v_lstm_layers': 1,
 98 |                     'a_lstm_dropout': 0.0,
 99 |                     'v_lstm_dropout': 0.0,
100 |                     'warm_up_epochs':30,
101 |                     #loss weight   best：1
102 |                     'gamma':1,
103 |                     'update_epochs': 1,
104 |                     'early_stop': 10,     #10和8没啥区别
105 |                     # res
106 |                     'H': 3.0,
107 |                 },
108 | 
109 |                 'simsv2': {
110 |                     # the batch_size of each epoch is update_epochs * batch_size
111 |                     'max_new_tokens': 4,
112 |                     'pseudo_tokens': 4,
113 |                     'task_specific_prompt': '请对上述多模态内容的情感强度进行预测，范围在[-1.0, +1.0]之间。响应: 情感为',
114 |                     'batch_size': 16,
115 |                     'learning_rate': 5e-4,   #5e -4 较好
116 |                     # feature subNets
117 |                     'a_lstm_hidden_size': 64,
118 |                     'v_lstm_hidden_size': 64,
119 |                     'a_lstm_layers': 1,
120 |                     'v_lstm_layers': 1,
121 |                     'a_lstm_dropout': 0.0,
122 |                     'v_lstm_dropout': 0.0,
123 |                     'warm_up_epochs': 30,  # 不太确定是30还是40，先跑一把
124 |                     'update_epochs': 1,
125 |                     'early_stop': 10,
126 |                     # loss weight  best：0.25
127 |                     'gamma': 1,
128 |                     # res
129 |                     'H': 1.0
130 |                 },
131 |             },
132 |         }
133 |         return tmp
134 | 
135 |     def get_config(self):
136 |         return self.args


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/config/config_classification.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | 
  4 | from utils.functions import Storage
  5 | 
  6 | class ConfigClassification():
  7 |     def __init__(self, args):
  8 |         # hyper parameters for models
  9 |         HYPER_MODEL_MAP = {
 10 |             'cmcm': self.__CMCM
 11 |         }
 12 |         # hyper parameters for datasets
 13 |         self.root_dataset_dir = args.root_dataset_dir
 14 |         HYPER_DATASET_MAP = self.__datasetCommonParams()
 15 | 
 16 |         # normalize
 17 |         model_name = str.lower(args.modelName)
 18 |         dataset_name = str.lower(args.datasetName)
 19 |         # load params
 20 |         commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
 21 |         dataArgs = HYPER_DATASET_MAP[dataset_name]
 22 |         dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
 23 |         # integrate all parameters
 24 |         self.args = Storage(dict(vars(args),
 25 |                             **dataArgs,
 26 |                             **commonArgs,
 27 |                             **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
 28 |                             ))
 29 |     
 30 |     def __datasetCommonParams(self):
 31 |         root_dataset_dir = self.root_dataset_dir
 32 |         tmp = {
 33 |             'iemocap':{
 34 |                 'unaligned': {
 35 |                     'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'),
 36 |                     'seq_lens': (84, 157, 32),
 37 |                     # (text, audio, video)
 38 |                     'feature_dims': (4096, 64, 64),
 39 |                     'train_samples': 5240,
 40 |                     'num_classes': 3,
 41 |                     'language': 'en',
 42 |                     'KeyEval': 'weight_F1'
 43 |                 }
 44 |             },
 45 |             'meld':{
 46 |                 'unaligned': {
 47 |                     'dataPath': os.path.join(root_dataset_dir, 'MELD'),
 48 |                     'seq_lens': (65, 157, 32),
 49 |                     # (text, audio, video)
 50 |                     'feature_dims': (4096, 64, 64),
 51 |                     'train_samples': 9992,
 52 |                     'num_classes': 3,
 53 |                     'language': 'en',
 54 |                     'KeyEval': 'weight_F1'
 55 |                 }
 56 |             },
 57 |             'cherma':{
 58 |                 'unaligned': {
 59 |                     'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'),
 60 |                     # (batch_size, seq_lens, feature_dim)
 61 |                     'seq_lens': (78, 543, 16), # (text, audio, video)
 62 |                     'feature_dims': (4096, 1024, 2048), # (text, audio, video)
 63 |                     'train_samples': 16326,
 64 |                     'num_classes': 3,
 65 |                     'language': 'cn',
 66 |                     'KeyEval': 'weight_F1',
 67 |                 }
 68 |             },
 69 | 
 70 | 
 71 |         }
 72 |         return tmp
 73 | 
 74 |     def __CMCM(self):
 75 |         tmp = {
 76 |             'commonParas':{
 77 |                 'need_data_aligned': False,
 78 |                 'need_model_aligned': False,
 79 |                 'need_label_prefix':True,
 80 |                 'need_normalized': False,
 81 |                 'use_PLM': True,
 82 |                 'save_labels': False,
 83 |             },
 84 |             # dataset
 85 |             'datasetParas':{
 86 |                 'meld':{
 87 |                     # the batch_size of each epoch is update_epochs * batch_size
 88 |                     'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the target \
 89 |                                                 set <neutral:0, surprise:1, fear:2, sadness:3, joy:4, disgust:5, anger:6>. response: The emotion is',
 90 |                     'max_new_tokens': 2,
 91 |                     'pseudo_tokens': 4,
 92 |                     'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5,
 93 |                                            'anger': 6},
 94 |                     'batch_size': 8,
 95 |                     'learning_rate': 5e-5,
 96 |                     # feature subNets
 97 |                     'a_lstm_hidden_size': 64,
 98 |                     'v_lstm_hidden_size': 32,
 99 |                     'a_lstm_layers': 1,
100 |                     'v_lstm_layers': 1,
101 |                     'a_lstm_dropout': 0.0,
102 |                     'v_lstm_dropout': 0.0,
103 |                     'warm_up_epochs': 90,
104 |                     #loss weight   best：1
105 |                     'gamma':1,
106 |                     'update_epochs': 1,
107 |                     'early_stop': 8,
108 |                     # res
109 |                     'H': 3.0
110 |                 },
111 |                 'cherma':{
112 |                     # the batch_size of each epoch is update_epochs * batch_size
113 |                     'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签：<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。响应: 情绪为',
114 |                     'max_new_tokens': 2,
115 |                     'pseudo_tokens': 4,
116 |                     'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5,
117 |                                             '惊奇': 6},
118 |                     'batch_size': 8,
119 |                     'learning_rate': 5e-5,
120 |                     # feature subNets
121 |                     'a_lstm_hidden_size': 32,
122 |                     'v_lstm_hidden_size': 16,
123 |                     'a_lstm_layers': 1,
124 |                     'v_lstm_layers': 1,
125 |                     'a_lstm_dropout': 0.0,
126 |                     'v_lstm_dropout': 0.0,
127 |                     'warm_up_epochs': 30,
128 |                     'update_epochs': 1,
129 |                     'early_stop': 8,
130 |                     # loss weight
131 |                     'gamma': 0,
132 |                     # res
133 |                     'H': 1.0
134 |                 },
135 |             },
136 |         }
137 |         return tmp
138 | 
139 |     def get_config(self):
140 |         return self.args


--------------------------------------------------------------------------------
/MSE-Llama2-7B/config/config_classification.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | 
  4 | from utils.functions import Storage
  5 | 
  6 | class ConfigClassification():
  7 |     def __init__(self, args):
  8 |         # hyper parameters for models
  9 |         HYPER_MODEL_MAP = {
 10 |             'cmcm': self.__CMCM
 11 |         }
 12 |         # hyper parameters for datasets
 13 |         self.root_dataset_dir = args.root_dataset_dir
 14 |         HYPER_DATASET_MAP = self.__datasetCommonParams()
 15 | 
 16 |         # normalize
 17 |         model_name = str.lower(args.modelName)
 18 |         dataset_name = str.lower(args.datasetName)
 19 |         # load params
 20 |         commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
 21 |         dataArgs = HYPER_DATASET_MAP[dataset_name]
 22 |         dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
 23 |         # integrate all parameters
 24 |         self.args = Storage(dict(vars(args),
 25 |                             **dataArgs,
 26 |                             **commonArgs,
 27 |                             **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
 28 |                             ))
 29 |     
 30 |     def __datasetCommonParams(self):
 31 |         root_dataset_dir = self.root_dataset_dir
 32 |         tmp = {
 33 |             'iemocap':{
 34 |                 'unaligned': {
 35 |                     'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'),
 36 |                     'seq_lens': (84, 157, 32),
 37 |                     # (text, audio, video)
 38 |                     'feature_dims': (4096, 64, 64),
 39 |                     'train_samples': 5240,
 40 |                     'num_classes': 3,
 41 |                     'language': 'en',
 42 |                     'KeyEval': 'weight_F1'
 43 |                 }
 44 |             },
 45 |             'meld':{
 46 |                 'unaligned': {
 47 |                     'dataPath': os.path.join(root_dataset_dir, 'MELD'),
 48 |                     'seq_lens': (65, 157, 32),
 49 |                     # (text, audio, video)
 50 |                     'feature_dims': (4096, 64, 64),
 51 |                     'train_samples': 9992,
 52 |                     'num_classes': 3,
 53 |                     'language': 'en',
 54 |                     'KeyEval': 'weight_F1'
 55 |                 }
 56 |             },
 57 |             'cherma':{
 58 |                 'unaligned': {
 59 |                     'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'),
 60 |                     # (batch_size, seq_lens, feature_dim)
 61 |                     'seq_lens': (78, 543, 16), # (text, audio, video)
 62 |                     'feature_dims': (4096, 1024, 2048), # (text, audio, video)
 63 |                     'train_samples': 16326,
 64 |                     'num_classes': 3,
 65 |                     'language': 'cn',
 66 |                     'KeyEval': 'weight_F1',
 67 |                 }
 68 |             },
 69 | 
 70 | 
 71 |         }
 72 |         return tmp
 73 | 
 74 |     def __CMCM(self):
 75 |         tmp = {
 76 |             'commonParas':{
 77 |                 'need_data_aligned': False,
 78 |                 'need_model_aligned': False,
 79 |                 'need_label_prefix':True,
 80 |                 'need_normalized': False,
 81 |                 'use_PLM': True,
 82 |                 'save_labels': False,
 83 |             },
 84 |             # dataset
 85 |             'datasetParas':{
 86 |                 'meld':{
 87 |                     # the batch_size of each epoch is update_epochs * batch_size
 88 |                     'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the \
 89 |                                             target set <neutral:0, surprise:1, fear:2, sadness:3, joy:4, disgust:5, anger:6>. Assistant: The emotion is',
 90 |                     'max_new_tokens': 2,
 91 |                     'pseudo_tokens': 4,
 92 |                     'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5,
 93 |                                            'anger': 6},
 94 |                     'batch_size': 6,
 95 |                     'learning_rate': 5e-4,
 96 |                     # feature subNets
 97 |                     'a_lstm_hidden_size': 64,
 98 |                     'v_lstm_hidden_size': 32, #原来是32,16
 99 |                     'a_lstm_layers': 1,
100 |                     'v_lstm_layers': 1,
101 |                     'a_lstm_dropout': 0.0,
102 |                     'v_lstm_dropout': 0.0,
103 |                     'warm_up_epochs':30,
104 |                     #loss weight   best：1
105 |                     'gamma':1,
106 |                     'update_epochs': 1,
107 |                     'early_stop': 8,
108 |                     # res
109 |                     'H': 3.0
110 |                 },
111 |                 'cherma':{
112 |                     # the batch_size of each epoch is update_epochs * batch_size
113 |                     'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签：<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。助手: 情绪为',
114 |                     'max_new_tokens': 2,
115 |                     'pseudo_tokens': 4,
116 |                     'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5,
117 |                                             '惊奇': 6},
118 |                     'batch_size': 6,
119 |                     'learning_rate': 5e-5,
120 |                     # feature subNets
121 |                     'a_lstm_hidden_size': 32,
122 |                     'v_lstm_hidden_size': 16,
123 |                     'a_lstm_layers': 1,
124 |                     'v_lstm_layers': 1,
125 |                     'a_lstm_dropout': 0.0,
126 |                     'v_lstm_dropout': 0.0,
127 |                     'warm_up_epochs': 30,
128 |                     'update_epochs': 1,
129 |                     'early_stop': 8,
130 |                     # loss weight
131 |                     'gamma': 0,
132 |                     # res
133 |                     'H': 1.0,
134 |                 },
135 |             },
136 |         }
137 |         return tmp
138 | 
139 |     def get_config(self):
140 |         return self.args


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/multiTask/CMCM.py:
--------------------------------------------------------------------------------
  1 | # self supervised multimodal multi-task learning network
  2 | import math
  3 | import os
  4 | import sys
  5 | import collections
  6 | from torch.cuda.amp import autocast, GradScaler
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | from torch.autograd.function import Function
 11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
 12 | 
 13 | from models.subNets.Textmodel import Language_model
 14 | 
 15 | __all__ = ['CMCM']
 16 | 
 17 | class CMCM(nn.Module):
 18 |     def __init__(self, args):
 19 |         super(CMCM, self).__init__()
 20 |         # text enocding
 21 |         self.LLM = Language_model(args)
 22 | 
 23 |         # audio and video enocding
 24 |         text_in, audio_in, video_in = args.feature_dims[:]
 25 |         text_len, audio_len, video_len = args.seq_lens[:]
 26 | 
 27 |         self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout)
 28 |         self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout)
 29 | 
 30 |         self.text_guide_mixer = Text_guide_mixer()
 31 |         #low_rank_fusion
 32 |         fusion_input_size = 256
 33 |         self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens)
 34 | 
 35 | 
 36 |     def forward(self, labels, text, audio, video):
 37 |         audio, audio_len = audio
 38 |         video, video_len = video
 39 |         text, text_len = text
 40 |         text = self.LLM.text_embedding(text[:,0,:].long())
 41 | 
 42 |         video_h = self.video_LSTM(video, video_len)
 43 |         audio_h = self.audio_LSTM(audio, audio_len)
 44 | 
 45 | 
 46 |         fusion_h= self.text_guide_mixer(audio_h, video_h, text)
 47 | 
 48 |         fusion_h= self.mutli_scale_fusion(fusion_h)
 49 | 
 50 | 
 51 |         LLM_input = torch.cat([fusion_h, text], dim=1)
 52 | 
 53 |         LLM_output = self.LLM(LLM_input, labels)
 54 | 
 55 |         res = {
 56 |             'Loss': LLM_output.loss,
 57 |             'Feature_a': audio_h,
 58 |             'Feature_v': video_h,
 59 |             'Feature_f': fusion_h,
 60 |         }
 61 |         return res
 62 | 
 63 |     def generate(self, text, audio, video):
 64 |         audio, audio_len = audio
 65 |         video, video_len = video
 66 |         text, text_len = text
 67 |         text = self.LLM.text_embedding(text[:,0,:].long())
 68 | 
 69 |         audio_h = self.audio_LSTM(audio, audio_len)
 70 |         video_h = self.video_LSTM(video, video_len)
 71 | 
 72 | 
 73 |         fusion_h = self.text_guide_mixer(audio_h, video_h, text)
 74 | 
 75 |         # low_rank_fusion
 76 | 
 77 |         fusion_h = self.mutli_scale_fusion(fusion_h)
 78 | 
 79 |         # concatenate mutli_scale_fusion and text_embedding
 80 | 
 81 |         LLM_input = torch.cat([fusion_h, text], dim=1)
 82 | 
 83 |         LLM_output = self.LLM.generate(LLM_input)
 84 | 
 85 |         return LLM_output
 86 | 
 87 | 
 88 | 
 89 | class TVA_LSTM(nn.Module):
 90 |     def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False):
 91 |         '''
 92 |         Args:
 93 |             in_size: input dimension
 94 |             hidden_size: hidden layer dimension
 95 |             num_layers: specify the number of layers of LSTMs.
 96 |             dropout: dropout probability
 97 |             bidirectional: specify usage of bidirectional LSTM
 98 |         Output:
 99 |             (return value in forward) a tensor of shape (batch_size, out_size)
100 |         '''
101 |         super(TVA_LSTM, self).__init__()
102 |         self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True)
103 |         self.dropout = nn.Dropout(dropout)
104 |         self.linear = nn.Linear(hidden_size, 256)
105 | 
106 |     def forward(self, x, lengths):
107 |         '''
108 |         x: (batch_size, sequence_len, in_size)
109 |         '''
110 |         packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False) #这里把length.to cpu是因为pytorch版本问题
111 |         # _, (final_states, _) = self.rnn(packed_sequence)
112 |         # h = self.dropout(final_states[-1])
113 |         _, final_states = self.rnn(packed_sequence)
114 |         h = self.dropout(final_states[0].squeeze())
115 |         h = self.linear(h)
116 |         return h
117 | 
118 | class Text_guide_mixer(nn.Module):
119 |     def __init__(self):
120 |         super(Text_guide_mixer, self).__init__()
121 |         self.GAP = nn.AdaptiveAvgPool1d(1)
122 |         self.text_mlp = nn.Linear(4096, 256)
123 |     def forward(self, audio, video, text):
124 |         text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze()
125 |         text_knowledge = self.text_mlp(text_GAP)
126 | 
127 |         audio_mixed = torch.mul(audio, text_knowledge)
128 |         video_mixed = torch.mul(video, text_knowledge)
129 | 
130 |         fusion = audio_mixed + video_mixed
131 | 
132 |         return fusion
133 | 
134 | 
135 | class mutli_scale_fusion(nn.Module):
136 |     def __init__(self, input_size, output_size, pseudo_tokens = 4):
137 |         super(mutli_scale_fusion, self).__init__()
138 |         multi_scale_hidden = 256
139 |         self.scale1 = nn.Sequential(
140 |             nn.Linear(input_size, output_size // 8),
141 |             nn.GELU(),
142 |             nn.Linear(output_size // 8, multi_scale_hidden)
143 |         )
144 |         self.scale2 = nn.Sequential(
145 |             nn.Linear(input_size, output_size // 32),
146 |             nn.GELU(),
147 |             nn.Linear(output_size // 32, multi_scale_hidden)
148 |         )
149 |         self.scale3 = nn.Sequential(
150 |             nn.Linear(input_size, output_size // 16),
151 |             nn.GELU(),
152 |             nn.Linear(output_size // 16, multi_scale_hidden)
153 |         )
154 | 
155 |         self.integrating = Integrating(scales = 3)
156 |         self.multi_scale_projector =  nn.Linear(multi_scale_hidden, output_size)
157 |         self.projector = nn.Linear(1, pseudo_tokens)
158 | 
159 |     def forward(self,x):
160 |         # 增加样本复制，将单一样本复制一份,避免最后一个batch只有一个数据时的报错
161 |         if x.dim() == 1:
162 |             x = x.unsqueeze(0)
163 |         #compute different scale experts outputs
164 |         scale1 = self.scale1(x)
165 |         scale2 = self.scale2(x)
166 |         scale3 = self.scale3(x)
167 | 
168 | 
169 |         # Calculate the expert outputs
170 |         multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2)
171 |         multi_scale_integrating =  self.integrating(multi_scale_stack)
172 | 
173 |         multi_scale = self.multi_scale_projector(multi_scale_integrating)
174 |         output = self.projector(multi_scale.unsqueeze(2))
175 |         return output.permute(0, 2, 1)  #[batch,seq_len,hidden_siez]
176 | 
177 | # Define the gating model
178 | class Integrating(nn.Module):
179 |     def __init__(self,  scales):
180 |         super(Integrating, self).__init__()
181 | 
182 |     # Layers
183 |         self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1),
184 |         )
185 | 
186 |     def forward(self, x):
187 |         x = x.unsqueeze(1)
188 |         x = self.Integrating_layer(x)
189 |         x = x.squeeze((1, 3))
190 |         return x
191 | 


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/multiTask/CMCM.py:
--------------------------------------------------------------------------------
  1 | # self supervised multimodal multi-task learning network
  2 | import math
  3 | import os
  4 | import sys
  5 | import collections
  6 | from torch.cuda.amp import autocast, GradScaler
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | from torch.autograd.function import Function
 11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
 12 | 
 13 | from models.subNets.Textmodel import Language_model
 14 | 
 15 | __all__ = ['CMCM']
 16 | 
 17 | class CMCM(nn.Module):
 18 |     def __init__(self, args):
 19 |         super(CMCM, self).__init__()
 20 |         # text enocding
 21 |         self.LLM = Language_model(args)
 22 | 
 23 |         # audio and video enocding
 24 |         text_in, audio_in, video_in = args.feature_dims[:]
 25 |         text_len, audio_len, video_len = args.seq_lens[:]
 26 | 
 27 |         self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout)
 28 |         self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout)
 29 | 
 30 |         self.text_guide_mixer = Text_guide_mixer()
 31 |         #low_rank_fusion
 32 |         fusion_input_size = 256
 33 |         self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens)
 34 | 
 35 | 
 36 |     def forward(self, labels, text, audio, video):
 37 |         audio, audio_len = audio
 38 |         video, video_len = video
 39 |         text, text_len = text
 40 |         text = self.LLM.text_embedding(text[:,0,:].long())
 41 | 
 42 |         video_h = self.video_LSTM(video, video_len)
 43 |         audio_h = self.audio_LSTM(audio, audio_len)
 44 | 
 45 | 
 46 |         fusion_h= self.text_guide_mixer(audio_h, video_h, text)
 47 | 
 48 |         fusion_h= self.mutli_scale_fusion(fusion_h)
 49 | 
 50 | 
 51 |         LLM_input = torch.cat([fusion_h, text], dim=1)
 52 | 
 53 |         LLM_output = self.LLM(LLM_input, labels)
 54 | 
 55 |         res = {
 56 |             'Loss': LLM_output.loss,
 57 |             'Feature_a': audio_h,
 58 |             'Feature_v': video_h,
 59 |             'Feature_f': fusion_h,
 60 |         }
 61 |         return res
 62 | 
 63 |     def generate(self, text, audio, video):
 64 |         audio, audio_len = audio
 65 |         video, video_len = video
 66 |         text, text_len = text
 67 |         text = self.LLM.text_embedding(text[:,0,:].long())
 68 | 
 69 |         audio_h = self.audio_LSTM(audio, audio_len)
 70 |         video_h = self.video_LSTM(video, video_len)
 71 | 
 72 | 
 73 |         fusion_h = self.text_guide_mixer(audio_h, video_h, text)
 74 | 
 75 |         # low_rank_fusion
 76 | 
 77 |         fusion_h = self.mutli_scale_fusion(fusion_h)
 78 | 
 79 |         # concatenate mutli_scale_fusion and text_embedding
 80 | 
 81 |         LLM_input = torch.cat([fusion_h, text], dim=1)
 82 | 
 83 |         LLM_output = self.LLM.generate(LLM_input)
 84 | 
 85 |         return LLM_output
 86 | 
 87 | 
 88 | 
 89 | class TVA_LSTM(nn.Module):
 90 |     def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False):
 91 |         '''
 92 |         Args:
 93 |             in_size: input dimension
 94 |             hidden_size: hidden layer dimension
 95 |             num_layers: specify the number of layers of LSTMs.
 96 |             dropout: dropout probability
 97 |             bidirectional: specify usage of bidirectional LSTM
 98 |         Output:
 99 |             (return value in forward) a tensor of shape (batch_size, out_size)
100 |         '''
101 |         super(TVA_LSTM, self).__init__()
102 |         self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True)
103 |         self.dropout = nn.Dropout(dropout)
104 |         self.linear = nn.Linear(hidden_size, 256)
105 | 
106 |     def forward(self, x, lengths):
107 |         '''
108 |         x: (batch_size, sequence_len, in_size)
109 |         '''
110 |         packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False) #这里把length.to cpu是因为pytorch版本问题
111 |         # _, (final_states, _) = self.rnn(packed_sequence)
112 |         # h = self.dropout(final_states[-1])
113 |         _, final_states = self.rnn(packed_sequence)
114 |         h = self.dropout(final_states[0].squeeze())
115 |         h = self.linear(h)
116 |         return h
117 | 
118 | class Text_guide_mixer(nn.Module):
119 |     def __init__(self):
120 |         super(Text_guide_mixer, self).__init__()
121 |         self.GAP = nn.AdaptiveAvgPool1d(1)
122 |         self.text_mlp = nn.Linear(4096, 256)
123 |     def forward(self, audio, video, text):
124 |         text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze()
125 |         text_knowledge = self.text_mlp(text_GAP)
126 | 
127 |         audio_mixed = torch.mul(audio, text_knowledge)
128 |         video_mixed = torch.mul(video, text_knowledge)
129 | 
130 |         fusion = audio_mixed + video_mixed
131 | 
132 |         return fusion
133 | 
134 | 
135 | class mutli_scale_fusion(nn.Module):
136 |     def __init__(self, input_size, output_size, pseudo_tokens = 4):
137 |         super(mutli_scale_fusion, self).__init__()
138 |         multi_scale_hidden = 256
139 |         self.scale1 = nn.Sequential(
140 |             nn.Linear(input_size, output_size // 8),
141 |             nn.GELU(),
142 |             nn.Linear(output_size // 8, multi_scale_hidden)
143 |         )
144 |         self.scale2 = nn.Sequential(
145 |             nn.Linear(input_size, output_size // 32),
146 |             nn.GELU(),
147 |             nn.Linear(output_size // 32, multi_scale_hidden)
148 |         )
149 |         self.scale3 = nn.Sequential(
150 |             nn.Linear(input_size, output_size // 16),
151 |             nn.GELU(),
152 |             nn.Linear(output_size // 16, multi_scale_hidden)
153 |         )
154 | 
155 |         self.integrating = Integrating(scales = 3)
156 |         self.multi_scale_projector =  nn.Linear(multi_scale_hidden, output_size)
157 |         self.projector = nn.Linear(1, pseudo_tokens)
158 | 
159 |     def forward(self,x):
160 |         # 增加样本复制，将单一样本复制一份,避免最后一个batch只有一个数据时的报错
161 |         if x.dim() == 1:
162 |             x = x.unsqueeze(0)
163 |         #compute different scale experts outputs
164 |         scale1 = self.scale1(x)
165 |         scale2 = self.scale2(x)
166 |         scale3 = self.scale3(x)
167 | 
168 | 
169 |         # Calculate the expert outputs
170 |         multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2)
171 |         multi_scale_integrating =  self.integrating(multi_scale_stack)
172 | 
173 |         multi_scale = self.multi_scale_projector(multi_scale_integrating)
174 |         output = self.projector(multi_scale.unsqueeze(2))
175 |         return output.permute(0, 2, 1)  #[batch,seq_len,hidden_siez]
176 | 
177 | # Define the gating model
178 | class Integrating(nn.Module):
179 |     def __init__(self,  scales):
180 |         super(Integrating, self).__init__()
181 | 
182 |     # Layers
183 |         self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1),
184 |         )
185 | 
186 |     def forward(self, x):
187 |         x = x.unsqueeze(1)
188 |         x = self.Integrating_layer(x)
189 |         x = x.squeeze((1, 3))
190 |         return x
191 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/multiTask/CMCM.py:
--------------------------------------------------------------------------------
  1 | # self supervised multimodal multi-task learning network
  2 | import math
  3 | import os
  4 | import sys
  5 | import collections
  6 | from torch.cuda.amp import autocast, GradScaler
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | from torch.autograd.function import Function
 11 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
 12 | 
 13 | from models.subNets.Textmodel import Language_model
 14 | 
 15 | __all__ = ['CMCM']
 16 | 
 17 | class CMCM(nn.Module):
 18 |     def __init__(self, args):
 19 |         super(CMCM, self).__init__()
 20 |         # text enocding
 21 |         self.LLM = Language_model(args)
 22 | 
 23 |         # audio and video enocding
 24 |         text_in, audio_in, video_in = args.feature_dims[:]
 25 |         text_len, audio_len, video_len = args.seq_lens[:]
 26 | 
 27 |         self.audio_LSTM = TVA_LSTM(audio_in, args.a_lstm_hidden_size, num_layers=args.a_lstm_layers, dropout=args.a_lstm_dropout)
 28 |         self.video_LSTM = TVA_LSTM(video_in, args.v_lstm_hidden_size, num_layers=args.v_lstm_layers, dropout=args.v_lstm_dropout)
 29 | 
 30 |         self.text_guide_mixer = Text_guide_mixer()
 31 |         #low_rank_fusion
 32 |         fusion_input_size = 256
 33 |         self.mutli_scale_fusion = mutli_scale_fusion(input_size=fusion_input_size, output_size= text_in, pseudo_tokens= args.pseudo_tokens)
 34 | 
 35 | 
 36 |     def forward(self, labels, text, audio, video):
 37 |         audio, audio_len = audio
 38 |         video, video_len = video
 39 |         text, text_len = text
 40 |         text = self.LLM.text_embedding(text[:,0,:].long())
 41 | 
 42 |         video_h = self.video_LSTM(video, video_len)
 43 |         audio_h = self.audio_LSTM(audio, audio_len)
 44 | 
 45 | 
 46 |         fusion_h= self.text_guide_mixer(audio_h, video_h, text)
 47 | 
 48 |         fusion_h= self.mutli_scale_fusion(fusion_h)
 49 | 
 50 | 
 51 |         LLM_input = torch.cat([fusion_h, text], dim=1)
 52 | 
 53 |         LLM_output = self.LLM(LLM_input, labels)
 54 | 
 55 |         res = {
 56 |             'Loss': LLM_output.loss,
 57 |             'Feature_a': audio_h,
 58 |             'Feature_v': video_h,
 59 |             'Feature_f': fusion_h,
 60 |         }
 61 |         return res
 62 | 
 63 |     def generate(self, text, audio, video):
 64 |         audio, audio_len = audio
 65 |         video, video_len = video
 66 |         text, text_len = text
 67 |         text = self.LLM.text_embedding(text[:,0,:].long())
 68 | 
 69 |         audio_h = self.audio_LSTM(audio, audio_len)
 70 |         video_h = self.video_LSTM(video, video_len)
 71 | 
 72 | 
 73 |         fusion_h = self.text_guide_mixer(audio_h, video_h, text)
 74 | 
 75 |         # low_rank_fusion
 76 | 
 77 |         fusion_h = self.mutli_scale_fusion(fusion_h)
 78 | 
 79 |         # concatenate mutli_scale_fusion and text_embedding
 80 | 
 81 |         LLM_input = torch.cat([fusion_h, text], dim=1)
 82 | 
 83 |         LLM_output = self.LLM.generate(LLM_input)
 84 | 
 85 |         return LLM_output
 86 | 
 87 | 
 88 | 
 89 | class TVA_LSTM(nn.Module):
 90 |     def __init__(self, in_size, hidden_size, num_layers=1, dropout=0.2, bidirectional=False):
 91 |         '''
 92 |         Args:
 93 |             in_size: input dimension
 94 |             hidden_size: hidden layer dimension
 95 |             num_layers: specify the number of layers of LSTMs.
 96 |             dropout: dropout probability
 97 |             bidirectional: specify usage of bidirectional LSTM
 98 |         Output:
 99 |             (return value in forward) a tensor of shape (batch_size, out_size)
100 |         '''
101 |         super(TVA_LSTM, self).__init__()
102 |         self.rnn = nn.LSTM(in_size, hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, batch_first=True)
103 |         self.dropout = nn.Dropout(dropout)
104 |         self.linear = nn.Linear(hidden_size, 256)
105 | 
106 |     def forward(self, x, lengths):
107 |         '''
108 |         x: (batch_size, sequence_len, in_size)
109 |         '''
110 |         packed_sequence = pack_padded_sequence(x, lengths.to('cpu'), batch_first=True, enforce_sorted=False) #这里把length.to cpu是因为pytorch版本问题
111 |         # _, (final_states, _) = self.rnn(packed_sequence)
112 |         # h = self.dropout(final_states[-1])
113 |         _, final_states = self.rnn(packed_sequence)
114 |         h = self.dropout(final_states[0].squeeze())
115 |         h = self.linear(h)
116 |         return h
117 | 
118 | class Text_guide_mixer(nn.Module):
119 |     def __init__(self):
120 |         super(Text_guide_mixer, self).__init__()
121 |         self.GAP = nn.AdaptiveAvgPool1d(1)
122 |         self.text_mlp = nn.Linear(2048, 256)
123 |     def forward(self, audio, video, text):
124 |         text_GAP = self.GAP(text.permute(0, 2, 1)).squeeze()
125 |         text_knowledge = self.text_mlp(text_GAP)
126 | 
127 |         audio_mixed = torch.mul(audio, text_knowledge)
128 |         video_mixed = torch.mul(video, text_knowledge)
129 | 
130 |         fusion = audio_mixed + video_mixed
131 | 
132 |         return fusion
133 | 
134 | 
135 | class mutli_scale_fusion(nn.Module):
136 |     def __init__(self, input_size, output_size, pseudo_tokens = 4):
137 |         super(mutli_scale_fusion, self).__init__()
138 |         multi_scale_hidden = 256
139 |         self.scale1 = nn.Sequential(
140 |             nn.Linear(input_size, output_size // 8),
141 |             nn.GELU(),
142 |             nn.Linear(output_size // 8, multi_scale_hidden)
143 |         )
144 |         self.scale2 = nn.Sequential(
145 |             nn.Linear(input_size, output_size // 32),
146 |             nn.GELU(),
147 |             nn.Linear(output_size // 32, multi_scale_hidden)
148 |         )
149 |         self.scale3 = nn.Sequential(
150 |             nn.Linear(input_size, output_size // 16),
151 |             nn.GELU(),
152 |             nn.Linear(output_size // 16, multi_scale_hidden)
153 |         )
154 | 
155 |         self.integrating = Integrating(scales = 3)
156 |         self.multi_scale_projector =  nn.Linear(multi_scale_hidden, output_size)
157 |         self.projector = nn.Linear(1, pseudo_tokens)
158 | 
159 |     def forward(self,x):
160 |         # 增加样本复制，将单一样本复制一份,避免最后一个batch只有一个数据时的报错
161 |         if x.dim() == 1:
162 |             x = x.unsqueeze(0)
163 |         #compute different scale experts outputs
164 |         scale1 = self.scale1(x)
165 |         scale2 = self.scale2(x)
166 |         scale3 = self.scale3(x)
167 | 
168 | 
169 |         # Calculate the expert outputs
170 |         multi_scale_stack = torch.stack([scale1, scale2, scale3], dim=2)
171 |         multi_scale_integrating =  self.integrating(multi_scale_stack)
172 | 
173 |         multi_scale = self.multi_scale_projector(multi_scale_integrating)
174 |         output = self.projector(multi_scale.unsqueeze(2))
175 |         return output.permute(0, 2, 1)  #[batch,seq_len,hidden_siez]
176 | 
177 | # Define the gating model
178 | class Integrating(nn.Module):
179 |     def __init__(self,  scales):
180 |         super(Integrating, self).__init__()
181 | 
182 |     # Layers
183 |         self.Integrating_layer = nn.Sequential(nn.Conv2d(1, 1, kernel_size=(1, scales), stride=1),
184 |         )
185 | 
186 |     def forward(self, x):
187 |         x = x.unsqueeze(1)
188 |         x = self.Integrating_layer(x)
189 |         x = x.squeeze((1, 3))
190 |         return x
191 | 


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/config/config_classification.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | 
  4 | from utils.functions import Storage
  5 | 
  6 | class ConfigClassification():
  7 |     def __init__(self, args):
  8 |         # hyper parameters for models
  9 |         HYPER_MODEL_MAP = {
 10 |             'cmcm': self.__CMCM
 11 |         }
 12 |         # hyper parameters for datasets
 13 |         self.root_dataset_dir = args.root_dataset_dir
 14 |         HYPER_DATASET_MAP = self.__datasetCommonParams()
 15 | 
 16 |         # normalize
 17 |         model_name = str.lower(args.modelName)
 18 |         dataset_name = str.lower(args.datasetName)
 19 |         # load params
 20 |         commonArgs = HYPER_MODEL_MAP[model_name]()['commonParas']
 21 |         dataArgs = HYPER_DATASET_MAP[dataset_name]
 22 |         dataArgs = dataArgs['aligned'] if (commonArgs['need_data_aligned'] and 'aligned' in dataArgs) else dataArgs['unaligned']
 23 |         # integrate all parameters
 24 |         self.args = Storage(dict(vars(args),
 25 |                             **dataArgs,
 26 |                             **commonArgs,
 27 |                             **HYPER_MODEL_MAP[model_name]()['datasetParas'][dataset_name],
 28 |                             ))
 29 |     
 30 |     def __datasetCommonParams(self):
 31 |         root_dataset_dir = self.root_dataset_dir
 32 |         tmp = {
 33 |             'iemocap':{
 34 |                 'unaligned': {
 35 |                     'dataPath': os.path.join(root_dataset_dir, 'IEMOCAP'),
 36 |                     'seq_lens': (84, 157, 32),
 37 |                     # (text, audio, video)
 38 |                     'feature_dims': (2048, 64, 64),
 39 |                     'train_samples': 5240,
 40 |                     'num_classes': 3,
 41 |                     'language': 'en',
 42 |                     'KeyEval': 'weight_F1'
 43 |                 }
 44 |             },
 45 |             'meld':{
 46 |                 'unaligned': {
 47 |                     'dataPath': os.path.join(root_dataset_dir, 'MELD'),
 48 |                     'seq_lens': (65, 157, 32),
 49 |                     # (text, audio, video)
 50 |                     'feature_dims': (2048, 64, 64),
 51 |                     'train_samples': 9992,
 52 |                     'num_classes': 3,
 53 |                     'language': 'en',
 54 |                     'KeyEval': 'weight_F1'
 55 |                 }
 56 |             },
 57 |             'cherma':{
 58 |                 'unaligned': {
 59 |                     'dataPath': os.path.join(root_dataset_dir, 'CHERMA0723'),
 60 |                     # (batch_size, seq_lens, feature_dim)
 61 |                     'seq_lens': (78, 543, 16), # (text, audio, video)
 62 |                     'feature_dims': (2048, 1024, 2048), # (text, audio, video)
 63 |                     'train_samples': 16326,
 64 |                     'num_classes': 3,
 65 |                     'language': 'cn',
 66 |                     'KeyEval': 'weight_F1',
 67 |                 }
 68 |             },
 69 | 
 70 | 
 71 |         }
 72 |         return tmp
 73 | 
 74 |     def __CMCM(self):
 75 |         tmp = {
 76 |             'commonParas':{
 77 |                 'need_data_aligned': False,
 78 |                 'need_model_aligned': False,
 79 |                 'need_label_prefix':True,
 80 |                 'need_normalized': False,
 81 |                 'use_PLM': True,
 82 |                 'save_labels': False,
 83 |             },
 84 |             # dataset
 85 |             'datasetParas':{
 86 |                 'iemocap':{
 87 |                     # the batch_size of each epoch is update_epochs * batch_size
 88 |                     'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the label \
 89 |                                                 set <hap: 0, sad: 1, neu: 2, ang: 3, exc: 4, fru: 5>. Assistant: The emotion is',
 90 |                     'max_new_tokens': 1,
 91 |                     'pseudo_tokens': 4,
 92 |                     'label_index_mapping': {'hap': 0, 'sad': 1, 'neu': 2, 'ang': 3, 'exc': 4, 'fru': 5},
 93 |                     'batch_size': 4,
 94 |                     'learning_rate': 5e-4,
 95 |                     # feature subNets
 96 |                     'a_lstm_hidden_size': 32,
 97 |                     'v_lstm_hidden_size': 32,
 98 |                     'a_lstm_layers': 1,
 99 |                     'v_lstm_layers': 1,
100 |                     'a_lstm_dropout': 0.0,
101 |                     'v_lstm_dropout': 0.0,
102 |                     'warm_up_epochs': 30,   #it should be low
103 |                     'gamma': 1,
104 |                     'update_epochs': 1,
105 |                     'early_stop': 8,
106 |                     # res
107 |                     'H': 3.0
108 |                 },
109 |                 'meld':{
110 |                     # the batch_size of each epoch is update_epochs * batch_size
111 |                     'task_specific_prompt': 'Please recognize the emotion of the above multimodal content from the \
112 |                                             target set <neutral:0, surprise:1, fear:2, sadness:3, joy:4, disgust:5, anger:6>. Assistant: The emotion is',
113 |                     'max_new_tokens': 1,
114 |                     'pseudo_tokens': 2,
115 |                     'label_index_mapping': {'neutral': 0, 'surprise': 1, 'fear': 2, 'sadness': 3, 'joy': 4, 'disgust': 5,
116 |                                            'anger': 6},
117 |                     'batch_size': 16,
118 |                     'learning_rate': 5e-4,
119 |                     # feature subNets
120 |                     'a_lstm_hidden_size': 32,
121 |                     'v_lstm_hidden_size': 16,
122 |                     'a_lstm_layers': 1,
123 |                     'v_lstm_layers': 1,
124 |                     'a_lstm_dropout': 0.0,
125 |                     'v_lstm_dropout': 0.0,
126 |                     'warm_up_epochs':50,
127 |                     #loss weight   best：1
128 |                     'gamma':1,
129 |                     'update_epochs': 1,
130 |                     'early_stop': 8,
131 |                     # res
132 |                     'H': 3.0
133 |                 },
134 |                 'cherma':{
135 |                     # the batch_size of each epoch is update_epochs * batch_size
136 |                     'task_specific_prompt': '请选择适用于上述多模态内容的情绪标签：<愤怒:0, 厌恶:1, 恐惧:2, 高兴:3, 平静:4, 悲伤:5, 惊奇:6>。助手: 情绪为',
137 |                     'max_new_tokens': 1,
138 |                     'pseudo_tokens': 4,
139 |                     'label_index_mapping': {'愤怒': 0, '厌恶': 1, '恐惧': 2, '高兴': 3, '平静': 4, '悲伤': 5,
140 |                                             '惊奇': 6},
141 |                     'batch_size': 16,
142 |                     'learning_rate': 5e-3,
143 |                     # feature subNets
144 |                     'a_lstm_hidden_size': 32,
145 |                     'v_lstm_hidden_size': 16,
146 |                     'a_lstm_layers': 1,
147 |                     'v_lstm_layers': 1,
148 |                     'a_lstm_dropout': 0.0,
149 |                     'v_lstm_dropout': 0.0,
150 |                     'warm_up_epochs': 30,
151 |                     'update_epochs': 1,
152 |                     'early_stop': 8,
153 |                     # loss weight
154 |                     'gamma': 0,
155 |                     # res
156 |                     'H': 1.0
157 |                 },
158 |             },
159 |         }
160 |         return tmp
161 | 
162 |     def get_config(self):
163 |         return self.args


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/run.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import gc
  3 | import time
  4 | import random
  5 | import torch
  6 | import pynvml
  7 | import logging
  8 | import argparse
  9 | import numpy as np
 10 | import pandas as pd
 11 | from tqdm import tqdm
 12 | 
 13 | from models.AMIO import AMIO
 14 | from trains.ATIO import ATIO
 15 | from data.load_data import MMDataLoader
 16 | from config.config_regression import ConfigRegression
 17 | from config.config_classification import ConfigClassification
 18 | 
 19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
 20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 下面老是报错 shape 不一致
 21 | 
 22 | def setup_seed(seed):
 23 |     torch.manual_seed(seed)
 24 |     torch.cuda.manual_seed_all(seed)
 25 |     np.random.seed(seed)
 26 |     random.seed(seed)
 27 |     torch.backends.cudnn.deterministic = True
 28 | 
 29 | def run(args):
 30 |     if not os.path.exists(args.model_save_dir):
 31 |         os.makedirs(args.model_save_dir)
 32 |     args.model_save_path = os.path.join(args.model_save_dir,\
 33 |                                         f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth')
 34 |     
 35 |     if len(args.gpu_ids) == 0 and torch.cuda.is_available():
 36 |         # load free-most gpu
 37 |         pynvml.nvmlInit()
 38 |         dst_gpu_id, min_mem_used = 0, 1e16
 39 |         for g_id in [0, 1, 2, 3]:
 40 |             handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
 41 |             meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
 42 |             mem_used = meminfo.used
 43 |             if mem_used < min_mem_used:
 44 |                 min_mem_used = mem_used
 45 |                 dst_gpu_id = g_id
 46 |         print(f'Find gpu: {dst_gpu_id}, use memory: {min_mem_used}!')
 47 |         logger.info(f'Find gpu: {dst_gpu_id}, with memory: {min_mem_used} left!')
 48 |         args.gpu_ids.append(dst_gpu_id)
 49 |     # device
 50 |     using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
 51 |     logger.info("Let's use the GPU %d !" % len(args.gpu_ids))
 52 |     device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
 53 |     # device = "cuda:1" if torch.cuda.is_available() else "cpu"
 54 |     args.device = device
 55 |     # data
 56 |     dataloader = MMDataLoader(args)
 57 |     model = AMIO(args).to(device)
 58 | 
 59 |     def print_trainable_parameters(model):
 60 |         """
 61 |         Prints the number of trainable parameters in the model.
 62 |         """
 63 |         trainable_params = 0
 64 |         all_param = 0
 65 |         for _, param in model.named_parameters():
 66 |             all_param += param.numel()
 67 |             if param.requires_grad:
 68 |                 trainable_params += param.numel()
 69 | 
 70 |         logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}")
 71 | 
 72 |     print_trainable_parameters(model)
 73 | 
 74 |     # using multiple gpus
 75 |     # if using_cuda and len(args.gpu_ids) > 1:
 76 |     #     model = torch.nn.DataParallel(model,
 77 |     #                                   device_ids=args.gpu_ids,
 78 |     #                                   output_device=args.gpu_ids[0])
 79 |     atio = ATIO().getTrain(args)
 80 |     # do train
 81 |     atio.do_train(model, dataloader)
 82 |     # load pretrained model
 83 |     assert os.path.exists(args.model_save_path)
 84 |     # load finetune parameters
 85 |     checkpoint = torch.load(args.model_save_path)
 86 |     model.load_state_dict(checkpoint, strict=False)
 87 |     model.to(device)
 88 | 
 89 |     # do test
 90 |     if args.tune_mode:
 91 |         # using valid dataset to debug hyper parameters
 92 |         results = atio.do_test(model, dataloader['valid'], mode="VALID")
 93 |     else:
 94 |         results = atio.do_test(model, dataloader['test'], mode="TEST")
 95 | 
 96 |     del model
 97 |     torch.cuda.empty_cache()
 98 |     gc.collect()
 99 | 
100 |     return results
101 | 
102 | 
103 | 
104 | def run_normal(args):
105 |     args.res_save_dir = os.path.join(args.res_save_dir)
106 |     init_args = args
107 |     model_results = []
108 |     seeds = args.seeds
109 |     # warm_epochs =[30,40,50,60,70,80,90,100]
110 |     # for warm_up_epoch in warm_epochs:
111 |     # run results
112 |     for i, seed in enumerate(seeds):
113 |         args = init_args
114 |         # load config
115 |         if args.train_mode == "regression":
116 |             config = ConfigRegression(args)
117 |         else :
118 |             config = ConfigClassification(args)
119 |         args = config.get_config()
120 | 
121 |         setup_seed(seed)
122 |         args.seed = seed
123 |         # args.warm_up_epochs = warm_up_epoch
124 |         logger.info('Start running %s...' % (args.modelName))
125 |         logger.info(args)
126 |         # runnning
127 |         args.cur_time = i + 1
128 |         test_results = run(args)  # 训练
129 |         # restore results
130 |         model_results.append(test_results)
131 | 
132 |         criterions = list(model_results[0].keys())
133 |         # load other results
134 |         save_path = os.path.join(args.res_save_dir, f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv')
135 |         if not os.path.exists(args.res_save_dir):
136 |             os.makedirs(args.res_save_dir)
137 |         if os.path.exists(save_path):
138 |             df = pd.read_csv(save_path)
139 |         else:
140 |             # df = pd.DataFrame(columns=["Model"] + criterions)
141 |             df = pd.DataFrame(columns=["Model", "Seed"] + criterions)
142 |         # save results
143 |         # res = [args.modelName]
144 | 
145 |         for k, test_results in enumerate(model_results):
146 |             res = [args.modelName, f'{seed}']
147 |             for c in criterions:
148 |                 res.append(round(test_results[c] * 100, 2))
149 |             df.loc[len(df)] = res
150 | 
151 |         # df.loc[len(df)] = res
152 |         df.to_csv(save_path, index=None)
153 |         logger.info('Results are added to %s...' % (save_path))
154 |         df = df.iloc[0:0]  # 保存后清0
155 |         model_results = []
156 | 
157 | 
158 | def set_log(args):
159 |     if not os.path.exists('logs'):
160 |         os.makedirs('logs')
161 |     log_file_path = f'logs/{args.modelName}-{args.datasetName}.log'
162 |     # set logging
163 |     logger = logging.getLogger() 
164 |     logger.setLevel(logging.DEBUG)
165 | 
166 |     for ph in logger.handlers:
167 |         logger.removeHandler(ph)
168 |     # add FileHandler to log file
169 |     formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
170 |     fh = logging.FileHandler(log_file_path)
171 |     fh.setLevel(logging.DEBUG)
172 |     fh.setFormatter(formatter_file)
173 |     logger.addHandler(fh)
174 |     # add StreamHandler to terminal outputs
175 |     formatter_stream = logging.Formatter('%(message)s')
176 |     ch = logging.StreamHandler()
177 |     ch.setLevel(logging.DEBUG)
178 |     ch.setFormatter(formatter_stream)
179 |     logger.addHandler(ch)
180 |     return logger
181 | 
182 | def parse_args():
183 |     parser = argparse.ArgumentParser()
184 |     parser.add_argument('--is_tune', type=bool, default=False,
185 |                         help='tune parameters ?')
186 |     parser.add_argument('--train_mode', type=str, default="regression",
187 |                         help='regression / classification')
188 |     parser.add_argument('--modelName', type=str, default='cmcm',
189 |                         help='support CMCM')
190 |     parser.add_argument('--datasetName', type=str, default='mosi',
191 |                         help='support mosei/simsv2/meld/cherma')
192 |     parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/',
193 |                         help='Location of the root directory where the dataset is stored')
194 |     parser.add_argument('--num_workers', type=int, default=0,
195 |                         help='num workers of loading data')
196 |     parser.add_argument('--model_save_dir', type=str, default='results/models',
197 |                         help='path to save results.')
198 |     parser.add_argument('--res_save_dir', type=str, default='results/results',
199 |                         help='path to save results.')
200 |     parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/THUDM/chatglm3-6b-base/',
201 |                         help='path to load pretrain LLM.')
202 |     parser.add_argument('--gpu_ids', type=list, default=[],
203 |                         help='indicates the gpus will be used. If none, the most-free gpu will be used!')   #使用GPU1
204 |     return parser.parse_args()
205 | 
206 | if __name__ == '__main__':
207 |     args = parse_args()
208 |     logger = set_log(args)
209 |     for data_name in ['mosei', 'simsv2', 'meld', 'cherma']:
210 |         if data_name in ['mosei', 'simsv2']:
211 |             args.train_mode = 'regression'
212 |         else:
213 |             args.train_mode = 'classification'
214 | 
215 |         args.datasetName = data_name
216 |         args.seeds = [1111, 2222, 3333, 4444, 5555]
217 |         # args.seeds = [1111]
218 |         run_normal(args)


--------------------------------------------------------------------------------
/MSE-Llama2-7B/run.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import gc
  3 | import time
  4 | import random
  5 | import torch
  6 | import pynvml
  7 | import logging
  8 | import argparse
  9 | import numpy as np
 10 | import pandas as pd
 11 | from tqdm import tqdm
 12 | 
 13 | from models.AMIO import AMIO
 14 | from trains.ATIO import ATIO
 15 | from data.load_data import MMDataLoader
 16 | from config.config_regression import ConfigRegression
 17 | from config.config_classification import ConfigClassification
 18 | 
 19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
 20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 下面老是报错 shape 不一致
 21 | 
 22 | def setup_seed(seed):
 23 |     torch.manual_seed(seed)
 24 |     torch.cuda.manual_seed_all(seed)
 25 |     np.random.seed(seed)
 26 |     random.seed(seed)
 27 |     torch.backends.cudnn.deterministic = True
 28 | 
 29 | def run(args):
 30 |     named = 'data_percent'
 31 |     if not os.path.exists(args.model_save_dir):
 32 |         os.makedirs(args.model_save_dir)
 33 |     args.model_save_path = os.path.join(args.model_save_dir,\
 34 |                                         f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth')
 35 |     
 36 |     if len(args.gpu_ids) == 0 and torch.cuda.is_available():
 37 |         # load free-most gpu
 38 |         pynvml.nvmlInit()
 39 |         dst_gpu_id, min_mem_used = 0, 1e16
 40 |         for g_id in [0, 1, 2, 3]:
 41 |             handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
 42 |             meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
 43 |             mem_used = meminfo.used
 44 |             if mem_used < min_mem_used:
 45 |                 min_mem_used = mem_used
 46 |                 dst_gpu_id = g_id
 47 |         print(f'Find gpu: {dst_gpu_id}, use memory: {min_mem_used}!')
 48 |         logger.info(f'Find gpu: {dst_gpu_id}, with memory: {min_mem_used} left!')
 49 |         args.gpu_ids.append(dst_gpu_id)
 50 |     # device
 51 |     using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
 52 |     logger.info("Let's use the GPU %d !" % len(args.gpu_ids))
 53 |     device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
 54 |     # device = "cuda:1" if torch.cuda.is_available() else "cpu"
 55 |     args.device = device
 56 |     # data
 57 |     dataloader = MMDataLoader(args)
 58 |     model = AMIO(args).to(device)
 59 | 
 60 |     def print_trainable_parameters(model):
 61 |         """
 62 |         Prints the number of trainable parameters in the model.
 63 |         """
 64 |         trainable_params = 0
 65 |         all_param = 0
 66 |         for _, param in model.named_parameters():
 67 |             all_param += param.numel()
 68 |             if param.requires_grad:
 69 |                 trainable_params += param.numel()
 70 | 
 71 |         logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}")
 72 | 
 73 |     print_trainable_parameters(model)
 74 | 
 75 |     # using multiple gpus
 76 |     # if using_cuda and len(args.gpu_ids) > 1:
 77 |     #     model = torch.nn.DataParallel(model,
 78 |     #                                   device_ids=args.gpu_ids,
 79 |     #                                   output_device=args.gpu_ids[0])
 80 |     atio = ATIO().getTrain(args)
 81 |     # do train
 82 |     atio.do_train(model, dataloader)
 83 |     # load pretrained model
 84 |     assert os.path.exists(args.model_save_path)
 85 |     # load finetune parameters
 86 |     checkpoint = torch.load(args.model_save_path)
 87 |     model.load_state_dict(checkpoint, strict=False)
 88 |     model.to(device)
 89 | 
 90 |     # do test
 91 |     if args.tune_mode:
 92 |         # using valid dataset to debug hyper parameters
 93 |         results = atio.do_test(model, dataloader['valid'], mode="VALID")
 94 |     else:
 95 |         results = atio.do_test(model, dataloader['test'], mode="TEST")
 96 | 
 97 |     del model
 98 |     torch.cuda.empty_cache()
 99 |     gc.collect()
100 | 
101 |     return results
102 | 
103 | 
104 | 
105 | def run_normal(args):
106 |     args.res_save_dir = os.path.join(args.res_save_dir)
107 |     init_args = args
108 |     model_results = []
109 |     seeds = args.seeds
110 | 
111 |     for i, seed in enumerate(seeds):
112 |         args = init_args
113 |         # load config
114 |         if args.train_mode == "regression":
115 |             config = ConfigRegression(args)
116 |         else:
117 |             config = ConfigClassification(args)
118 |         args = config.get_config()
119 | 
120 |         setup_seed(seed)
121 |         args.seed = seed
122 |         # args.warm_up_epochs = warmup
123 |         logger.info('Start running %s...' % (args.modelName))
124 |         logger.info(args)
125 |         # runnning
126 |         args.cur_time = i + 1
127 |         test_results = run(args)  # 训练
128 |         # restore results
129 |         model_results.append(test_results)
130 | 
131 |         criterions = list(model_results[0].keys())
132 |         # load other results
133 |         save_path = os.path.join(args.res_save_dir,
134 |                                     f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv')
135 |         if not os.path.exists(args.res_save_dir):
136 |             os.makedirs(args.res_save_dir)
137 |         if os.path.exists(save_path):
138 |             df = pd.read_csv(save_path)
139 |         else:
140 | 
141 |             df = pd.DataFrame(columns=["Model", "Seed"] + criterions)
142 |         # save results
143 |         # res = [args.modelName]
144 | 
145 |         for k, test_results in enumerate(model_results):
146 |             res = [args.modelName, f'{seed}']
147 |             for c in criterions:
148 |                 res.append(round(test_results[c] * 100, 2))
149 |             df.loc[len(df)] = res
150 | 
151 |         # df.loc[len(df)] = res
152 |         df.to_csv(save_path, index=None)
153 |         logger.info('Results are added to %s...' % (save_path))
154 |         df = df.iloc[0:0]  # 保存后清0
155 |         model_results = []
156 | 
157 | 
158 | def set_log(args):
159 |     if not os.path.exists('logs'):
160 |         os.makedirs('logs')
161 |     log_file_path = f'logs/{args.modelName}-{args.datasetName}.log'
162 |     # set logging
163 |     logger = logging.getLogger() 
164 |     logger.setLevel(logging.DEBUG)
165 | 
166 |     for ph in logger.handlers:
167 |         logger.removeHandler(ph)
168 |     # add FileHandler to log file
169 |     formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
170 |     fh = logging.FileHandler(log_file_path)
171 |     fh.setLevel(logging.DEBUG)
172 |     fh.setFormatter(formatter_file)
173 |     logger.addHandler(fh)
174 |     # add StreamHandler to terminal outputs
175 |     formatter_stream = logging.Formatter('%(message)s')
176 |     ch = logging.StreamHandler()
177 |     ch.setLevel(logging.DEBUG)
178 |     ch.setFormatter(formatter_stream)
179 |     logger.addHandler(ch)
180 |     return logger
181 | 
182 | def parse_args():
183 |     parser = argparse.ArgumentParser()
184 |     parser.add_argument('--is_tune', type=bool, default=False,
185 |                         help='tune parameters ?')
186 |     parser.add_argument('--train_mode', type=str, default="regression",
187 |                         help='regression / classification')
188 |     parser.add_argument('--modelName', type=str, default='cmcm',
189 |                         help='support CMCM')
190 |     parser.add_argument('--datasetName', type=str, default='sims',
191 |                         help='support mosi/mosei/simsv2/iemocap/meld/cherma')
192 |     parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/',
193 |                         help='Location of the root directory where the dataset is stored')
194 |     parser.add_argument('--num_workers', type=int, default=0,
195 |                         help='num workers of loading data')
196 |     parser.add_argument('--model_save_dir', type=str, default='results/models',
197 |                         help='path to save results.')
198 |     parser.add_argument('--res_save_dir', type=str, default='results/results',
199 |                         help='path to save results.')
200 |     parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/Meta/Llama-2-7b-hf/',
201 |                         help='path to load pretrain LLM.')
202 |     parser.add_argument('--gpu_ids', type=list, default=[2],
203 |                         help='indicates the gpus will be used. If none, the most-free gpu will be used!')   #使用GPU1
204 |     return parser.parse_args()
205 | 
206 | if __name__ == '__main__':
207 |     args = parse_args()
208 |     logger = set_log(args)
209 |     # for data_name in ['mosi', 'mosei', 'simsv2', 'iemocap', 'meld', 'cherma' ]:
210 |     # for data_name in ['simsv2','cherma']:
211 |     # for data_name in ['mosi']:
212 |     for data_name in ['simsv2', 'mosei',  'meld', 'cherma']:
213 |         if data_name in ['mosi', 'mosei', 'sims', 'simsv2']:
214 |             args.train_mode = 'regression'
215 |         else:
216 |             args.train_mode = 'classification'
217 | 
218 |         args.datasetName = data_name
219 |         args.seeds = [1111, 2222, 3333, 4444, 5555]
220 |         run_normal(args)


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/run.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import gc
  3 | import time
  4 | import random
  5 | import torch
  6 | import pynvml
  7 | import logging
  8 | import argparse
  9 | import numpy as np
 10 | import pandas as pd
 11 | from tqdm import tqdm
 12 | 
 13 | from models.AMIO import AMIO
 14 | from trains.ATIO import ATIO
 15 | from data.load_data import MMDataLoader
 16 | from config.config_regression import ConfigRegression
 17 | from config.config_classification import ConfigClassification
 18 | 
 19 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
 20 | os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # 下面老是报错 shape 不一致
 21 | 
 22 | def setup_seed(seed):
 23 |     torch.manual_seed(seed)
 24 |     torch.cuda.manual_seed_all(seed)
 25 |     np.random.seed(seed)
 26 |     random.seed(seed)
 27 |     torch.backends.cudnn.deterministic = True
 28 | 
 29 | def run(args):
 30 |     if not os.path.exists(args.model_save_dir):
 31 |         os.makedirs(args.model_save_dir)
 32 |     args.model_save_path = os.path.join(args.model_save_dir,\
 33 |                                         f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth')
 34 |     
 35 |     if len(args.gpu_ids) == 0 and torch.cuda.is_available():
 36 |         # load free-most gpu
 37 |         pynvml.nvmlInit()
 38 |         dst_gpu_id, min_mem_used = 0, 1e16
 39 |         for g_id in [0, 1, 2, 3]:
 40 |             handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
 41 |             meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
 42 |             mem_used = meminfo.used
 43 |             if mem_used < min_mem_used:
 44 |                 min_mem_used = mem_used
 45 |                 dst_gpu_id = g_id
 46 |         print(f'Find gpu: {dst_gpu_id}, use memory: {min_mem_used}!')
 47 |         logger.info(f'Find gpu: {dst_gpu_id}, with memory: {min_mem_used} left!')
 48 |         args.gpu_ids.append(dst_gpu_id)
 49 |     # device
 50 |     using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
 51 |     logger.info("Let's use the GPU %d !" % int(args.gpu_ids[0]))
 52 |     device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
 53 |     # device = "cuda:1" if torch.cuda.is_available() else "cpu"
 54 |     args.device = device
 55 |     # data
 56 |     dataloader = MMDataLoader(args)
 57 |     model = AMIO(args).to(device)
 58 | 
 59 |     def print_trainable_parameters(model):
 60 |         """
 61 |         Prints the number of trainable parameters in the model.
 62 |         """
 63 |         trainable_params = 0
 64 |         all_param = 0
 65 |         for _, param in model.named_parameters():
 66 |             all_param += param.numel()
 67 |             if param.requires_grad:
 68 |                 trainable_params += param.numel()
 69 | 
 70 |         logger.info(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}")
 71 | 
 72 |     print_trainable_parameters(model)
 73 | 
 74 |     # using multiple gpus
 75 |     # if using_cuda and len(args.gpu_ids) > 1:
 76 |     #     model = torch.nn.DataParallel(model,
 77 |     #                                   device_ids=args.gpu_ids,
 78 |     #                                   output_device=args.gpu_ids[0])
 79 |     atio = ATIO().getTrain(args)
 80 |     # do train
 81 |     atio.do_train(model, dataloader)
 82 |     # load pretrained model
 83 |     assert os.path.exists(args.model_save_path)
 84 |     # load finetune parameters
 85 |     checkpoint = torch.load(args.model_save_path)
 86 |     model.load_state_dict(checkpoint, strict=False)
 87 |     model.to(device)
 88 | 
 89 |     # do test
 90 |     if args.tune_mode:
 91 |         # using valid dataset to debug hyper parameters
 92 |         results = atio.do_test(model, dataloader['valid'], mode="VALID")
 93 |     else:
 94 |         results = atio.do_test(model, dataloader['test'], mode="TEST")
 95 | 
 96 |     del model
 97 |     torch.cuda.empty_cache()
 98 |     gc.collect()
 99 | 
100 |     return results
101 | 
102 | 
103 | 
104 | def run_normal(args):
105 |     args.res_save_dir = os.path.join(args.res_save_dir)
106 |     init_args = args
107 |     model_results = []
108 |     seeds = args.seeds
109 |     # warmup_list = [30]
110 |     # # run results
111 |     # for warmup in warmup_list:
112 |     for i, seed in enumerate(seeds):
113 |         args = init_args
114 |         # load config
115 |         if args.train_mode == "regression":
116 |             config = ConfigRegression(args)
117 |         else :
118 |             config = ConfigClassification(args)
119 |         args = config.get_config()
120 | 
121 |         setup_seed(seed)
122 |         args.seed = seed
123 |         # args.warm_up_epochs =  warmup
124 |         logger.info('Start running %s...' % (args.modelName))
125 |         logger.info(args)
126 |         # runnning
127 |         args.cur_time = i + 1
128 |         start_time = time.time()
129 |         test_results = run(args)  # 训练
130 | 
131 |         end_time = time.time()
132 |         # 计算运行时间
133 |         elapsed_time = end_time - start_time
134 |         print(f"程序运行时间: {elapsed_time:.6f} 秒")
135 | 
136 |         # restore results
137 |         model_results.append(test_results)
138 | 
139 |         criterions = list(model_results[0].keys())
140 |         # load other results
141 |         save_path = os.path.join(args.res_save_dir, f'{args.datasetName}-{args.train_mode}-{args.warm_up_epochs}.csv')
142 |         if not os.path.exists(args.res_save_dir):
143 |             os.makedirs(args.res_save_dir)
144 |         if os.path.exists(save_path):
145 |             df = pd.read_csv(save_path)
146 |         else:
147 | 
148 |             df = pd.DataFrame(columns=["Model", "Seed"] + criterions)
149 |         # save results
150 |         # res = [args.modelName]
151 | 
152 |         for k, test_results in enumerate(model_results):
153 |             res = [args.modelName, f'{seed}']
154 |             for c in criterions:
155 |                 res.append(round(test_results[c] * 100, 2))
156 |             df.loc[len(df)] = res
157 | 
158 |         # df.loc[len(df)] = res
159 |         df.to_csv(save_path, index=None)
160 |         logger.info('Results are added to %s...' % (save_path))
161 |         df = df.iloc[0:0]  # 保存后清0
162 |         model_results = []
163 | 
164 | 
165 | def set_log(args):
166 |     if not os.path.exists('logs'):
167 |         os.makedirs('logs')
168 |     log_file_path = f'logs/{args.modelName}-{args.datasetName}.log'
169 |     # set logging
170 |     logger = logging.getLogger() 
171 |     logger.setLevel(logging.DEBUG)
172 | 
173 |     for ph in logger.handlers:
174 |         logger.removeHandler(ph)
175 |     # add FileHandler to log file
176 |     formatter_file = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
177 |     fh = logging.FileHandler(log_file_path)
178 |     fh.setLevel(logging.DEBUG)
179 |     fh.setFormatter(formatter_file)
180 |     logger.addHandler(fh)
181 |     # add StreamHandler to terminal outputs
182 |     formatter_stream = logging.Formatter('%(message)s')
183 |     ch = logging.StreamHandler()
184 |     ch.setLevel(logging.DEBUG)
185 |     ch.setFormatter(formatter_stream)
186 |     logger.addHandler(ch)
187 |     return logger
188 | 
189 | def parse_args():
190 |     parser = argparse.ArgumentParser()
191 |     parser.add_argument('--is_tune', type=bool, default=False,
192 |                         help='tune parameters ?')
193 |     parser.add_argument('--train_mode', type=str, default="regression",
194 |                         help='regression / classification')
195 |     parser.add_argument('--modelName', type=str, default='cmcm',
196 |                         help='support CMCM')
197 |     parser.add_argument('--datasetName', type=str, default='sims',
198 |                         help='support mosi/mosei/simsv2/iemocap/meld/cherma')
199 |     parser.add_argument('--root_dataset_dir', type=str, default='/home/young/DL/multimodal_dataset/',
200 |                         help='Location of the root directory where the dataset is stored')
201 |     parser.add_argument('--num_workers', type=int, default=0,
202 |                         help='num workers of loading data')
203 |     parser.add_argument('--model_save_dir', type=str, default='results/models',
204 |                         help='path to save results.')
205 |     parser.add_argument('--res_save_dir', type=str, default='results/results',
206 |                         help='path to save results.')
207 |     parser.add_argument('--pretrain_LM', type=str, default='/data/huggingface_model/Qwen/Qwen-1_8B/',
208 |                         help='path to load pretrain LLM.')
209 |     parser.add_argument('--gpu_ids', type=list, default=[],
210 |                         help='indicates the gpus will be used. If none, the most-free gpu will be used!')   #使用GPU1
211 |     return parser.parse_args()
212 | 
213 | if __name__ == '__main__':
214 |     args = parse_args()
215 |     logger = set_log(args)
216 |     for data_name in [ 'simsv2', 'mosei', 'meld', 'cherma']:
217 |         if data_name in ['mosi', 'mosei', 'sims', 'simsv2']:
218 |             args.train_mode = 'regression'
219 |         else:
220 |             args.train_mode = 'classification'
221 | 
222 |         args.datasetName = data_name
223 |         args.seeds = [1111, 2222, 3333, 4444, 5555]
224 |         run_normal(args)


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/utils/metricsTop.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from sklearn.metrics import classification_report
  4 | from sklearn.metrics import confusion_matrix
  5 | from sklearn.metrics import precision_recall_fscore_support
  6 | from sklearn.metrics import accuracy_score, f1_score
  7 | from sklearn.metrics import r2_score
  8 | from itertools import chain
  9 | __all__ = ['MetricsTop']
 10 | 
 11 | class MetricsTop():
 12 |     def __init__(self, args):
 13 |         if args.train_mode == "regression":
 14 |             self.metrics_dict = {
 15 |                 'MOSI': self.__eval_mosi_regression,
 16 |                 'MOSEI': self.__eval_mosei_regression,
 17 |                 'SIMS': self.__eval_sims_regression,
 18 |                 'SIMSV2': self.__eval_simsv2_regression
 19 |             }
 20 |         else:
 21 |             self.metrics_dict = {
 22 |                 'IEMOCAP': self.__eval_iemocap_classification,
 23 |                 'MELD': self.__eval_meld_classification,
 24 |                 'CHERMA': self.__eval_cherma_classification
 25 |             }
 26 |             self.label_index_mapping = args.label_index_mapping
 27 | 
 28 |     def __eval_iemocap_classification(self, results, truths):
 29 |         # label_index_mapping = self.label_index_mapping
 30 |         # # 主要通过混淆矩阵来计算
 31 |         # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results]
 32 |         # truths_indices = [label_index_mapping.get(label, -1) for label in truths]
 33 |         # acc = accuracy_score(truths_indices, results_indices)
 34 |         # weight_F1 = f1_score(truths_indices, results_indices, average='weighted')
 35 |         acc = accuracy_score(truths, results)
 36 |         weight_F1 = f1_score(truths, results, average='weighted')
 37 | 
 38 |         eval_result = {
 39 |             'acc': acc,
 40 |             'weight_F1': weight_F1
 41 |         }
 42 |         return eval_result
 43 | 
 44 |     def __eval_cherma_classification(self, results, truths):
 45 |         acc = accuracy_score(truths, results)
 46 |         weight_F1 = f1_score(truths, results, average='weighted')
 47 |         eval_result = {
 48 |             'acc': acc,
 49 |             'weight_F1': weight_F1
 50 |         }
 51 |         return eval_result
 52 | 
 53 |     def __eval_meld_classification(self, results, truths):
 54 |         acc = accuracy_score(truths, results)
 55 |         weight_F1 = f1_score(truths, results, average='weighted')
 56 | 
 57 | 
 58 |         eval_result = {
 59 |             'acc': acc,
 60 |             'weight_F1': weight_F1
 61 |         }
 62 |         return eval_result
 63 | 
 64 | 
 65 | 
 66 | 
 67 |     def __multiclass_acc(self, y_pred, y_true):
 68 |         """
 69 |         Compute the multiclass accuracy w.r.t. groundtruth
 70 | 
 71 |         :param preds: Float array representing the predictions, dimension (N,)
 72 |         :param truths: Float/int array representing the groundtruth classes, dimension (N,)
 73 |         :return: Classification accuracy
 74 |         """
 75 |         return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true))
 76 | 
 77 | 
 78 |     def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False):
 79 |         test_preds = y_pred.view(-1).cpu().detach().numpy()
 80 |         test_truth = y_true.view(-1).cpu().detach().numpy()
 81 | 
 82 |         test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.)
 83 |         test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.)
 84 |         test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.)
 85 |         test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.)
 86 |         test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.)
 87 |         test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.)
 88 | 
 89 | 
 90 |         mae = np.mean(np.absolute(test_preds - test_truth))   # Average L1 distance between preds and truths
 91 |         corr = np.corrcoef(test_preds, test_truth)[0][1]
 92 |         mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7)
 93 |         mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
 94 |         mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
 95 |         
 96 |         non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0])
 97 |         non_zeros_binary_truth = (test_truth[non_zeros] > 0)
 98 |         non_zeros_binary_preds = (test_preds[non_zeros] > 0)
 99 | 
100 |         non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth)
101 |         non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted')
102 | 
103 |         binary_truth = (test_truth >= 0)
104 |         binary_preds = (test_preds >= 0)
105 |         acc2 = accuracy_score(binary_preds, binary_truth)
106 |         f_score = f1_score(binary_truth, binary_preds, average='weighted')
107 |         
108 |         eval_results = {
109 |             "Has0_acc_2":  round(acc2, 4),
110 |             "Has0_F1_score": round(f_score, 4),
111 |             "Non0_acc_2":  round(non_zeros_acc2, 4),
112 |             "Non0_F1_score": round(non_zeros_f1_score, 4),
113 |             "Mult_acc_5": round(mult_a5, 4),
114 |             "Mult_acc_7": round(mult_a7, 4),
115 |             "MAE": round(mae, 4),
116 |             "Corr": round(corr, 4)
117 |         }
118 |         return eval_results
119 | 
120 | 
121 |     def __eval_mosi_regression(self, y_pred, y_true):
122 |         return self.__eval_mosei_regression(y_pred, y_true)
123 | 
124 |     def __eval_sims_regression(self, y_pred, y_true):
125 |         test_preds = y_pred.view(-1).cpu().detach().numpy()
126 |         test_truth = y_true.view(-1).cpu().detach().numpy()
127 |         test_preds = np.clip(test_preds, a_min=-1., a_max=1.)
128 |         test_truth = np.clip(test_truth, a_min=-1., a_max=1.)
129 | 
130 |         # weak sentiment two classes{[-0.6, 0.0], (0.0, 0.6]}
131 |         ms_2 = [-1.01, 0.0, 1.01]
132 |         weak_index_l = np.where(test_truth >= -0.4)[0]
133 |         weak_index_r = np.where(test_truth <= 0.4)[0]
134 |         weak_index = [x for x in weak_index_l if x in weak_index_r]
135 |         test_preds_weak = test_preds[weak_index]
136 |         test_truth_weak = test_truth[weak_index]
137 |         test_preds_a2_weak = test_preds_weak.copy()
138 |         test_truth_a2_weak = test_truth_weak.copy()
139 |         for i in range(2):
140 |             test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i
141 |         for i in range(2):
142 |             test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i
143 | 
144 |         # two classes{[-1.0, 0.0], (0.0, 1.0]}
145 |         ms_2 = [-1.01, 0.0, 1.01]
146 |         test_preds_a2 = test_preds.copy()
147 |         test_truth_a2 = test_truth.copy()
148 |         for i in range(2):
149 |             test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i
150 |         for i in range(2):
151 |             test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i
152 | 
153 |         # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]}
154 |         ms_3 = [-1.01, -0.1, 0.1, 1.01]
155 |         test_preds_a3 = test_preds.copy()
156 |         test_truth_a3 = test_truth.copy()
157 |         for i in range(3):
158 |             test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i
159 |         for i in range(3):
160 |             test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i
161 |         
162 |         # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]}
163 |         ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01]
164 |         test_preds_a5 = test_preds.copy()
165 |         test_truth_a5 = test_truth.copy()
166 |         for i in range(5):
167 |             test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i
168 |         for i in range(5):
169 |             test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i
170 |  
171 |         mae = np.mean(np.absolute(test_preds - test_truth))   # Average L1 distance between preds and truths
172 |         corr = np.corrcoef(test_preds, test_truth)[0][1]
173 |         mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2)
174 |         mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak)
175 |         mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
176 |         mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
177 |         f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted')
178 |         r2 = r2_score(test_truth, test_preds)
179 |         eval_results = {
180 |             "Mult_acc_2": mult_a2,
181 |             "Mult_acc_2_weak": mult_a2_weak,
182 |             "Mult_acc_3": mult_a3,
183 |             "Mult_acc_5": mult_a5,
184 |             "F1_score": f_score,
185 |             "MAE": mae,
186 |             "Corr": corr, # Correlation Coefficient
187 |             "R_squre": r2
188 |         }
189 |         return eval_results
190 | 
191 |     def __eval_simsv2_regression(self, y_pred, y_true):
192 |         return self.__eval_sims_regression(y_pred, y_true)
193 |     def getMetics(self, datasetName):
194 |         return self.metrics_dict[datasetName.upper()]


--------------------------------------------------------------------------------
/MSE-Llama2-7B/utils/metricsTop.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from sklearn.metrics import classification_report
  4 | from sklearn.metrics import confusion_matrix
  5 | from sklearn.metrics import precision_recall_fscore_support
  6 | from sklearn.metrics import accuracy_score, f1_score
  7 | from sklearn.metrics import r2_score
  8 | from itertools import chain
  9 | __all__ = ['MetricsTop']
 10 | 
 11 | class MetricsTop():
 12 |     def __init__(self, args):
 13 |         if args.train_mode == "regression":
 14 |             self.metrics_dict = {
 15 |                 'MOSI': self.__eval_mosi_regression,
 16 |                 'MOSEI': self.__eval_mosei_regression,
 17 |                 'SIMS': self.__eval_sims_regression,
 18 |                 'SIMSV2': self.__eval_simsv2_regression
 19 |             }
 20 |         else:
 21 |             self.metrics_dict = {
 22 |                 'IEMOCAP': self.__eval_iemocap_classification,
 23 |                 'MELD': self.__eval_meld_classification,
 24 |                 'CHERMA': self.__eval_cherma_classification
 25 |             }
 26 |             self.label_index_mapping = args.label_index_mapping
 27 | 
 28 |     def __eval_iemocap_classification(self, results, truths):
 29 |         # label_index_mapping = self.label_index_mapping
 30 |         # # 主要通过混淆矩阵来计算
 31 |         # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results]
 32 |         # truths_indices = [label_index_mapping.get(label, -1) for label in truths]
 33 |         # acc = accuracy_score(truths_indices, results_indices)
 34 |         # weight_F1 = f1_score(truths_indices, results_indices, average='weighted')
 35 |         acc = accuracy_score(truths, results)
 36 |         weight_F1 = f1_score(truths, results, average='weighted')
 37 | 
 38 |         eval_result = {
 39 |             'acc': acc,
 40 |             'weight_F1': weight_F1
 41 |         }
 42 |         return eval_result
 43 | 
 44 |     def __eval_cherma_classification(self, results, truths):
 45 |         acc = accuracy_score(truths, results)
 46 |         weight_F1 = f1_score(truths, results, average='weighted')
 47 |         eval_result = {
 48 |             'acc': acc,
 49 |             'weight_F1': weight_F1
 50 |         }
 51 |         return eval_result
 52 | 
 53 |     def __eval_meld_classification(self, results, truths):
 54 |         acc = accuracy_score(truths, results)
 55 |         weight_F1 = f1_score(truths, results, average='weighted')
 56 | 
 57 | 
 58 |         eval_result = {
 59 |             'acc': acc,
 60 |             'weight_F1': weight_F1
 61 |         }
 62 |         return eval_result
 63 | 
 64 | 
 65 | 
 66 | 
 67 |     def __multiclass_acc(self, y_pred, y_true):
 68 |         """
 69 |         Compute the multiclass accuracy w.r.t. groundtruth
 70 | 
 71 |         :param preds: Float array representing the predictions, dimension (N,)
 72 |         :param truths: Float/int array representing the groundtruth classes, dimension (N,)
 73 |         :return: Classification accuracy
 74 |         """
 75 |         return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true))
 76 | 
 77 | 
 78 |     def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False):
 79 |         test_preds = y_pred.view(-1).cpu().detach().numpy()
 80 |         test_truth = y_true.view(-1).cpu().detach().numpy()
 81 | 
 82 |         test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.)
 83 |         test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.)
 84 |         test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.)
 85 |         test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.)
 86 |         test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.)
 87 |         test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.)
 88 | 
 89 | 
 90 |         mae = np.mean(np.absolute(test_preds - test_truth))   # Average L1 distance between preds and truths
 91 |         corr = np.corrcoef(test_preds, test_truth)[0][1]
 92 |         mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7)
 93 |         mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
 94 |         mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
 95 |         
 96 |         non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0])
 97 |         non_zeros_binary_truth = (test_truth[non_zeros] > 0)
 98 |         non_zeros_binary_preds = (test_preds[non_zeros] > 0)
 99 | 
100 |         non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth)
101 |         non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted')
102 | 
103 |         binary_truth = (test_truth >= 0)
104 |         binary_preds = (test_preds >= 0)
105 |         acc2 = accuracy_score(binary_preds, binary_truth)
106 |         f_score = f1_score(binary_truth, binary_preds, average='weighted')
107 |         
108 |         eval_results = {
109 |             "Has0_acc_2":  round(acc2, 4),
110 |             "Has0_F1_score": round(f_score, 4),
111 |             "Non0_acc_2":  round(non_zeros_acc2, 4),
112 |             "Non0_F1_score": round(non_zeros_f1_score, 4),
113 |             "Mult_acc_5": round(mult_a5, 4),
114 |             "Mult_acc_7": round(mult_a7, 4),
115 |             "MAE": round(mae, 4),
116 |             "Corr": round(corr, 4)
117 |         }
118 |         return eval_results
119 | 
120 | 
121 |     def __eval_mosi_regression(self, y_pred, y_true):
122 |         return self.__eval_mosei_regression(y_pred, y_true)
123 | 
124 |     def __eval_sims_regression(self, y_pred, y_true):
125 |         test_preds = y_pred.view(-1).cpu().detach().numpy()
126 |         test_truth = y_true.view(-1).cpu().detach().numpy()
127 |         test_preds = np.clip(test_preds, a_min=-1., a_max=1.)
128 |         test_truth = np.clip(test_truth, a_min=-1., a_max=1.)
129 | 
130 |         # weak sentiment two classes{[-0.6, 0.0], (0.0, 0.6]}
131 |         ms_2 = [-1.01, 0.0, 1.01]
132 |         weak_index_l = np.where(test_truth >= -0.4)[0]
133 |         weak_index_r = np.where(test_truth <= 0.4)[0]
134 |         weak_index = [x for x in weak_index_l if x in weak_index_r]
135 |         test_preds_weak = test_preds[weak_index]
136 |         test_truth_weak = test_truth[weak_index]
137 |         test_preds_a2_weak = test_preds_weak.copy()
138 |         test_truth_a2_weak = test_truth_weak.copy()
139 |         for i in range(2):
140 |             test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i
141 |         for i in range(2):
142 |             test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i
143 | 
144 |         # two classes{[-1.0, 0.0], (0.0, 1.0]}
145 |         ms_2 = [-1.01, 0.0, 1.01]
146 |         test_preds_a2 = test_preds.copy()
147 |         test_truth_a2 = test_truth.copy()
148 |         for i in range(2):
149 |             test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i
150 |         for i in range(2):
151 |             test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i
152 | 
153 |         # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]}
154 |         ms_3 = [-1.01, -0.1, 0.1, 1.01]
155 |         test_preds_a3 = test_preds.copy()
156 |         test_truth_a3 = test_truth.copy()
157 |         for i in range(3):
158 |             test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i
159 |         for i in range(3):
160 |             test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i
161 |         
162 |         # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]}
163 |         ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01]
164 |         test_preds_a5 = test_preds.copy()
165 |         test_truth_a5 = test_truth.copy()
166 |         for i in range(5):
167 |             test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i
168 |         for i in range(5):
169 |             test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i
170 |  
171 |         mae = np.mean(np.absolute(test_preds - test_truth))   # Average L1 distance between preds and truths
172 |         corr = np.corrcoef(test_preds, test_truth)[0][1]
173 |         mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2)
174 |         mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak)
175 |         mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
176 |         mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
177 |         f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted')
178 |         r2 = r2_score(test_truth, test_preds)
179 |         eval_results = {
180 |             "Mult_acc_2": mult_a2,
181 |             "Mult_acc_2_weak": mult_a2_weak,
182 |             "Mult_acc_3": mult_a3,
183 |             "Mult_acc_5": mult_a5,
184 |             "F1_score": f_score,
185 |             "MAE": mae,
186 |             "Corr": corr, # Correlation Coefficient
187 |             "R_squre": r2
188 |         }
189 |         return eval_results
190 | 
191 |     def __eval_simsv2_regression(self, y_pred, y_true):
192 |         return self.__eval_sims_regression(y_pred, y_true)
193 |     def getMetics(self, datasetName):
194 |         return self.metrics_dict[datasetName.upper()]


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/utils/metricsTop.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from sklearn.metrics import classification_report
  4 | from sklearn.metrics import confusion_matrix
  5 | from sklearn.metrics import precision_recall_fscore_support
  6 | from sklearn.metrics import accuracy_score, f1_score
  7 | from sklearn.metrics import r2_score
  8 | from itertools import chain
  9 | __all__ = ['MetricsTop']
 10 | 
 11 | class MetricsTop():
 12 |     def __init__(self, args):
 13 |         if args.train_mode == "regression":
 14 |             self.metrics_dict = {
 15 |                 'MOSI': self.__eval_mosi_regression,
 16 |                 'MOSEI': self.__eval_mosei_regression,
 17 |                 'SIMS': self.__eval_sims_regression,
 18 |                 'SIMSV2': self.__eval_simsv2_regression
 19 |             }
 20 |         else:
 21 |             self.metrics_dict = {
 22 |                 'IEMOCAP': self.__eval_iemocap_classification,
 23 |                 'MELD': self.__eval_meld_classification,
 24 |                 'CHERMA': self.__eval_cherma_classification
 25 |             }
 26 |             self.label_index_mapping = args.label_index_mapping
 27 | 
 28 |     def __eval_iemocap_classification(self, results, truths):
 29 |         # label_index_mapping = self.label_index_mapping
 30 |         # # 主要通过混淆矩阵来计算
 31 |         # results_indices = [label_index_mapping.get(label, label_index_mapping.get('neu')) for label in results]
 32 |         # truths_indices = [label_index_mapping.get(label, -1) for label in truths]
 33 |         # acc = accuracy_score(truths_indices, results_indices)
 34 |         # weight_F1 = f1_score(truths_indices, results_indices, average='weighted')
 35 |         acc = accuracy_score(truths, results)
 36 |         weight_F1 = f1_score(truths, results, average='weighted')
 37 | 
 38 |         eval_result = {
 39 |             'acc': acc,
 40 |             'weight_F1': weight_F1
 41 |         }
 42 |         return eval_result
 43 | 
 44 |     def __eval_cherma_classification(self, results, truths):
 45 |         acc = accuracy_score(truths, results)
 46 |         weight_F1 = f1_score(truths, results, average='weighted')
 47 |         eval_result = {
 48 |             'acc': acc,
 49 |             'weight_F1': weight_F1
 50 |         }
 51 |         return eval_result
 52 | 
 53 |     def __eval_meld_classification(self, results, truths):
 54 |         acc = accuracy_score(truths, results)
 55 |         weight_F1 = f1_score(truths, results, average='weighted')
 56 | 
 57 | 
 58 |         eval_result = {
 59 |             'acc': acc,
 60 |             'weight_F1': weight_F1
 61 |         }
 62 |         return eval_result
 63 | 
 64 | 
 65 | 
 66 | 
 67 |     def __multiclass_acc(self, y_pred, y_true):
 68 |         """
 69 |         Compute the multiclass accuracy w.r.t. groundtruth
 70 | 
 71 |         :param preds: Float array representing the predictions, dimension (N,)
 72 |         :param truths: Float/int array representing the groundtruth classes, dimension (N,)
 73 |         :return: Classification accuracy
 74 |         """
 75 |         return np.sum(np.round(y_pred) == np.round(y_true)) / float(len(y_true))
 76 | 
 77 | 
 78 |     def __eval_mosei_regression(self, y_pred, y_true, exclude_zero=False):
 79 |         test_preds = y_pred.view(-1).cpu().detach().numpy()
 80 |         test_truth = y_true.view(-1).cpu().detach().numpy()
 81 | 
 82 |         test_preds_a7 = np.clip(test_preds, a_min=-3., a_max=3.)
 83 |         test_truth_a7 = np.clip(test_truth, a_min=-3., a_max=3.)
 84 |         test_preds_a5 = np.clip(test_preds, a_min=-2., a_max=2.)
 85 |         test_truth_a5 = np.clip(test_truth, a_min=-2., a_max=2.)
 86 |         test_preds_a3 = np.clip(test_preds, a_min=-1., a_max=1.)
 87 |         test_truth_a3 = np.clip(test_truth, a_min=-1., a_max=1.)
 88 | 
 89 | 
 90 |         mae = np.mean(np.absolute(test_preds - test_truth))   # Average L1 distance between preds and truths
 91 |         corr = np.corrcoef(test_preds, test_truth)[0][1]
 92 |         mult_a7 = self.__multiclass_acc(test_preds_a7, test_truth_a7)
 93 |         mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
 94 |         mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
 95 |         
 96 |         non_zeros = np.array([i for i, e in enumerate(test_truth) if e != 0])
 97 |         non_zeros_binary_truth = (test_truth[non_zeros] > 0)
 98 |         non_zeros_binary_preds = (test_preds[non_zeros] > 0)
 99 | 
100 |         non_zeros_acc2 = accuracy_score(non_zeros_binary_preds, non_zeros_binary_truth)
101 |         non_zeros_f1_score = f1_score(non_zeros_binary_truth, non_zeros_binary_preds, average='weighted')
102 | 
103 |         binary_truth = (test_truth >= 0)
104 |         binary_preds = (test_preds >= 0)
105 |         acc2 = accuracy_score(binary_preds, binary_truth)
106 |         f_score = f1_score(binary_truth, binary_preds, average='weighted')
107 |         
108 |         eval_results = {
109 |             "Has0_acc_2":  round(acc2, 4),
110 |             "Has0_F1_score": round(f_score, 4),
111 |             "Non0_acc_2":  round(non_zeros_acc2, 4),
112 |             "Non0_F1_score": round(non_zeros_f1_score, 4),
113 |             "Mult_acc_5": round(mult_a5, 4),
114 |             "Mult_acc_7": round(mult_a7, 4),
115 |             "MAE": round(mae, 4),
116 |             "Corr": round(corr, 4)
117 |         }
118 |         return eval_results
119 | 
120 | 
121 |     def __eval_mosi_regression(self, y_pred, y_true):
122 |         return self.__eval_mosei_regression(y_pred, y_true)
123 | 
124 |     def __eval_sims_regression(self, y_pred, y_true):
125 |         test_preds = y_pred.view(-1).cpu().detach().numpy()
126 |         test_truth = y_true.view(-1).cpu().detach().numpy()
127 |         test_preds = np.clip(test_preds, a_min=-1., a_max=1.)
128 |         test_truth = np.clip(test_truth, a_min=-1., a_max=1.)
129 | 
130 |         # weak sentiment two classes{[-0.6, 0.0], (0.0, 0.6]}
131 |         ms_2 = [-1.01, 0.0, 1.01]
132 |         weak_index_l = np.where(test_truth >= -0.4)[0]
133 |         weak_index_r = np.where(test_truth <= 0.4)[0]
134 |         weak_index = [x for x in weak_index_l if x in weak_index_r]
135 |         test_preds_weak = test_preds[weak_index]
136 |         test_truth_weak = test_truth[weak_index]
137 |         test_preds_a2_weak = test_preds_weak.copy()
138 |         test_truth_a2_weak = test_truth_weak.copy()
139 |         for i in range(2):
140 |             test_preds_a2_weak[np.logical_and(test_preds_weak > ms_2[i], test_preds_weak <= ms_2[i + 1])] = i
141 |         for i in range(2):
142 |             test_truth_a2_weak[np.logical_and(test_truth_weak > ms_2[i], test_truth_weak <= ms_2[i + 1])] = i
143 | 
144 |         # two classes{[-1.0, 0.0], (0.0, 1.0]}
145 |         ms_2 = [-1.01, 0.0, 1.01]
146 |         test_preds_a2 = test_preds.copy()
147 |         test_truth_a2 = test_truth.copy()
148 |         for i in range(2):
149 |             test_preds_a2[np.logical_and(test_preds > ms_2[i], test_preds <= ms_2[i+1])] = i
150 |         for i in range(2):
151 |             test_truth_a2[np.logical_and(test_truth > ms_2[i], test_truth <= ms_2[i+1])] = i
152 | 
153 |         # three classes{[-1.0, -0.1], (-0.1, 0.1], (0.1, 1.0]}
154 |         ms_3 = [-1.01, -0.1, 0.1, 1.01]
155 |         test_preds_a3 = test_preds.copy()
156 |         test_truth_a3 = test_truth.copy()
157 |         for i in range(3):
158 |             test_preds_a3[np.logical_and(test_preds > ms_3[i], test_preds <= ms_3[i+1])] = i
159 |         for i in range(3):
160 |             test_truth_a3[np.logical_and(test_truth > ms_3[i], test_truth <= ms_3[i+1])] = i
161 |         
162 |         # five classes{[-1.0, -0.7], (-0.7, -0.1], (-0.1, 0.1], (0.1, 0.7], (0.7, 1.0]}
163 |         ms_5 = [-1.01, -0.7, -0.1, 0.1, 0.7, 1.01]
164 |         test_preds_a5 = test_preds.copy()
165 |         test_truth_a5 = test_truth.copy()
166 |         for i in range(5):
167 |             test_preds_a5[np.logical_and(test_preds > ms_5[i], test_preds <= ms_5[i+1])] = i
168 |         for i in range(5):
169 |             test_truth_a5[np.logical_and(test_truth > ms_5[i], test_truth <= ms_5[i+1])] = i
170 |  
171 |         mae = np.mean(np.absolute(test_preds - test_truth))   # Average L1 distance between preds and truths
172 |         corr = np.corrcoef(test_preds, test_truth)[0][1]
173 |         mult_a2 = self.__multiclass_acc(test_preds_a2, test_truth_a2)
174 |         mult_a2_weak = self.__multiclass_acc(test_preds_a2_weak, test_truth_a2_weak)
175 |         mult_a3 = self.__multiclass_acc(test_preds_a3, test_truth_a3)
176 |         mult_a5 = self.__multiclass_acc(test_preds_a5, test_truth_a5)
177 |         f_score = f1_score(test_truth_a2, test_preds_a2, average='weighted')
178 |         r2 = r2_score(test_truth, test_preds)
179 |         eval_results = {
180 |             "Mult_acc_2": mult_a2,
181 |             "F1_score": f_score,
182 |             "Mult_acc_2_weak": mult_a2_weak,
183 |             "MAE": mae,
184 |             "Corr": corr,  # Correlation Coefficient
185 |             "Mult_acc_3": mult_a3,
186 |             "Mult_acc_5": mult_a5,
187 |             "R_squre": r2
188 |         }
189 |         return eval_results
190 | 
191 |     def __eval_simsv2_regression(self, y_pred, y_true):
192 |         return self.__eval_sims_regression(y_pred, y_true)
193 |     def getMetics(self, datasetName):
194 |         return self.metrics_dict[datasetName.upper()]


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/models/subNets/Textmodel.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import collections
  4 | import re
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | from models.ChatGLM3.modeling_chatglm import ChatGLMForConditionalGeneration
 10 | from models.ChatGLM3.tokenization_chatglm import ChatGLMTokenizer
 11 | 
 12 | __all__ = ['Language_model']
 13 | 
 14 | class Language_model (nn.Module):
 15 |     def __init__(self, args, use_PLM = True):
 16 |         """
 17 |         language: en / cn
 18 |         """
 19 |         super(Language_model, self).__init__()
 20 | 
 21 |         if use_PLM:
 22 |             pretrained_model = args.pretrain_LM              #pretrained model select
 23 |             self.model = ChatGLMForConditionalGeneration.from_pretrained(pretrained_model, trust_remote_code=True, torch_dtype=torch.bfloat16).half()
 24 |             self.tokenizer = ChatGLMTokenizer.from_pretrained(pretrained_model, trust_remote_code=True)
 25 |             self.device = args.device
 26 |             self.language = args.language
 27 |             self.max_new_tokens = args.max_new_tokens
 28 |             self.datasetName = args.datasetName
 29 |             self.train_mode = args.train_mode
 30 |             self.task_specific_prompt = args.task_specific_prompt
 31 |             # freeze parameter
 32 |             for param in self.model.parameters():
 33 |                 param.requires_grad = False
 34 |         else:
 35 |             print('please use PLM')
 36 | 
 37 |     def text_embedding(self,text_ids):
 38 |         embeddings = self.model.base_model.get_input_embeddings()
 39 |         return embeddings(text_ids)
 40 | 
 41 | 
 42 |     def forward(self, fusion_embedding, labels):
 43 |         """
 44 |         Args:
 45 |             fusion_embedding: the "concatenate" result of  multimodal low rank fusion  and text embedding
 46 |             label: ground_truth
 47 |         """
 48 | 
 49 |         fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  #添加多模态输入的special prompt
 50 |         opt_tokens, labels = self.input_processing(fusion_embedding, labels, mode = 'train')          #创建fusion+prompt+answer_mask的input和label
 51 | 
 52 |         with torch.cuda.amp.autocast():
 53 |             output = self.model(input_ids = opt_tokens, input_fusion=fusion_embedding, labels = labels)  # Models outputs are now tuples
 54 | 
 55 |         return output
 56 | 
 57 |     def generate(self, fusion_embedding):
 58 |         """
 59 |         Args:
 60 |             samples (dict): A dictionary containing the following keys:
 61 |             use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling.
 62 |             num_beams (int): Number of beams for beam search. 1 means no beam search.
 63 |             max_new_tokens (int): The maximum length of the new tokens to be generated.
 64 |             top_p (float): The cumulative probability for nucleus sampling.
 65 |             top_k (int): The k for top-k sampling.
 66 |             penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty.
 67 |             num_captions (int): Number of captions to be generated for each image.
 68 |         """
 69 |         if self.train_mode == 'regression':
 70 |             # gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "penalty_alpha": 0.6, "top_p": 0.01, "temperature": 0.01}
 71 |             gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "top_k": 10}
 72 |         else:
 73 |             gen_kwargs = {"max_new_tokens": self.max_new_tokens, "num_beams": 1, "do_sample": False, "top_k": 10 }
 74 | 
 75 |         fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  # 添加多模态输入的special prompt
 76 |         opt_tokens, _ = self.input_processing(fusion_embedding, mode = 'generate')  # 创建fusion+prompt的input
 77 | 
 78 |         context_length = opt_tokens.size(1)
 79 |         all_responses =[]
 80 | 
 81 |         for outputs in self.model.stream_generate(opt_tokens, **gen_kwargs, input_fusion=fusion_embedding):
 82 |             outputs = outputs[:, context_length:].tolist()
 83 |             response = self.tokenizer.batch_decode(outputs)
 84 |         # all_responses = list(map(float, response))
 85 |         # all_responses = list(map(lambda x: float(x.replace('–', '-')), response))
 86 |         # all_responses = list(map(lambda x: float(x.replace('–', '-').replace('一', '-').replace('：', '').replace('/', '').replace('(', '').replace(':', '')), response))
 87 |         # all_responses = [float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?<!^)-', '0', x.replace('–', '-').replace('一', '-').replace('：', '')))) for x in response]
 88 |         # 处理生成结果，将一些不必要的字符转换为0
 89 |         A = 1
 90 |         for x in response:
 91 |             if self.train_mode == 'regression':
 92 |                 try:
 93 |                     value = float(
 94 |                         x.replace('–', '-').replace('一', '-').replace('：', '').replace('/', '').replace('(', '').replace(
 95 |                             ':', ''))
 96 |                     # value = float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?<!^)-', '0', x.replace('–', '-').replace('一', '-').replace('：', ''))))
 97 |                 except ValueError:
 98 |                     value = 0.0
 99 |             else:
100 |                 try:
101 |                     value = float(x)
102 |                 except ValueError:
103 |                     value = 0.0
104 |             all_responses.append(value)
105 |         return all_responses
106 | 
107 | 
108 |     def input_processing(self, fusion_embedding, labels = None, mode = None):
109 |         """
110 |         Args:
111 |             fusion_embedding: the "concatenate" result of  multimodal low rank fusion  and text embedding
112 |             fusion_empty: Create an empty matrix of the same size as fusion's batch, seq, so that it can be filled in during input
113 |             prompt: tokenizer prompt for different language cases
114 |         """
115 |         input_lengths = fusion_embedding[:, :, 0]
116 |         fusion_empty =(torch.ones(input_lengths.size(), dtype=torch.long).to(self.device).fill_(0))
117 | 
118 |         task_prompt = self.get_task_prompt()
119 |         prompt_broadcasted = task_prompt.expand(fusion_empty.size(0), -1)
120 | 
121 |         opt_tokens = torch.cat([fusion_empty, prompt_broadcasted], dim=1)    #构建fusion+prompt的tokens(其中fusion部分是空白，用于后面填充)
122 | 
123 |         opt_tokens, labels = self.input_labels_construct(opt_tokens, labels, mode)
124 | 
125 |         return opt_tokens, labels
126 | 
127 |     def input_labels_construct(self, opt_tokens, labels = None, mode = None):
128 |         """
129 |         Args:
130 |             opt_tokens: the "concatenate" size of  multimodal low rank fusion, text embedding and prompt
131 |             label: ground_truth
132 |             labels_id: tokenizer labels
133 |         """
134 |         batch_size = opt_tokens.shape[0]
135 | 
136 |         if mode == "train":
137 |             if self.train_mode == "regression":
138 |                 label_template = [f"{label.item():.{1}f}" for label in labels]
139 |                 # labels_id = self.tokenizer(label_template, padding=True, return_tensors="pt", add_special_tokens=False)[
140 |                 #     "input_ids"].to(self.device)
141 |                 # labels_matrix = torch.empty_like(opt_tokens).fill_(-100).long().to(self.device)  # bz * seq_len
142 |                 # opt_tokens = torch.cat([opt_tokens, labels_id], dim=1)  # 将输入与labels拼接
143 |                 # labels = torch.cat([labels_matrix, labels_id], dim=1)
144 |             else:
145 |                 label_template = [f"{label.item()}" for label in labels]
146 | 
147 |             labels_id = self.tokenizer(label_template, padding=True, return_tensors="pt", add_special_tokens=False)[
148 |                 "input_ids"].to(self.device)
149 | 
150 |             # eos = torch.ones([batch_size, 1], dtype= opt_tokens.dtype, device=self.device) * self.tokenizer.eos_token_id
151 |             # labels_id = torch.cat([labels_id, eos], dim=1)
152 | 
153 |             labels_matrix = torch.empty_like(opt_tokens).fill_(-100).long().to(self.device)  # bz * seq_len
154 |             opt_tokens = torch.cat([opt_tokens, labels_id], dim=1)  # 将输入与labels拼接
155 |             labels = torch.cat([labels_matrix, labels_id], dim=1)
156 | 
157 |         return opt_tokens, labels
158 | 
159 |     def get_task_prompt(self):
160 |         # get the task_specific_prompt
161 |         prompt_text = self.task_specific_prompt
162 |         prompt_ids = self.tokenizer(prompt_text, padding=True, return_tensors="pt", add_special_tokens=False)["input_ids"].to(self.device)
163 | 
164 |         return prompt_ids
165 | 
166 |     def multimodal_prompt_wrap(self,fusion_embeddings):
167 |         """
168 |         Args:
169 |             Wrap the input with a special token
170 |         """
171 |         if self.language == "en":
172 |             prompt = '{question}\n\n <Multimodal><MultimodalHere></Multimodal>'
173 |             special_token = '<MultimodalHere>'
174 |         else:
175 |             prompt = '{问题}\n\n <多模态><MultimodalHere></多模态>'
176 |             special_token = '<MultimodalHere>'
177 | 
178 |         batch_size = fusion_embeddings.shape[0]
179 |         p_before, p_after = prompt.split(special_token)
180 |         p_before_tokens = self.tokenizer(
181 |             p_before, return_tensors="pt", add_special_tokens=True).to(self.device)
182 |         p_after_tokens = self.tokenizer(
183 |             p_after, return_tensors="pt", add_special_tokens=False).to(self.device)
184 |         p_before_embeds = self.text_embedding(p_before_tokens.input_ids).expand(batch_size, -1, -1)
185 |         p_after_embeds = self.text_embedding(p_after_tokens.input_ids).expand(batch_size, -1, -1)
186 |         wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1)
187 | 
188 |         return wrapped_fusion_embeddings


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/TextPre.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import h5py
  4 | import pickle
  5 | import argparse
  6 | import numpy as np
  7 | from tqdm import tqdm
  8 | 
  9 | import torch
 10 | import torch.nn as nn
 11 | import torch.nn.functional as F
 12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification
 13 | # from pytorch_transformers.amir_tokenization import BertTokenizer
 14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule
 15 | 
 16 | # from transformers.tokenization import BertTokenizer
 17 | from models.subNets.BertTextEncoder import BertTextEncoder
 18 | 
 19 | class TextPre(object):
 20 |     """A single set of features of data."""
 21 | 
 22 |     def __init__(self, args):
 23 |         self.device = torch.device('cuda:0')
 24 |         self.args = args
 25 |         self.loadTextMap = {
 26 |             'mosi': self.__load_data_mosi,
 27 |             'mosei': self.__load_data_mosei
 28 |         }
 29 |         self.bert = BertTextEncoder(language=args.language).to(self.device)
 30 |     
 31 |     def textConvertID(self, data, tokenizer):
 32 |         features = {}
 33 |         Input_ids, Input_mask, Segment_ids = [], [], []
 34 |         Raw_text, Visual, Audio = [], [], []
 35 |         Label, ids = [], []
 36 |         max_seq_length = self.args.max_seq_length
 37 |         for i in tqdm(range(len(data['raw_text']))):
 38 |             raw_text = data['raw_text'][i]
 39 |             visual = data['vision'][i]
 40 |             audio = data['audio'][i]
 41 |             tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True)
 42 |             
 43 |             if len(tokens_a) > max_seq_length - 2:
 44 |                 tokens_a = tokens_a[:max_seq_length - 2]
 45 |                 inversions_a = inversions_a[:max_seq_length - 2]
 46 |             
 47 |             tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
 48 | 
 49 |             segment_ids = [0] * len(tokens)
 50 | 
 51 |             input_ids = tokenizer.convert_tokens_to_ids(tokens)
 52 | 
 53 |             input_mask = [1] * len(input_ids)
 54 |             padding = [0] * (max_seq_length - len(input_ids))
 55 | 
 56 | 
 57 |             if self.args.aligned:
 58 |                 text_len = min(len(raw_text.split()), max_seq_length)
 59 |                 new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a]
 60 |                 new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a]
 61 | 
 62 |                 visual = np.array(new_visual)
 63 |                 audio = np.array(new_audio)
 64 | 
 65 |                 # add "start" and "end" for audio and vision
 66 |                 audio_zero = np.zeros((1,audio.shape[1]))
 67 |                 audio = np.concatenate((audio_zero,audio,audio_zero))
 68 | 
 69 |                 visual_zero = np.zeros((1,visual.shape[1]))
 70 |                 visual = np.concatenate((visual_zero,visual,visual_zero))
 71 | 
 72 |                 audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1]))
 73 |                 audio = np.concatenate((audio,audio_padding))
 74 | 
 75 |                 video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1]))
 76 |                 visual = np.concatenate((visual,video_padding))
 77 | 
 78 |                 assert audio.shape[0] == max_seq_length
 79 |                 assert visual.shape[0] == max_seq_length
 80 | 
 81 |             input_ids += padding
 82 |             input_mask += padding
 83 |             segment_ids += padding
 84 | 
 85 |             assert len(input_ids) == max_seq_length
 86 |             assert len(input_mask) == max_seq_length
 87 |             assert len(segment_ids) == max_seq_length
 88 | 
 89 |             label = float(data['labels'][i])
 90 | 
 91 |             Input_ids.append(input_ids)
 92 |             Visual.append(visual)
 93 |             Audio.append(audio)
 94 |             Input_mask.append(input_mask)
 95 |             Segment_ids.append(segment_ids)
 96 |             Label.append(label)
 97 |             Raw_text.append(raw_text)
 98 |             ids.append(data['id'][i])
 99 | 
100 |         features['raw_text'] = np.array(Raw_text)
101 |         features['audio'] = np.array(Audio)
102 |         features['vision'] = np.array(Visual)
103 |         features['labels'] = np.array(Label)
104 |         features['id'] = np.array(ids)
105 |         Input_ids = np.expand_dims(Input_ids, 1)
106 |         Input_mask = np.expand_dims(Input_mask, 1)
107 |         Segment_ids = np.expand_dims(Segment_ids, 1)
108 |         text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1) 
109 |         features['text_bert'] = text_bert
110 |         features['text'] = self.__convertID2Vector(text_bert)
111 |         return features
112 |     
113 |     def __convertID2Vector(self, ids, batch_size=64):
114 |         results = []
115 |         left = 0
116 |         ids = torch.Tensor(ids)
117 |         for left in tqdm(range(0, ids.size(0), batch_size)):
118 |             right = min(left + batch_size, ids.size(0))
119 |             c_ids = ids[left:right].to(self.device)
120 |             c_vector = self.bert(c_ids).detach().cpu().numpy()
121 |             results.append(c_vector)
122 |         results = np.concatenate(results, axis=0)
123 |         return results
124 |     
125 |     def __load_data_mosi(self):
126 |         # get text data
127 |         link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
128 |         text_data = {}
129 |         for file in os.listdir(link):
130 |             name = file.split('.')[0]
131 |             for line in open(os.path.join(link, file), "r"):
132 |                 num_id, cur_t = line.split('_DELIM_')
133 |                 name_id = name + '_' + num_id.strip()
134 |                 text_data[name_id] = cur_t.strip()
135 |         # get data
136 |         def matchData(mode='train'):
137 |             r_text = []
138 |             for cur_id in data[mode]['id']:
139 |                 r_text.append(text_data[cur_id[0]])
140 |             data[mode]['raw_text'] = r_text
141 |         
142 |         with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
143 |             data = pickle.load(lf)
144 |         
145 |         matchData(mode='train')
146 |         matchData(mode='valid')
147 |         matchData(mode='test')
148 | 
149 |         return data
150 |     
151 |     def __load_data_mosei(self):
152 |         def convert0(s):
153 |             if s == '0':
154 |                 return '0.0'
155 |             return s
156 |         # get text data
157 |         link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
158 |         text_data = {}
159 |         for file in os.listdir(link):
160 |             name = file.split('.')[0]
161 |             for line in open(os.path.join(link, file), "r"):
162 |                 items = line.split('___')
163 |                 name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3])
164 |                 text_data[name_id.strip()] = items[-1].strip()
165 |         # get data
166 |         def matchData(mode='train'):
167 |             r_text = []
168 |             for cur_id in data[mode]['id']:
169 |                 name = '_'.join(cur_id)
170 |                 r_text.append(text_data[name])
171 |             data[mode]['raw_text'] = r_text
172 |         
173 |         with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
174 |             data = pickle.load(lf)
175 |         
176 |         matchData(mode='train')
177 |         matchData(mode='valid')
178 |         matchData(mode='test')
179 | 
180 |         return data
181 | 
182 |     def run(self):
183 |         data = self.loadTextMap[self.args.datasetName]()
184 | 
185 |         train_list = data['train']
186 |         valid_list = data['valid']
187 |         test_list = data['test']
188 | 
189 |         tokenizer = self.bert.get_tokenizer()
190 | 
191 |         save_data = {}
192 |         save_data['train'] = self.textConvertID(train_list, tokenizer)
193 |         save_data['valid'] = self.textConvertID(valid_list, tokenizer)
194 |         save_data['test'] = self.textConvertID(test_list, tokenizer)
195 | 
196 |         if self.args.aligned:
197 |             saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl')
198 |         else:
199 |             saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl')
200 |         
201 |         if not os.path.exists(os.path.dirname(saved_path)):
202 |             os.makedirs(os.path.dirname(saved_path))
203 | 
204 |         with open(saved_path, 'wb') as file:
205 |             pickle.dump(save_data, file, protocol=4)
206 |             print('Save Successful!')
207 | 
208 | def parse_args():
209 |     parser = argparse.ArgumentParser()
210 |     parser.add_argument('--datasetName', type=str, default='mosei',
211 |                         help='need aligned data (support mosi / mosei)')
212 |     parser.add_argument('--language', type=str, default='cn',
213 |                         help='data language')
214 |     parser.add_argument('--aligned', type=bool, default=True,
215 |                         help='need aligned data')
216 |     parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI',
217 |                         help='path to MOSI / MOSEI')
218 |     parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw',
219 |                         help='path to saved directory')
220 |     parser.add_argument('--max_seq_length', type=int, default = 50,
221 |                         help='length')
222 |     return parser.parse_args()
223 | 
224 | if __name__ == "__main__":
225 |     args = parse_args()
226 |     tp = TextPre(args)
227 |     tp.run()
228 |     # tp.convertID2Vector()


--------------------------------------------------------------------------------
/MSE-Llama2-7B/data/TextPre.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import h5py
  4 | import pickle
  5 | import argparse
  6 | import numpy as np
  7 | from tqdm import tqdm
  8 | 
  9 | import torch
 10 | import torch.nn as nn
 11 | import torch.nn.functional as F
 12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification
 13 | # from pytorch_transformers.amir_tokenization import BertTokenizer
 14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule
 15 | 
 16 | # from transformers.tokenization import BertTokenizer
 17 | from models.subNets.BertTextEncoder import BertTextEncoder
 18 | 
 19 | class TextPre(object):
 20 |     """A single set of features of data."""
 21 | 
 22 |     def __init__(self, args):
 23 |         self.device = torch.device('cuda:0')
 24 |         self.args = args
 25 |         self.loadTextMap = {
 26 |             'mosi': self.__load_data_mosi,
 27 |             'mosei': self.__load_data_mosei
 28 |         }
 29 |         self.bert = BertTextEncoder(language=args.language).to(self.device)
 30 |     
 31 |     def textConvertID(self, data, tokenizer):
 32 |         features = {}
 33 |         Input_ids, Input_mask, Segment_ids = [], [], []
 34 |         Raw_text, Visual, Audio = [], [], []
 35 |         Label, ids = [], []
 36 |         max_seq_length = self.args.max_seq_length
 37 |         for i in tqdm(range(len(data['raw_text']))):
 38 |             raw_text = data['raw_text'][i]
 39 |             visual = data['vision'][i]
 40 |             audio = data['audio'][i]
 41 |             tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True)
 42 |             
 43 |             if len(tokens_a) > max_seq_length - 2:
 44 |                 tokens_a = tokens_a[:max_seq_length - 2]
 45 |                 inversions_a = inversions_a[:max_seq_length - 2]
 46 |             
 47 |             tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
 48 | 
 49 |             segment_ids = [0] * len(tokens)
 50 | 
 51 |             input_ids = tokenizer.convert_tokens_to_ids(tokens)
 52 | 
 53 |             input_mask = [1] * len(input_ids)
 54 |             padding = [0] * (max_seq_length - len(input_ids))
 55 | 
 56 | 
 57 |             if self.args.aligned:
 58 |                 text_len = min(len(raw_text.split()), max_seq_length)
 59 |                 new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a]
 60 |                 new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a]
 61 | 
 62 |                 visual = np.array(new_visual)
 63 |                 audio = np.array(new_audio)
 64 | 
 65 |                 # add "start" and "end" for audio and vision
 66 |                 audio_zero = np.zeros((1,audio.shape[1]))
 67 |                 audio = np.concatenate((audio_zero,audio,audio_zero))
 68 | 
 69 |                 visual_zero = np.zeros((1,visual.shape[1]))
 70 |                 visual = np.concatenate((visual_zero,visual,visual_zero))
 71 | 
 72 |                 audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1]))
 73 |                 audio = np.concatenate((audio,audio_padding))
 74 | 
 75 |                 video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1]))
 76 |                 visual = np.concatenate((visual,video_padding))
 77 | 
 78 |                 assert audio.shape[0] == max_seq_length
 79 |                 assert visual.shape[0] == max_seq_length
 80 | 
 81 |             input_ids += padding
 82 |             input_mask += padding
 83 |             segment_ids += padding
 84 | 
 85 |             assert len(input_ids) == max_seq_length
 86 |             assert len(input_mask) == max_seq_length
 87 |             assert len(segment_ids) == max_seq_length
 88 | 
 89 |             label = float(data['labels'][i])
 90 | 
 91 |             Input_ids.append(input_ids)
 92 |             Visual.append(visual)
 93 |             Audio.append(audio)
 94 |             Input_mask.append(input_mask)
 95 |             Segment_ids.append(segment_ids)
 96 |             Label.append(label)
 97 |             Raw_text.append(raw_text)
 98 |             ids.append(data['id'][i])
 99 | 
100 |         features['raw_text'] = np.array(Raw_text)
101 |         features['audio'] = np.array(Audio)
102 |         features['vision'] = np.array(Visual)
103 |         features['labels'] = np.array(Label)
104 |         features['id'] = np.array(ids)
105 |         Input_ids = np.expand_dims(Input_ids, 1)
106 |         Input_mask = np.expand_dims(Input_mask, 1)
107 |         Segment_ids = np.expand_dims(Segment_ids, 1)
108 |         text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1) 
109 |         features['text_bert'] = text_bert
110 |         features['text'] = self.__convertID2Vector(text_bert)
111 |         return features
112 |     
113 |     def __convertID2Vector(self, ids, batch_size=64):
114 |         results = []
115 |         left = 0
116 |         ids = torch.Tensor(ids)
117 |         for left in tqdm(range(0, ids.size(0), batch_size)):
118 |             right = min(left + batch_size, ids.size(0))
119 |             c_ids = ids[left:right].to(self.device)
120 |             c_vector = self.bert(c_ids).detach().cpu().numpy()
121 |             results.append(c_vector)
122 |         results = np.concatenate(results, axis=0)
123 |         return results
124 |     
125 |     def __load_data_mosi(self):
126 |         # get text data
127 |         link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
128 |         text_data = {}
129 |         for file in os.listdir(link):
130 |             name = file.split('.')[0]
131 |             for line in open(os.path.join(link, file), "r"):
132 |                 num_id, cur_t = line.split('_DELIM_')
133 |                 name_id = name + '_' + num_id.strip()
134 |                 text_data[name_id] = cur_t.strip()
135 |         # get data
136 |         def matchData(mode='train'):
137 |             r_text = []
138 |             for cur_id in data[mode]['id']:
139 |                 r_text.append(text_data[cur_id[0]])
140 |             data[mode]['raw_text'] = r_text
141 |         
142 |         with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
143 |             data = pickle.load(lf)
144 |         
145 |         matchData(mode='train')
146 |         matchData(mode='valid')
147 |         matchData(mode='test')
148 | 
149 |         return data
150 |     
151 |     def __load_data_mosei(self):
152 |         def convert0(s):
153 |             if s == '0':
154 |                 return '0.0'
155 |             return s
156 |         # get text data
157 |         link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
158 |         text_data = {}
159 |         for file in os.listdir(link):
160 |             name = file.split('.')[0]
161 |             for line in open(os.path.join(link, file), "r"):
162 |                 items = line.split('___')
163 |                 name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3])
164 |                 text_data[name_id.strip()] = items[-1].strip()
165 |         # get data
166 |         def matchData(mode='train'):
167 |             r_text = []
168 |             for cur_id in data[mode]['id']:
169 |                 name = '_'.join(cur_id)
170 |                 r_text.append(text_data[name])
171 |             data[mode]['raw_text'] = r_text
172 |         
173 |         with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
174 |             data = pickle.load(lf)
175 |         
176 |         matchData(mode='train')
177 |         matchData(mode='valid')
178 |         matchData(mode='test')
179 | 
180 |         return data
181 | 
182 |     def run(self):
183 |         data = self.loadTextMap[self.args.datasetName]()
184 | 
185 |         train_list = data['train']
186 |         valid_list = data['valid']
187 |         test_list = data['test']
188 | 
189 |         tokenizer = self.bert.get_tokenizer()
190 | 
191 |         save_data = {}
192 |         save_data['train'] = self.textConvertID(train_list, tokenizer)
193 |         save_data['valid'] = self.textConvertID(valid_list, tokenizer)
194 |         save_data['test'] = self.textConvertID(test_list, tokenizer)
195 | 
196 |         if self.args.aligned:
197 |             saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl')
198 |         else:
199 |             saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl')
200 |         
201 |         if not os.path.exists(os.path.dirname(saved_path)):
202 |             os.makedirs(os.path.dirname(saved_path))
203 | 
204 |         with open(saved_path, 'wb') as file:
205 |             pickle.dump(save_data, file, protocol=4)
206 |             print('Save Successful!')
207 | 
208 | def parse_args():
209 |     parser = argparse.ArgumentParser()
210 |     parser.add_argument('--datasetName', type=str, default='mosei',
211 |                         help='need aligned data (support mosi / mosei)')
212 |     parser.add_argument('--language', type=str, default='cn',
213 |                         help='data language')
214 |     parser.add_argument('--aligned', type=bool, default=True,
215 |                         help='need aligned data')
216 |     parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI',
217 |                         help='path to MOSI / MOSEI')
218 |     parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw',
219 |                         help='path to saved directory')
220 |     parser.add_argument('--max_seq_length', type=int, default = 50,
221 |                         help='length')
222 |     return parser.parse_args()
223 | 
224 | if __name__ == "__main__":
225 |     args = parse_args()
226 |     tp = TextPre(args)
227 |     tp.run()
228 |     # tp.convertID2Vector()


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/data/TextPre.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import h5py
  4 | import pickle
  5 | import argparse
  6 | import numpy as np
  7 | from tqdm import tqdm
  8 | 
  9 | import torch
 10 | import torch.nn as nn
 11 | import torch.nn.functional as F
 12 | # from pytorch_transformers.modeling_bert import BertForSequenceClassification, BertConfig, MultimodalBertForSequenceClassification
 13 | # from pytorch_transformers.amir_tokenization import BertTokenizer
 14 | # from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule
 15 | 
 16 | # from transformers.tokenization import BertTokenizer
 17 | from models.subNets.BertTextEncoder import BertTextEncoder
 18 | 
 19 | class TextPre(object):
 20 |     """A single set of features of data."""
 21 | 
 22 |     def __init__(self, args):
 23 |         self.device = torch.device('cuda:0')
 24 |         self.args = args
 25 |         self.loadTextMap = {
 26 |             'mosi': self.__load_data_mosi,
 27 |             'mosei': self.__load_data_mosei
 28 |         }
 29 |         self.bert = BertTextEncoder(language=args.language).to(self.device)
 30 |     
 31 |     def textConvertID(self, data, tokenizer):
 32 |         features = {}
 33 |         Input_ids, Input_mask, Segment_ids = [], [], []
 34 |         Raw_text, Visual, Audio = [], [], []
 35 |         Label, ids = [], []
 36 |         max_seq_length = self.args.max_seq_length
 37 |         for i in tqdm(range(len(data['raw_text']))):
 38 |             raw_text = data['raw_text'][i]
 39 |             visual = data['vision'][i]
 40 |             audio = data['audio'][i]
 41 |             tokens_a, inversions_a = tokenizer.tokenize(raw_text,invertable=True)
 42 |             
 43 |             if len(tokens_a) > max_seq_length - 2:
 44 |                 tokens_a = tokens_a[:max_seq_length - 2]
 45 |                 inversions_a = inversions_a[:max_seq_length - 2]
 46 |             
 47 |             tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
 48 | 
 49 |             segment_ids = [0] * len(tokens)
 50 | 
 51 |             input_ids = tokenizer.convert_tokens_to_ids(tokens)
 52 | 
 53 |             input_mask = [1] * len(input_ids)
 54 |             padding = [0] * (max_seq_length - len(input_ids))
 55 | 
 56 | 
 57 |             if self.args.aligned:
 58 |                 text_len = min(len(raw_text.split()), max_seq_length)
 59 |                 new_visual = [visual[len(visual) - text_len + inv_id] for inv_id in inversions_a]
 60 |                 new_audio = [audio[len(audio) - text_len + inv_id] for inv_id in inversions_a]
 61 | 
 62 |                 visual = np.array(new_visual)
 63 |                 audio = np.array(new_audio)
 64 | 
 65 |                 # add "start" and "end" for audio and vision
 66 |                 audio_zero = np.zeros((1,audio.shape[1]))
 67 |                 audio = np.concatenate((audio_zero,audio,audio_zero))
 68 | 
 69 |                 visual_zero = np.zeros((1,visual.shape[1]))
 70 |                 visual = np.concatenate((visual_zero,visual,visual_zero))
 71 | 
 72 |                 audio_padding = np.zeros((max_seq_length - len(input_ids),audio.shape[1]))
 73 |                 audio = np.concatenate((audio,audio_padding))
 74 | 
 75 |                 video_padding = np.zeros((max_seq_length - len(input_ids),visual.shape[1]))
 76 |                 visual = np.concatenate((visual,video_padding))
 77 | 
 78 |                 assert audio.shape[0] == max_seq_length
 79 |                 assert visual.shape[0] == max_seq_length
 80 | 
 81 |             input_ids += padding
 82 |             input_mask += padding
 83 |             segment_ids += padding
 84 | 
 85 |             assert len(input_ids) == max_seq_length
 86 |             assert len(input_mask) == max_seq_length
 87 |             assert len(segment_ids) == max_seq_length
 88 | 
 89 |             label = float(data['labels'][i])
 90 | 
 91 |             Input_ids.append(input_ids)
 92 |             Visual.append(visual)
 93 |             Audio.append(audio)
 94 |             Input_mask.append(input_mask)
 95 |             Segment_ids.append(segment_ids)
 96 |             Label.append(label)
 97 |             Raw_text.append(raw_text)
 98 |             ids.append(data['id'][i])
 99 | 
100 |         features['raw_text'] = np.array(Raw_text)
101 |         features['audio'] = np.array(Audio)
102 |         features['vision'] = np.array(Visual)
103 |         features['labels'] = np.array(Label)
104 |         features['id'] = np.array(ids)
105 |         Input_ids = np.expand_dims(Input_ids, 1)
106 |         Input_mask = np.expand_dims(Input_mask, 1)
107 |         Segment_ids = np.expand_dims(Segment_ids, 1)
108 |         text_bert = np.concatenate((Input_ids, Input_mask, Segment_ids), axis=1) 
109 |         features['text_bert'] = text_bert
110 |         features['text'] = self.__convertID2Vector(text_bert)
111 |         return features
112 |     
113 |     def __convertID2Vector(self, ids, batch_size=64):
114 |         results = []
115 |         left = 0
116 |         ids = torch.Tensor(ids)
117 |         for left in tqdm(range(0, ids.size(0), batch_size)):
118 |             right = min(left + batch_size, ids.size(0))
119 |             c_ids = ids[left:right].to(self.device)
120 |             c_vector = self.bert(c_ids).detach().cpu().numpy()
121 |             results.append(c_vector)
122 |         results = np.concatenate(results, axis=0)
123 |         return results
124 |     
125 |     def __load_data_mosi(self):
126 |         # get text data
127 |         link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
128 |         text_data = {}
129 |         for file in os.listdir(link):
130 |             name = file.split('.')[0]
131 |             for line in open(os.path.join(link, file), "r"):
132 |                 num_id, cur_t = line.split('_DELIM_')
133 |                 name_id = name + '_' + num_id.strip()
134 |                 text_data[name_id] = cur_t.strip()
135 |         # get data
136 |         def matchData(mode='train'):
137 |             r_text = []
138 |             for cur_id in data[mode]['id']:
139 |                 r_text.append(text_data[cur_id[0]])
140 |             data[mode]['raw_text'] = r_text
141 |         
142 |         with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
143 |             data = pickle.load(lf)
144 |         
145 |         matchData(mode='train')
146 |         matchData(mode='valid')
147 |         matchData(mode='test')
148 | 
149 |         return data
150 |     
151 |     def __load_data_mosei(self):
152 |         def convert0(s):
153 |             if s == '0':
154 |                 return '0.0'
155 |             return s
156 |         # get text data
157 |         link = os.path.join(self.args.data_dir, 'Raw/Transcript/Segmented')
158 |         text_data = {}
159 |         for file in os.listdir(link):
160 |             name = file.split('.')[0]
161 |             for line in open(os.path.join(link, file), "r"):
162 |                 items = line.split('___')
163 |                 name_id = items[0] + '_' + convert0(items[2]) + '_' + convert0(items[3])
164 |                 text_data[name_id.strip()] = items[-1].strip()
165 |         # get data
166 |         def matchData(mode='train'):
167 |             r_text = []
168 |             for cur_id in data[mode]['id']:
169 |                 name = '_'.join(cur_id)
170 |                 r_text.append(text_data[name])
171 |             data[mode]['raw_text'] = r_text
172 |         
173 |         with open(os.path.join(self.args.data_dir, 'Processed/mosei_senti_data_noalign.pkl'), 'rb') as lf:
174 |             data = pickle.load(lf)
175 |         
176 |         matchData(mode='train')
177 |         matchData(mode='valid')
178 |         matchData(mode='test')
179 | 
180 |         return data
181 | 
182 |     def run(self):
183 |         data = self.loadTextMap[self.args.datasetName]()
184 | 
185 |         train_list = data['train']
186 |         valid_list = data['valid']
187 |         test_list = data['test']
188 | 
189 |         tokenizer = self.bert.get_tokenizer()
190 | 
191 |         save_data = {}
192 |         save_data['train'] = self.textConvertID(train_list, tokenizer)
193 |         save_data['valid'] = self.textConvertID(valid_list, tokenizer)
194 |         save_data['test'] = self.textConvertID(test_list, tokenizer)
195 | 
196 |         if self.args.aligned:
197 |             saved_path = os.path.join(self.args.save_dir, 'aligned_' + str(self.args.max_seq_length) + '.pkl')
198 |         else:
199 |             saved_path = os.path.join(self.args.save_dir, 'unaligned_' + str(self.args.max_seq_length) + '.pkl')
200 |         
201 |         if not os.path.exists(os.path.dirname(saved_path)):
202 |             os.makedirs(os.path.dirname(saved_path))
203 | 
204 |         with open(saved_path, 'wb') as file:
205 |             pickle.dump(save_data, file, protocol=4)
206 |             print('Save Successful!')
207 | 
208 | def parse_args():
209 |     parser = argparse.ArgumentParser()
210 |     parser.add_argument('--datasetName', type=str, default='mosei',
211 |                         help='need aligned data (support mosi / mosei)')
212 |     parser.add_argument('--language', type=str, default='cn',
213 |                         help='data language')
214 |     parser.add_argument('--aligned', type=bool, default=True,
215 |                         help='need aligned data')
216 |     parser.add_argument('--data_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/CMU-MOSEI',
217 |                         help='path to MOSI / MOSEI')
218 |     parser.add_argument('--save_dir', type=str, default = '/home/sharing/disk3/dataset/multimodal-sentiment-dataset/ALL/mosei/raw',
219 |                         help='path to saved directory')
220 |     parser.add_argument('--max_seq_length', type=int, default = 50,
221 |                         help='length')
222 |     return parser.parse_args()
223 | 
224 | if __name__ == "__main__":
225 |     args = parse_args()
226 |     tp = TextPre(args)
227 |     tp.run()
228 |     # tp.convertID2Vector()


--------------------------------------------------------------------------------
/MSE-Llama2-7B/models/subNets/Textmodel.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import collections
  4 | import re
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | from modelscope import AutoTokenizer, AutoModel, AutoModelForCausalLM
 10 | 
 11 | 
 12 | __all__ = ['Language_model']
 13 | 
 14 | class Language_model (nn.Module):
 15 |     def __init__(self, args, use_PLM = True):
 16 |         """
 17 |         language: en / cn
 18 |         """
 19 |         super(Language_model, self).__init__()
 20 | 
 21 |         if use_PLM:
 22 |             pretrained_model = args.pretrain_LM              #pretrained model select
 23 |             self.tokenizer = AutoTokenizer.from_pretrained(
 24 |                 pretrained_model,
 25 |                 padding_side='left',
 26 |                 trust_remote_code=True
 27 |             )
 28 |             self.model = AutoModelForCausalLM.from_pretrained(
 29 |                 pretrained_model,
 30 |                 trust_remote_code=True,
 31 |                 torch_dtype=torch.bfloat16
 32 |             ).half()
 33 |             # self.pad_token_id = self.tokenizer.convert_tokens_to_ids('<|extra_0|>')
 34 |             # self.tokenizer.pad_token_id = self.pad_token_id
 35 |             self.tokenizer.pad_token_id = 0
 36 |             self.eos_token_id = self.tokenizer.convert_tokens_to_ids('<|endoftext|>')
 37 | 
 38 |             self.device = args.device
 39 |             self.language = args.language
 40 |             self.max_new_tokens = args.max_new_tokens
 41 |             self.datasetName = args.datasetName
 42 |             self.train_mode = args.train_mode
 43 |             self.task_specific_prompt = args.task_specific_prompt
 44 |             # freeze parameter
 45 |             for param in self.model.parameters():
 46 |                 param.requires_grad = False
 47 |         else:
 48 |             print('please use PLM')
 49 | 
 50 |     def text_embedding(self,text_ids):
 51 |         embeddings = self.model.base_model.get_input_embeddings()
 52 |         return embeddings(text_ids)
 53 | 
 54 | 
 55 |     def forward(self, fusion_embedding, labels):
 56 |         """
 57 |         Args:
 58 |             fusion_embedding: the "concatenate" result of  multimodal low rank fusion  and text embedding
 59 |             label: ground_truth
 60 |         """
 61 | 
 62 |         fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  #添加多模态输入的special prompt
 63 |         opt_tokens, atts_bos, atts_fusion, labels, labels_atts = self.input_processing(fusion_embedding, labels, mode = 'train')          #创建fusion+prompt+answer_mask的input和label
 64 | 
 65 |         attention_mask = torch.cat([atts_bos, atts_fusion, labels_atts], dim=1)
 66 | 
 67 | 
 68 |         with torch.cuda.amp.autocast():
 69 |             output = self.model(inputs_embeds = opt_tokens, return_dict=True, labels = labels)  # Models outputs are now tuples
 70 | 
 71 |         return output
 72 | 
 73 |     def generate(self, fusion_embedding):
 74 |         """
 75 |         Args:
 76 |             samples (dict): A dictionary containing the following keys:
 77 |             use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling.
 78 |             num_beams (int): Number of beams for beam search. 1 means no beam search.
 79 |             max_new_tokens (int): The maximum length of the new tokens to be generated.
 80 |             top_p (float): The cumulative probability for nucleus sampling.
 81 |             top_k (int): The k for top-k sampling.
 82 |             penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty.
 83 |             num_captions (int): Number of captions to be generated for each image.
 84 |         """
 85 | 
 86 | 
 87 |         fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  # 添加多模态输入的special prompt
 88 |         opt_tokens, _, _, _, _= self.input_processing(fusion_embedding, mode = 'generate')  # 创建fusion+prompt的input
 89 |         # attention_mask = torch.cat([atts_bos, atts_fusion], dim=1)
 90 |         context_length = opt_tokens.size(1)
 91 |         all_responses =[]
 92 | 
 93 |         outputs = self.model.generate(inputs_embeds = opt_tokens,
 94 |                                       num_beams=1,
 95 |                                       do_sample = False,
 96 |                                       top_p = None,
 97 |                                       max_new_tokens  = self.max_new_tokens)
 98 |         responses = self.tokenizer.batch_decode(outputs[:,1:], add_special_tokens=False, skip_special_tokens=True, clean_up_tokenization_spaces=False)
 99 | 
100 |         # print(responses)
101 |         for response in responses:
102 |         # 处理生成结果，将一些不必要的字符转换为0
103 |             if self.train_mode == 'regression':
104 |                 try:
105 |                     value = float(
106 |                         response.replace('–', '-').replace('一', '-').replace('：', '').replace('/', '').replace('(', '').replace(
107 |                             ':', ''))
108 |                     # value = float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?<!^)-', '0', x.replace('–', '-').replace('一', '-').replace('：', ''))))
109 |                 except ValueError:
110 |                     value = 0.0
111 |             else:
112 |                 try:
113 |                     value = float(response)
114 |                 except ValueError:
115 |                     value = 0.0
116 |             all_responses.append(value)
117 |         return all_responses
118 | 
119 | 
120 |     def input_processing(self, fusion_embedding,  labels = None, mode = None):
121 |         """
122 |         Args:
123 |             fusion_embedding: the "concatenate" result of  multimodal low rank fusion  and text embedding
124 |             fusion_empty: Create an empty matrix of the same size as fusion's batch, seq, so that it can be filled in during input
125 |             prompt: tokenizer prompt for different language cases
126 |         """
127 | 
128 |         batch_size = fusion_embedding.shape[0]
129 | 
130 |         task_prompt = self.get_task_prompt()
131 |         task_prompt_embedding =  self.text_embedding(task_prompt.expand(batch_size, -1))
132 | 
133 | 
134 |         opt_tokens = torch.cat([fusion_embedding, task_prompt_embedding], dim=1)    #构建fusion+prompt的tokens
135 |         atts_fusion = torch.ones(opt_tokens.size()[:-1], dtype=torch.long).to(self.device)    #构建opt_tokens 的attention mask
136 | 
137 |         bos = torch.ones([batch_size, 1], dtype=atts_fusion.dtype, device=self.device) * self.tokenizer.bos_token_id
138 |         bos_embeds = self.text_embedding(bos)
139 |         atts_bos = atts_fusion[:, :1]
140 | 
141 |         opt_tokens =  torch.cat([bos_embeds, opt_tokens], dim=1)
142 | 
143 |         opt_tokens, labels, labels_atts = self.input_labels_construct(opt_tokens, labels, mode)
144 | 
145 |         return opt_tokens, atts_bos, atts_fusion, labels, labels_atts
146 | 
147 |     def input_labels_construct(self, opt_tokens, labels = None, mode = None):
148 |         """
149 |         Args:
150 |             opt_tokens: the "concatenate" size of  multimodal fusion, text embedding and prompt
151 |             label: ground_truth
152 |             labels_id: tokenizer labels
153 |         """
154 |         batch_size = opt_tokens.shape[0]
155 | 
156 |         if mode == "train":
157 |             if self.train_mode == "regression":
158 |                 label_template = [f"{label.item():.{1}f}" for label in labels]
159 |                 # label_template = [f"+{label.item():.{1}f}" if label > 0 else f"{label.item():.{1}f}" for label in
160 |                 #                   labels]
161 |                 # label_template = [
162 |                 #     f"+{label.item():.1f}" if label > 0 else f"{+label.item():.1f}" if label == 0 else f"{label.item():.1f}"
163 |                 #     for label in labels]
164 |             else:
165 |                 label_template = [f"{label.item()}" for label in labels]
166 | 
167 |             labels = self.tokenizer(label_template,  padding=True, return_tensors="pt", add_special_tokens=False).to(self.device)
168 |             labels_id = labels["input_ids"]
169 |             labels_atts = labels["attention_mask"]
170 | 
171 |             labels_embedding = self.text_embedding(labels_id)
172 |             labels_matrix = torch.empty(opt_tokens.size(0), opt_tokens.size(1)).fill_(-100).long().to(self.device)  # bz * seq_len 只构建和token_ids一个维度的矩阵
173 |             opt_tokens = torch.cat([opt_tokens, labels_embedding], dim=1)  # 将输入与labels拼接
174 |             labels = torch.cat([labels_matrix, labels_id], dim=1)
175 | 
176 | 
177 |         else:
178 |             labels_atts = None
179 | 
180 |         return opt_tokens, labels, labels_atts
181 | 
182 |     def get_task_prompt(self):
183 |         # get the task_specific_prompt
184 |         prompt_text = self.task_specific_prompt
185 |         prompt_ids = self.tokenizer(prompt_text, padding=True, return_tensors="pt", add_special_tokens=False)["input_ids"].to(self.device)
186 | 
187 |         return prompt_ids
188 | 
189 |     def multimodal_prompt_wrap(self,fusion_embeddings):
190 |         """
191 |         Args:
192 |             Wrap the input with a special token
193 |         """
194 |         if self.language == "en":
195 |             prompt = '<Multimodal><MultimodalHere></Multimodal>'
196 |             special_token = '<MultimodalHere>'
197 |         else:
198 |             prompt = '<多模态><MultimodalHere></多模态>'
199 |             special_token = '<MultimodalHere>'
200 | 
201 |         batch_size = fusion_embeddings.shape[0]
202 |         p_before, p_after = prompt.split(special_token)
203 |         p_before_tokens = self.tokenizer(
204 |             p_before, return_tensors="pt", add_special_tokens=True).to(self.device)
205 |         p_after_tokens = self.tokenizer(
206 |             p_after, return_tensors="pt", add_special_tokens=False).to(self.device)
207 |         p_before_embeds = self.text_embedding(p_before_tokens.input_ids.expand(batch_size, -1))
208 |         p_after_embeds = self.text_embedding(p_after_tokens.input_ids.expand(batch_size, -1))
209 |         wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1)
210 | 
211 | 
212 |         return wrapped_fusion_embeddings


--------------------------------------------------------------------------------
/MSE-Qwen-1.8B/models/subNets/Textmodel.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import collections
  4 | import re
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | from modelscope import AutoTokenizer, AutoModel, AutoModelForCausalLM
 10 | 
 11 | 
 12 | __all__ = ['Language_model']
 13 | 
 14 | class Language_model (nn.Module):
 15 |     def __init__(self, args, use_PLM = True):
 16 |         """
 17 |         language: en / cn
 18 |         """
 19 |         super(Language_model, self).__init__()
 20 | 
 21 |         if use_PLM:
 22 |             pretrained_model = args.pretrain_LM              #pretrained model select
 23 |             self.tokenizer = AutoTokenizer.from_pretrained(
 24 |                 pretrained_model,
 25 |                 padding_side='left',
 26 |                 trust_remote_code=True
 27 |             )
 28 |             self.model = AutoModelForCausalLM.from_pretrained(
 29 |                 pretrained_model,
 30 |                 trust_remote_code=True,
 31 |                 torch_dtype=torch.bfloat16
 32 |             ).half()
 33 |             # self.pad_token_id = self.tokenizer.convert_tokens_to_ids('<|extra_0|>')
 34 |             # self.tokenizer.pad_token_id = self.pad_token_id
 35 |             # self.tokenizer.pad_token_id = 0
 36 |             self.eos_token_id = self.tokenizer.convert_tokens_to_ids('<|endoftext|>')
 37 |             self.tokenizer.pad_token_id = self.eos_token_id
 38 | 
 39 |             self.bos_token_id = self.tokenizer.convert_tokens_to_ids('<|im_start|>')
 40 |             self.tokenizer.bos_token_id = self.bos_token_id
 41 | 
 42 |             self.device = args.device
 43 |             self.language = args.language
 44 |             self.max_new_tokens = args.max_new_tokens
 45 |             self.datasetName = args.datasetName
 46 |             self.train_mode = args.train_mode
 47 |             self.task_specific_prompt = args.task_specific_prompt
 48 |             # freeze parameter
 49 |             for param in self.model.parameters():
 50 |                 param.requires_grad = False
 51 |         else:
 52 |             print('please use PLM')
 53 | 
 54 |     def text_embedding(self,text_ids):
 55 |         embeddings = self.model.base_model.get_input_embeddings()
 56 |         return embeddings(text_ids)
 57 | 
 58 | 
 59 |     def forward(self, fusion_embedding, labels):
 60 |         """
 61 |         Args:
 62 |             fusion_embedding: the "concatenate" result of  multimodal low rank fusion  and text embedding
 63 |             label: ground_truth
 64 |         """
 65 | 
 66 |         fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  #添加多模态输入的special prompt
 67 |         opt_tokens, atts_bos, atts_fusion, labels, labels_atts = self.input_processing(fusion_embedding, labels, mode = 'train')          #创建fusion+prompt+answer_mask的input和label
 68 | 
 69 |         attention_mask = torch.cat([atts_bos, atts_fusion, labels_atts], dim=1)
 70 | 
 71 | 
 72 |         with torch.cuda.amp.autocast():
 73 |             output = self.model(inputs_embeds = opt_tokens, return_dict=True, labels = labels)  # Models outputs are now tuples
 74 | 
 75 |         return output
 76 | 
 77 |     def generate(self, fusion_embedding):
 78 |         """
 79 |         Args:
 80 |             samples (dict): A dictionary containing the following keys:
 81 |             use_nucleus_sampling (bool): Whether to use nucleus sampling. If False, use top-k sampling.
 82 |             num_beams (int): Number of beams for beam search. 1 means no beam search.
 83 |             max_new_tokens (int): The maximum length of the new tokens to be generated.
 84 |             top_p (float): The cumulative probability for nucleus sampling.
 85 |             top_k (int): The k for top-k sampling.
 86 |             penalty_alpha (float): The parameter for repetition penalty. 1.0 means no penalty.
 87 |             num_captions (int): Number of captions to be generated for each image.
 88 |         """
 89 | 
 90 | 
 91 |         fusion_embedding = self.multimodal_prompt_wrap(fusion_embedding)  # 添加多模态输入的special prompt
 92 |         opt_tokens, atts_bos, atts_fusion, _, _= self.input_processing(fusion_embedding, mode = 'generate')  # 创建fusion+prompt的input
 93 |         attention_mask = torch.cat([atts_bos, atts_fusion], dim=1)
 94 |         context_length = opt_tokens.size(1)
 95 |         all_responses =[]
 96 | 
 97 |         outputs = self.model.generate(inputs_embeds = opt_tokens,
 98 |                                       num_beams=1,
 99 |                                       do_sample = False,
100 |                                       bos_token_id = self.tokenizer.bos_token_id,
101 |                                       max_new_tokens  = self.max_new_tokens)
102 |         responses = self.tokenizer.batch_decode(outputs[:,1:], add_special_tokens=False, skip_special_tokens=True, clean_up_tokenization_spaces=False)
103 | 
104 |         # print(responses)
105 |         for response in responses:
106 |         # 处理生成结果，将一些不必要的字符转换为0
107 |             if self.train_mode == 'regression':
108 |                 try:
109 |                     value = float(
110 |                         response.replace('–', '-').replace('一', '-').replace('：', '').replace('/', '').replace('(', '').replace(
111 |                             ':', ''))
112 |                     # value = float(re.sub(r'[^0-9.-]', '0', re.sub(r'(?<!^)-', '0', x.replace('–', '-').replace('一', '-').replace('：', ''))))
113 |                 except ValueError:
114 |                     value = 0.0
115 |             else:
116 |                 try:
117 |                     value = float(response)
118 |                 except ValueError:
119 |                     value = 0.0
120 |             all_responses.append(value)
121 |         return all_responses
122 | 
123 | 
124 |     def input_processing(self, fusion_embedding,  labels = None, mode = None):
125 |         """
126 |         Args:
127 |             fusion_embedding: the "concatenate" result of  multimodal low rank fusion  and text embedding
128 |             fusion_empty: Create an empty matrix of the same size as fusion's batch, seq, so that it can be filled in during input
129 |             prompt: tokenizer prompt for different language cases
130 |         """
131 | 
132 |         batch_size = fusion_embedding.shape[0]
133 | 
134 |         task_prompt = self.get_task_prompt()
135 |         task_prompt_embedding =  self.text_embedding(task_prompt.expand(batch_size, -1))
136 | 
137 | 
138 |         opt_tokens = torch.cat([fusion_embedding, task_prompt_embedding], dim=1)    #构建fusion+prompt的tokens
139 |         atts_fusion = torch.ones(opt_tokens.size()[:-1], dtype=torch.long).to(self.device)    #构建opt_tokens 的attention mask
140 | 
141 |         bos = torch.ones([batch_size, 1], dtype=atts_fusion.dtype, device=self.device) * self.tokenizer.bos_token_id
142 |         bos_embeds = self.text_embedding(bos)
143 |         atts_bos = atts_fusion[:, :1]
144 | 
145 |         opt_tokens =  torch.cat([bos_embeds, opt_tokens], dim=1)
146 | 
147 |         opt_tokens, labels, labels_atts = self.input_labels_construct(opt_tokens, labels, mode)
148 | 
149 |         return opt_tokens, atts_bos, atts_fusion, labels, labels_atts
150 | 
151 |     def input_labels_construct(self, opt_tokens, labels = None, mode = None):
152 |         """
153 |         Args:
154 |             opt_tokens: the "concatenate" size of  multimodal fusion, text embedding and prompt
155 |             label: ground_truth
156 |             labels_id: tokenizer labels
157 |         """
158 |         batch_size = opt_tokens.shape[0]
159 | 
160 |         if mode == "train":
161 |             if self.train_mode == "regression":
162 |                 # label_template = [f"{label.item():.{1}f}" for label in labels]
163 |                 label_template = [f"+{label.item():.{1}f}" if label >= 0 else f"{label.item():.{1}f}" for label in
164 |                                   labels]
165 |                 # label_template = [
166 |                 #     f"+{label.item():.1f}" if label > 0 else f"{+label.item():.1f}" if label == 0 else f"{label.item():.1f}"
167 |                 #     for label in labels]
168 |             else:
169 |                 label_template = [f"{label.item()}" for label in labels]
170 | 
171 |             labels = self.tokenizer(label_template,  padding=True, return_tensors="pt", add_special_tokens=False).to(self.device)
172 |             labels_id = labels["input_ids"]
173 |             labels_atts = labels["attention_mask"]
174 | 
175 |             # a = [' ','0.20','-0.2','5','2','0','1','3','4','5','6','7','8','9']
176 |             # c = [31106]
177 |             # b = self.tokenizer(a, padding=True, return_tensors="pt", add_special_tokens=False)
178 |             # d = self.tokenizer.decode(c)
179 |             labels_embedding = self.text_embedding(labels_id)
180 |             labels_matrix = torch.empty(opt_tokens.size(0), opt_tokens.size(1)).fill_(-100).long().to(self.device)  # bz * seq_len 只构建和token_ids一个维度的矩阵
181 |             opt_tokens = torch.cat([opt_tokens, labels_embedding], dim=1)  # 将输入与labels拼接
182 |             labels = torch.cat([labels_matrix, labels_id], dim=1)
183 | 
184 | 
185 |         else:
186 |             labels_atts = None
187 | 
188 |         return opt_tokens, labels, labels_atts
189 | 
190 |     def get_task_prompt(self):
191 |         # get the task_specific_prompt
192 |         prompt_text = self.task_specific_prompt
193 |         prompt_ids = self.tokenizer(prompt_text, padding=True, return_tensors="pt", add_special_tokens=False)["input_ids"].to(self.device)
194 | 
195 |         return prompt_ids
196 | 
197 |     def multimodal_prompt_wrap(self,fusion_embeddings):
198 |         """
199 |         Args:
200 |             Wrap the input with a special token
201 |         """
202 |         if self.language == "en":
203 |             prompt = '<Multimodal><MultimodalHere></Multimodal>'
204 |             special_token = '<MultimodalHere>'
205 |         else:
206 |             prompt = '<多模态><MultimodalHere></多模态>'
207 |             special_token = '<MultimodalHere>'
208 | 
209 |         batch_size = fusion_embeddings.shape[0]
210 |         p_before, p_after = prompt.split(special_token)
211 |         p_before_tokens = self.tokenizer(
212 |             p_before, return_tensors="pt", add_special_tokens=True).to(self.device)
213 |         p_after_tokens = self.tokenizer(
214 |             p_after, return_tensors="pt", add_special_tokens=False).to(self.device)
215 |         p_before_embeds = self.text_embedding(p_before_tokens.input_ids.expand(batch_size, -1))
216 |         p_after_embeds = self.text_embedding(p_after_tokens.input_ids.expand(batch_size, -1))
217 |         wrapped_fusion_embeddings = torch.cat([p_before_embeds, fusion_embeddings, p_after_embeds], dim=1)
218 | 
219 | 
220 |         return wrapped_fusion_embeddings


--------------------------------------------------------------------------------
/MSE-ChatGLM3-6B/data/load_data.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import logging
  3 | import pickle
  4 | import json
  5 | import numpy as np
  6 | import pandas as pd
  7 | import torch
  8 | import gzip
  9 | import torch.nn.functional as F
 10 | from torch.utils.data import Dataset, DataLoader
 11 | from modelscope import AutoTokenizer, AutoModel
 12 | from operator import itemgetter
 13 | from torch.nn.utils.rnn import pad_sequence
 14 | 
 15 | __all__ = ['MMDataLoader']
 16 | 
 17 | logger = logging.getLogger('MSA')
 18 | 
 19 | class MMDataset(Dataset):
 20 |     def __init__(self, args, mode='train'):
 21 |         self.mode = mode
 22 |         self.args = args
 23 |         DATA_MAP = {
 24 |             'mosi': self.__init_mosi,
 25 |             'mosei': self.__init_mosei,
 26 |             'sims': self.__init_sims,
 27 |             'simsv2': self.__init_simsv2,
 28 |             'meld': self.__init_meld,
 29 |             'iemocap': self.__init_iemocap,
 30 |             'cherma': self.__init_cherma,
 31 | 
 32 |         }
 33 |         DATA_MAP[args.datasetName]()
 34 | 
 35 | 
 36 | 
 37 |     def __init_meld(self):
 38 |         data_path = os.path.join(self.args.dataPath, self.args.datasetName + '_' + self.mode + '.pkl')
 39 |         label_index_mapping = self.args.label_index_mapping
 40 |         with open(data_path, 'rb') as f:
 41 |             data = pickle.load(f)
 42 |             self.vision = np.array(list(map(lambda item: item['features']['video'], data))).astype(np.float32)
 43 |             self.audio = np.array(list(map(lambda item: item['features']['audio'], data))).astype(np.float32)
 44 |             self.rawText = np.array(list(map(lambda item: item['features']['text'], data)))
 45 | 
 46 |             # self.labels = {
 47 |             #     'M': list(map(lambda item: item['label'], data))
 48 |             # }
 49 |             self.labels = {
 50 |                 'M': list(map(lambda item: label_index_mapping.get(item['label'],-1), data))
 51 |             }
 52 |             if self.args.use_PLM:
 53 |                 self.text = self.PLM_tokenizer(self.rawText)
 54 | 
 55 |         # label_mapping
 56 | 
 57 |         # self.labels['M']  = [label_index_mapping.get(label, -1) for label in self.labels['M']]
 58 | 
 59 |         if not self.args.need_data_aligned:
 60 |             self.audio_lengths = np.array(list(map(lambda item: item['features']['audio_len'], data)))
 61 |             self.vision_lengths = np.array(list(map(lambda item: item['features']['video_len'], data)))
 62 | 
 63 |     def __init_iemocap(self):
 64 |         return self.__init_meld()
 65 | 
 66 |     def __init_cherma(self):
 67 |         return self.__init_meld()
 68 | 
 69 |     def __init_mosi(self):
 70 |         with open(self.args.dataPath, 'rb') as f:
 71 |             data = pickle.load(f)
 72 |             if self.args.use_PLM:
 73 |                 self.text = data[self.mode]['raw_text']
 74 |                 self.text = self.PLM_tokenizer(self.text)
 75 | 
 76 |         self.vision = data[self.mode]['vision'].astype(np.float32)
 77 |         self.audio = data[self.mode]['audio'].astype(np.float32)
 78 |         self.rawText = data[self.mode]['raw_text']
 79 |         self.ids = data[self.mode]['id']
 80 | 
 81 |         self.labels = {
 82 |             'M': data[self.mode][self.args.train_mode+'_labels'].astype(np.float32)
 83 |         }
 84 | 
 85 |         if self.args.need_label_prefix:
 86 |             labels = self.labels['M']
 87 |             label_prefix = []
 88 |             for i in range(len(labels)):
 89 |                 if labels[i] < 0:
 90 |                     label_prefix.append(f'negative,{labels[i].item():.{1}f}')
 91 |                 elif labels[i] > 0:
 92 |                     label_prefix.append(f'positive,{labels[i].item():.{1}f}')
 93 |                 else:
 94 |                     label_prefix.append(f'neutral,{labels[i].item():.{1}f}')
 95 |             self.labels_prefix = label_prefix
 96 | 
 97 |         if self.args.datasetName == 'sims':
 98 |             for m in "TAV":
 99 |                 self.labels[m] = data[self.mode][self.args.train_mode+'_labels_'+m]
100 | 
101 |         logger.info(f"{self.mode} samples: {self.labels['M'].shape}")
102 | 
103 |         if not self.args.need_data_aligned:
104 |             self.audio_lengths = data[self.mode]['audio_lengths']
105 |             self.vision_lengths = data[self.mode]['vision_lengths']
106 |             self.text_lengths = self.args.seq_lens[0]
107 |         self.audio[self.audio == -np.inf] = 0
108 |         self.vision[self.vision != self.vision] = 0
109 | 
110 |         if  self.args.need_normalized:
111 |             self.__normalize()
112 |     
113 |     def __init_mosei(self):
114 |         return self.__init_mosi()
115 | 
116 |     def __init_sims(self):
117 |         return self.__init_mosi()
118 | 
119 |     def __init_simsv2(self):
120 |         return self.__init_mosi()
121 | 
122 |     def __truncated(self):
123 |         # NOTE: Here for dataset we manually cut the input into specific length.
124 |         def Truncated(modal_features, length):
125 |             if length == modal_features.shape[1]:
126 |                 return modal_features
127 |             truncated_feature = []
128 |             padding = np.array([0 for i in range(modal_features.shape[2])])
129 |             for instance in modal_features:
130 |                 for index in range(modal_features.shape[1]):
131 |                     if((instance[index] == padding).all()):
132 |                         if(index + length >= modal_features.shape[1]):
133 |                             truncated_feature.append(instance[index:index+20])
134 |                             break
135 |                     else:                        
136 |                         truncated_feature.append(instance[index:index+20])
137 |                         break
138 |             truncated_feature = np.array(truncated_feature)
139 |             return truncated_feature
140 |                        
141 |         text_length, audio_length, video_length = self.args.seq_lens
142 |         self.vision = Truncated(self.vision, video_length)
143 |         self.text = Truncated(self.text, text_length)
144 |         self.audio = Truncated(self.audio, audio_length)
145 | 
146 |     def __normalize(self):
147 |         # (num_examples,max_len,feature_dim) -> (max_len, num_examples, feature_dim)
148 |         self.vision = np.transpose(self.vision, (1, 0, 2))
149 |         self.audio = np.transpose(self.audio, (1, 0, 2))
150 |         # for visual and audio modality, we average across time
151 |         # here the original data has shape (max_len, num_examples, feature_dim)
152 |         # after averaging they become (1, num_examples, feature_dim)
153 |         self.vision = np.mean(self.vision, axis=0, keepdims=True)
154 |         self.audio = np.mean(self.audio, axis=0, keepdims=True)
155 | 
156 |         # remove possible NaN values
157 |         self.vision[self.vision != self.vision] = 0
158 |         self.audio[self.audio != self.audio] = 0
159 | 
160 |         self.vision = np.transpose(self.vision, (1, 0, 2))
161 |         self.audio = np.transpose(self.audio, (1, 0, 2))
162 | 
163 |     def __len__(self):
164 |         return len(self.labels['M'])
165 | 
166 |         # 这里text.shape是三维矩阵[sample_num,tokenizer_output,length]
167 |         # tokenizer_output的3个维度分别是token_ids,mask(识别句子中padding的位置),segment_ids
168 |     def get_seq_len(self):
169 |         return (self.text.shape[2], self.audio.shape[1], self.vision.shape[1])
170 | 
171 |     def get_feature_dim(self):
172 |         return self.text.shape[2], self.audio.shape[2], self.vision.shape[2]
173 | 
174 |     def PLM_tokenizer (self, rawtexts):
175 |         self.tokenizer = AutoTokenizer.from_pretrained(self.args.pretrain_LM, trust_remote_code=True)
176 |         token_list = []
177 |         for text in rawtexts:
178 |             text_tokenizer = self.tokenizer(text,
179 |                                  padding='max_length',  # 如果样本长度不满足最大长度则填充
180 |                                  truncation=True,  # 截断至最大长度
181 |                                  max_length=self.args.seq_lens[0],
182 |                                  return_tensors = 'pt',
183 |                                  add_special_tokens=False
184 |                                 )
185 | 
186 |             token_ids = text_tokenizer['input_ids'].squeeze(0)  # tensor of token ids  torch.Size([max_len])
187 |             attn_masks = text_tokenizer['attention_mask'].squeeze(0)  # binary tensor with "0" for padded values and "1" for the other values  torch.Size([max_len])
188 |             token_type_ids = [0] * len(token_ids)               #不区分上下句
189 | 
190 |             #调整维度
191 |             input_ids = np.expand_dims(token_ids, 1)
192 |             input_mask = np.expand_dims(attn_masks, 1)
193 |             segment_ids = np.expand_dims(token_type_ids, 1)
194 | 
195 |             text_pretrain = np.concatenate([input_ids, input_mask, segment_ids], axis=1).T
196 |             token_list.append(text_pretrain)
197 | 
198 |         # x_dimensions = [array.shape[1] for array in token_list]
199 |         # # 计算 x 维度的平均值
200 |         # average_x = np.mean(x_dimensions)
201 |         # median_x = np.median(x_dimensions)
202 |         token_list = np.array(token_list)
203 |         return token_list
204 | 
205 | 
206 |     def __getitem__(self, index):
207 |         if self.args.train_mode == 'regression':
208 |             sample = {
209 |                 'raw_text': self.rawText[index],
210 |                 'text': torch.Tensor(self.text[index]),
211 |                 'audio': torch.Tensor(self.audio[index]),
212 |                 'vision': torch.Tensor(self.vision[index]),
213 |                 'index': index,
214 |                 'id': self.ids[index],
215 |                 'labels': {k: torch.Tensor(v[index].reshape(-1)) for k, v in self.labels.items()},
216 |                 'labels_prefix': self.labels_prefix[index]
217 |             }
218 |         else:
219 |             sample = {
220 |                 'raw_text': self.rawText[index],
221 |                 'text': torch.Tensor(self.text[index]),
222 |                 'audio': torch.Tensor(self.audio[index]),
223 |                 'vision': torch.Tensor(self.vision[index]),
224 |                 'index': index,
225 |                 'labels': {k: v[index] for k, v in self.labels.items()}
226 |                 # 'labels': {torch.Tensor(self.labels)},
227 |             }
228 | 
229 |         if not self.args.need_data_aligned:
230 |             sample['audio_lengths'] = self.audio_lengths[index]
231 |             sample['vision_lengths'] = self.vision_lengths[index]
232 |             sample['text_lengths'] = self.args.seq_lens[0]
233 | 
234 |         return sample
235 | 
236 | 
237 | 
238 | def MMDataLoader(args):
239 | 
240 |     datasets = {
241 |         'train': MMDataset(args, mode='train'),
242 |         'valid': MMDataset(args, mode='valid'),
243 |         'test': MMDataset(args, mode='test')
244 |     }
245 | 
246 |     if 'seq_lens' in args:
247 |         args.seq_lens = datasets['train'].get_seq_len() 
248 | 
249 |     dataLoader = {
250 |         ds: DataLoader(datasets[ds],
251 |                        batch_size=args.batch_size,
252 |                        num_workers=args.num_workers,
253 |                        shuffle=True)
254 |         for ds in datasets.keys()
255 |     }
256 |     
257 |     return dataLoader


--------------------------------------------------------------------------------