├── .gitignore
├── CVPR_Workshop_ABAW_2024.pdf
├── CVPR_Workshop_ABAW_2024
│   ├── README.md
│   ├── arxiv_main.tex
│   ├── cvpr.sty
│   ├── ieeenat_fullname.bst
│   ├── main.bib
│   ├── main.tex
│   ├── pictures
│   │   ├── Shuttle ganz.jpg
│   │   ├── affectnet
│   │   │   ├── Bild_Russel_AffectNet.pdf
│   │   │   ├── affectnet_cdf.pdf
│   │   │   ├── arousal_distribution.pdf
│   │   │   ├── av_for_each_category.pdf
│   │   │   ├── example_image.pdf
│   │   │   ├── frequency_of_expression.pdf
│   │   │   ├── inference_best_va_affectnet8.pdf
│   │   │   ├── scatterplot.pdf
│   │   │   └── valence_distribution.pdf
│   │   ├── affectnet8onemotic.png
│   │   ├── confusion_7VA.png
│   │   ├── confusion_8VA.png
│   │   ├── emotic
│   │   │   ├── emotic_cdf.pdf
│   │   │   ├── example_image.pdf
│   │   │   ├── frequency_of_expression.pdf
│   │   │   ├── frequency_of_expressions.pdf
│   │   │   └── inference_affectnet8_on_emotic.pdf
│   │   ├── emoticonaffectnet8.png
│   │   ├── inference_affectnet8_on_emotic.pdf
│   │   ├── inference_cross_validation.pdf
│   │   └── inference_emotic_on_affectnet8.pdf
│   ├── preamble.tex
│   ├── sec
│   │   ├── 00_Abstract.tex
│   │   ├── 01_Intro.tex
│   │   ├── 02_Related_Work.tex
│   │   ├── 03_Datasets.tex
│   │   ├── 04_Model.tex
│   │   ├── 06_Conclusion.tex
│   │   └── X_suppl.tex
│   └── todos.txt
├── Honnold_inference.gif
├── LICENSE
├── README.md
├── affectnet_annotations
│   ├── train_set_annotation_without_lnd.csv
│   └── val_set_annotation_without_lnd.csv
├── inference_on_webcam.py
├── mat2py.py
├── models
│   ├── AffectNet7_Efficientnet_Combined
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Efficientnet_Discrete
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Efficientnet_VA
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Maxvit_Combined
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Maxvit_Discrete
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Maxvit_VA
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Swin_Combined
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Swin_Discrete
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet7_Swin_VA
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Efficientnet_Combined
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Efficientnet_Discrete
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Efficientnet_VA
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Maxvit_Combined
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Maxvit_Discrete
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Maxvit_VA
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Swin_Combined
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Swin_Discrete
│   │   ├── generate_csv.py
│   │   └── train.py
│   ├── AffectNet8_Swin_VA
│   │   ├── generate_csv.py
│   │   └── train.py
│   └── evaluation.py
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | /.DS_Store
2 | /.ipynb_checkpoints
3 | /output
4 | /__pycache__
5 | /data.csv
6 | myvenv
7 | .venv
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/README.md:
--------------------------------------------------------------------------------
1 | # CVPR/ICCV/3DV Official LaTeX template
2 |
3 | History (in reverse chronological order)
4 |
5 | - References in `cvprblue` for CVPR 2024 by [Klaus Greff](https://github.com/Qwlouse)
6 | - added natbib for CVPR 2024 by [Christian Richardt](https://richardt.name/)
7 | - replaced buggy (review-mode) line numbering for 3DV 2024 by [Adin Ramirez Rivera](https://openreview.net/profile?id=~Ad%C3%ADn_Ram%C3%ADrez_Rivera1)
8 | - logic for inline supplementary for 3DV 2024 by [Andrea Tagliasacchi](https://taiya.github.io)
9 | - modernized for CVPR 2022 by [Stefan Roth](stefan.roth@NOSPAMtu-darmstadt.de)
10 | - created cvpr.sty file to unify review/rebuttal/final versions by [Ming-Ming Cheng](https://github.com/MCG-NKU/CVPR_Template)
11 | - developed CVPR 2005 template by [Paolo Ienne](Paolo.Ienne@di.epfl.ch) and [Andrew Fitzgibbon](awf@acm.org)
12 |
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/arxiv_main.tex:
--------------------------------------------------------------------------------
1 | % CVPR 2024 Paper Template; see https://github.com/cvpr-org/author-kit
2 | \newcommand{\val}{\textit{valence}}
3 | \newcommand{\aro}{\textit{arousal}}
4 | \newcommand{\dom}{\textit{dominance}}
5 | \newcommand{\Val}{\textit{Valence}}
6 | \newcommand{\Aro}{\textit{Arousal}}
7 | \newcommand{\Dom}{\textit{Dominance}}
8 | \newcommand{\va}{\val{} and \aro{}}
9 | \newcommand{\VA}{\Val{} and \Aro{}}
10 | \newcommand{\affectnet}{AffectNet}
11 | \newcommand{\emotic}{EMOTIC}
12 |
13 | \newcommand\copyrighttext{%
14 | \footnotesize \textcopyright 2024 IEEE. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses, in any current or future media, including reprinting/republishing this material for advertising or promotional purposes, creating new collective works, for resale or redistribution to servers or lists, or reuse of any copyrighted component of this work in other works.}
15 | \newcommand\copyrightnotice{%
16 | \begin{tikzpicture}[remember picture,overlay]
17 | \node[anchor=south,yshift=10pt] at (current page.south) {\fbox{\parbox{\dimexpr\textwidth-\fboxsep-\fboxrule\relax}{\copyrighttext}}};
18 | \end{tikzpicture}%
19 | }
20 | \documentclass[10pt,twocolumn,letterpaper]{article}
21 |
22 | %%%%%%%%% PAPER TYPE - PLEASE UPDATE FOR FINAL VERSION
23 | %\usepackage{cvpr} % To produce the CAMERA-READY version
24 | %\usepackage[review]{cvpr} % To produce the REVIEW version
25 | \usepackage[pagenumbers]{cvpr} % To force page numbers, e.g. for an arXiv version
26 |
27 | % NW: For long table
28 | \usepackage{tabularx,booktabs}
29 | \usepackage[accsupp]{axessibility} % Improves PDF readability for those with disabilities.
30 |
31 | % Import additional packages in the preamble file, before hyperref
32 | \input{preamble}
33 |
34 | % It is strongly recommended to use hyperref, especially for the review version.
35 | % hyperref with option pagebackref eases the reviewers' job.
36 | % Please disable hyperref *only* if you encounter grave issues,
37 | % e.g. with the file validation for the camera-ready version.
38 | %
39 | % If you comment hyperref and then uncomment it, you should delete *.aux before re-running LaTeX.
40 | % (Or just hit 'q' on the first LaTeX run, let it finish, and you should be clear).
41 | \definecolor{cvprblue}{rgb}{0.21,0.49,0.74}
42 | \usepackage[pagebackref,breaklinks,colorlinks,citecolor=cvprblue]{hyperref}
43 | \usepackage{tikz}
44 | %%%%%%%%% PAPER ID - PLEASE UPDATE
45 | \def\paperID{33} % *** Enter the Paper ID here
46 | \def\confName{CVPR}
47 | \def\confYear{2024}
48 |
49 | %%%%%%%%% TITLE - PLEASE UPDATE
50 | \title{CAGE: Circumplex Affect Guided Expression Inference}
51 |
52 | %%%%%%%%% AUTHORS - PLEASE UPDATE
53 | % \author{Niklas Wagner\\
54 | % Karlsruhe Institute of Technology\\
55 | % {\tt\small uvssk@student.kit.edu}
56 | % % For a paper whose authors are all at the same institution,
57 | % % omit the following lines up until the closing ``}''.
58 | % % Additional authors and addresses can be added with ``\and'',
59 | % % just like the second author.
60 | % % To save space, use either the email address or home page, not both
61 | % \and
62 | % Felix Mätzler\\
63 | % Karlsruhe Institute of Technology\\
64 | % {\tt\small uvian@student.kit.edu}
65 | % \and
66 | % Samed R. Vossberg\\
67 | % Karlsruhe Institute of Technology\\
68 | % {\tt\small urgfl@student.kit.edu}
69 | % }
70 |
71 | \author{Niklas Wagner$^{1}$$^,$$^*$, Felix Mätzler$^{1}$$^,$$^*$, Samed R. Vossberg$^{1}$$^,$$^*$, Helen Schneider$^{1}$$^*$, Svetlana Pavlitska$^{2}$, \\J. Marius Zöllner$^{1,2}$\\
72 | \textit{$^{1}$ Karlsruhe Institute of Technology (KIT), Germany}\\
73 | \textit{$^{2}$ FZI Research Center for Information Technology, Germany} \\
74 | {\tt\small helen.schneider@kit.edu}\\
75 | }
76 | \begin{document}
77 | \maketitle
78 | \def\thefootnote{*}\footnotetext{These authors contributed equally to this work}
79 | \copyrightnotice
80 | \thispagestyle{empty}
81 | \pagestyle{empty}
82 | \input{sec/00_Abstract}
83 | \input{sec/01_Intro}
84 | % \newpage
85 | % \clearpage
86 | \input{sec/02_Related_Work}
87 | % \newpage
88 | % \clearpage
89 | \input{sec/03_Datasets}
90 | % \newpage
91 | % \clearpage
92 | \input{sec/04_Model}
93 |
94 | \input{sec/06_Conclusion}
95 | \newpage
96 | \clearpage
97 | {
98 | \small
99 | \bibliographystyle{ieeenat_fullname}
100 | \bibliography{main}
101 | }
102 |
103 | % WARNING: do not forget to delete the supplementary pages from your submission
104 | % \input{sec/X_suppl}
105 |
106 | \end{document}
107 |
--------------------------------------------------------------------------------
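Since `arxiv_main.tex` resolves its references through BibTeX (`\bibliographystyle{ieeenat_fullname}` with `\bibliography{main}`), a typical build sequence is sketched below, assuming a standard TeX distribution with the CVPR author-kit files present in the same directory:

```bash
pdflatex arxiv_main
bibtex arxiv_main
pdflatex arxiv_main && pdflatex arxiv_main   # two more passes to resolve citations and backrefs
```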
/CVPR_Workshop_ABAW_2024/main.tex:
--------------------------------------------------------------------------------
1 | % CVPR 2024 Paper Template; see https://github.com/cvpr-org/author-kit
2 | \newcommand{\val}{\textit{valence}}
3 | \newcommand{\aro}{\textit{arousal}}
4 | \newcommand{\dom}{\textit{dominance}}
5 | \newcommand{\Val}{\textit{Valence}}
6 | \newcommand{\Aro}{\textit{Arousal}}
7 | \newcommand{\Dom}{\textit{Dominance}}
8 | \newcommand{\va}{\val{} and \aro{}}
9 | \newcommand{\VA}{\Val{} and \Aro{}}
10 | \newcommand{\affectnet}{AffectNet}
11 | \newcommand{\emotic}{EMOTIC}
12 |
13 | \documentclass[10pt,twocolumn,letterpaper]{article}
14 |
15 | %%%%%%%%% PAPER TYPE - PLEASE UPDATE FOR FINAL VERSION
16 | \usepackage{cvpr} % To produce the CAMERA-READY version
17 | %\usepackage[review]{cvpr} % To produce the REVIEW version
18 | % \usepackage[pagenumbers]{cvpr} % To force page numbers, e.g. for an arXiv version
19 |
20 | % NW: For long table
21 | \usepackage{tabularx,booktabs}
22 | \usepackage[accsupp]{axessibility} % Improves PDF readability for those with disabilities.
23 |
24 | % Import additional packages in the preamble file, before hyperref
25 | \input{preamble}
26 |
27 | % It is strongly recommended to use hyperref, especially for the review version.
28 | % hyperref with option pagebackref eases the reviewers' job.
29 | % Please disable hyperref *only* if you encounter grave issues,
30 | % e.g. with the file validation for the camera-ready version.
31 | %
32 | % If you comment hyperref and then uncomment it, you should delete *.aux before re-running LaTeX.
33 | % (Or just hit 'q' on the first LaTeX run, let it finish, and you should be clear).
34 | \definecolor{cvprblue}{rgb}{0.21,0.49,0.74}
35 | \usepackage[pagebackref,breaklinks,colorlinks,citecolor=cvprblue]{hyperref}
36 |
37 | %%%%%%%%% PAPER ID - PLEASE UPDATE
38 | \def\paperID{33} % *** Enter the Paper ID here
39 | \def\confName{CVPR}
40 | \def\confYear{2024}
41 |
42 | %%%%%%%%% TITLE - PLEASE UPDATE
43 | \title{CAGE: Circumplex Affect Guided Expression Inference}
44 |
45 | %%%%%%%%% AUTHORS - PLEASE UPDATE
46 | % \author{Niklas Wagner\\
47 | % Karlsruhe Institute of Technology\\
48 | % {\tt\small uvssk@student.kit.edu}
49 | % % For a paper whose authors are all at the same institution,
50 | % % omit the following lines up until the closing ``}''.
51 | % % Additional authors and addresses can be added with ``\and'',
52 | % % just like the second author.
53 | % % To save space, use either the email address or home page, not both
54 | % \and
55 | % Felix Mätzler\\
56 | % Karlsruhe Institute of Technology\\
57 | % {\tt\small uvian@student.kit.edu}
58 | % \and
59 | % Samed R. Vossberg\\
60 | % Karlsruhe Institute of Technology\\
61 | % {\tt\small urgfl@student.kit.edu}
62 | % }
63 |
64 | \author{Niklas Wagner$^{1}$$^,$$^*$, Felix Mätzler$^{1}$$^,$$^*$, Samed R. Vossberg$^{1}$$^,$$^*$, Helen Schneider$^{1}$$^*$, Svetlana Pavlitska$^{2}$, \\J. Marius Zöllner$^{1,2}$\\
65 | \textit{$^{1}$ Karlsruhe Institute of Technology (KIT), Germany}\\
66 | \textit{$^{2}$ FZI Research Center for Information Technology, Germany} \\
67 | {\tt\small helen.schneider@kit.edu}\\
68 | }
69 | \begin{document}
70 | \maketitle
71 | \def\thefootnote{*}\footnotetext{These authors contributed equally to this work}
72 | \input{sec/00_Abstract}
73 | \input{sec/01_Intro}
74 | % \newpage
75 | % \clearpage
76 | \input{sec/02_Related_Work}
77 | % \newpage
78 | % \clearpage
79 | \input{sec/03_Datasets}
80 | % \newpage
81 | % \clearpage
82 | \input{sec/04_Model}
83 |
84 | \input{sec/06_Conclusion}
85 | \newpage
86 | \clearpage
87 | {
88 | \small
89 | \bibliographystyle{ieeenat_fullname}
90 | \bibliography{main}
91 | }
92 |
93 | % WARNING: do not forget to delete the supplementary pages from your submission
94 | % \input{sec/X_suppl}
95 |
96 | \end{document}
97 |
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/Shuttle ganz.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/Shuttle ganz.jpg
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/Bild_Russel_AffectNet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/Bild_Russel_AffectNet.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/affectnet_cdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/affectnet_cdf.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/arousal_distribution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/arousal_distribution.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/av_for_each_category.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/av_for_each_category.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/example_image.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/example_image.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/frequency_of_expression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/frequency_of_expression.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/inference_best_va_affectnet8.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/inference_best_va_affectnet8.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/scatterplot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/scatterplot.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet/valence_distribution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/valence_distribution.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/affectnet8onemotic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet8onemotic.png
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/confusion_7VA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/confusion_7VA.png
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/confusion_8VA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/confusion_8VA.png
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/emotic/emotic_cdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/emotic_cdf.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/emotic/example_image.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/example_image.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expression.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expressions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expressions.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/emotic/inference_affectnet8_on_emotic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/inference_affectnet8_on_emotic.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/emoticonaffectnet8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emoticonaffectnet8.png
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/inference_affectnet8_on_emotic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/inference_affectnet8_on_emotic.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/inference_cross_validation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/inference_cross_validation.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/pictures/inference_emotic_on_affectnet8.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/inference_emotic_on_affectnet8.pdf
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/preamble.tex:
--------------------------------------------------------------------------------
1 | %
2 | % --- inline annotations
3 | %
4 | \usepackage[dvipsnames]{xcolor}
5 | \newcommand{\red}[1]{{\color{red}#1}}
6 | \newcommand{\todo}[1]{{\color{red}#1}}
7 | \newcommand{\TODO}[1]{\textbf{\color{red}[TODO: #1]}}
8 | % --- disable by uncommenting
9 | % \renewcommand{\TODO}[1]{}
10 | % \renewcommand{\todo}[1]{#1}
11 |
12 |
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/sec/00_Abstract.tex:
--------------------------------------------------------------------------------
1 | \begin{abstract}
2 | Understanding emotions and expressions is a task of interest across multiple disciplines, especially for improving user experiences. Contrary to the common perception, it has been shown that emotions are not discrete entities but instead exist along a continuum. People understand discrete emotions differently due to a variety of factors, including cultural background, individual experiences, and cognitive biases. Therefore, most approaches to expression understanding, particularly those relying on discrete categories, are inherently biased. In this paper, we present a comparative in-depth analysis of two common datasets (\affectnet{} and \emotic{}) equipped with the components of the circumplex model of affect. Further, we propose a model for the prediction of facial expressions tailored for lightweight applications. Using a small-scale MaxViT-based model architecture, we evaluate the impact of training with discrete expression category labels % (\textit{Neutral, Happiness, Sadness, Surprise, Fear, Disgust, Anger, Contempt})
3 | in combination with the continuous \va{} labels. We show that considering valence and arousal in addition to discrete category labels helps to significantly improve expression inference. The proposed model outperforms the current state-of-the-art models on \affectnet{}, establishing it as the best-performing model for inferring \va{}, achieving a 7\% lower RMSE. Training scripts and trained weights to reproduce our results can be found here: \url{https://github.com/wagner-niklas/CAGE_expression_inference}.
4 | \end{abstract}
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/sec/01_Intro.tex:
--------------------------------------------------------------------------------
1 | \section{Introduction}
2 | \label{sec:intro}
3 |
4 | The inference of emotions through expressions has been a topic of interest in recent years, as it can give insights into a person's feelings towards other individuals or topics. Mehrabian and Wiener~\cite{mehrabian1967decoding} suggest that 55\% of communication is conveyed through expressions. Lapakko~\cite{Lapakko2015CommunicationI9} argues, however, that these findings are limited to emotional states. Automating the analysis of expressions to gain insights into user experience is one step towards live feedback without direct interaction with an individual.
5 |
6 | \begin{figure}[t]
7 | \centering
8 | \includegraphics[width=0.8\columnwidth, trim={3cm 11cm 3cm 3cm}, clip]{pictures/affectnet/Bild_Russel_AffectNet.pdf}
9 | \caption{\textit{Valence/arousal} for sample images from \affectnet{}~\cite{mollahosseini2017affectnet}.}
10 | \label{fig:Russel_Affectnet}
11 | \end{figure}
12 |
13 |
14 | A common approach is \textit{expression inference}, i.e., the classification of emotional expressions into discrete categories. However, a comprehensive meta-analysis of facial expression research by Barrett et al.~\cite{barretetal2019} has shown that there is no consensus, either across or within cultures, on specific facial movements reliably depicting one category of emotion. They suggest that affective states can more reliably be inferred by a third-party individual and emphasize that these states are inferred, not recognized. According to Russell~\cite{rusellmodell}, affects can be described as a set of dimensions, with each dimension varying independently. These dimensions are called \va{}, representing the positivity/negativity and the intensity/activation of expressions, respectively. Using \va{} of the circumplex model of affect~\cite{rusellmodell} as additional dimensions, rather than only discrete emotions, for expression inference thus offers a more robust framework, as they provide a continuous spectrum that captures the underlying affective states.
15 |
16 |
17 | In this work, we compare training with \va{} labels merged with the commonly used discrete emotions against training with each of the two label types separately.
18 | Our approach involves pinpointing the differences and similarities between two leading datasets that catalog images according to their explicit discrete and continuous emotional states: \affectnet~\cite{mollahosseini2017affectnet} and \emotic{}~\cite{kosti_emotic_2017}.
19 | % We examine the labeling process and show that there is still a need for further datasets to create a universal model that does not guess emotions based on labels of third-party individuals but rather gets information about the true internal state of each image subject.
20 | % We refer to~\cite{barretetal2019} for a more detailed discussion.
21 | We then develop a lightweight deep neural network tailored for computer vision tasks, aiming to accurately infer these discrete emotions as well as the continuous dimensions of \va{}, surpassing the performance of existing models. In particular, our model reduces the root-mean-square error (RMSE) by 7.0\% for \val{} and 6.8\% for \aro{}. It also increases the concordance correlation coefficient (CCC) by 0.8\% for \val{} and 2.0\% for \aro{} when tested on the \affectnet{} dataset. These improvements are reflected in our final results, with CCC values of 0.716 for \val{} and 0.642 for \aro{}, and RMSE values of 0.331 for \val{} and 0.305 for \aro{}. Furthermore, we exceed the top-3 accuracy set by Khan \etal~\cite{khan2024focusclip} on the \emotic{} dataset by 1.0\%. %Additionally, we conduct a cross-evaluation of the model's effectiveness using the given test datasets. %In summary, this research focuses on the following question:
22 | % Consequently, the impact of these approaches needs to be measured. To do so, we
23 | % \begin{enumerate} [label=(\roman*)]
24 | % \item identify differences and similarities of two state-of-the-art datasets containing images with their apparent discrete and continuous emotion states
25 | % \item enhance a lightweight deep neural network architecture suited for computer vision to suggest these discrete emotions and/or continuous dimensions \va{}l{}, \aro{}
26 | % \item cross-evaluate the resulting model performances on the given test datasets. Hence, the following research question motivates our research:
27 | % \end{enumerate}
28 | %\textit{What effect does the addition of \val{}/\aro{} regression to discrete emotion classification have on facial emotion guessing performance across datasets?}
29 |
30 | %In the following we examine related work in \autoref{sec:relatedwork}, looking into different datasets and related approaches in emotion guessing. Then, we describe our applied data analysis and our model training approach in \autoref{sec:method}. Subsequently, we share our insights on the data and the outcomes of our model training in \autoref{sec:results}. Lastly, we present our conclusions and future perspectives in \autoref{sec:conclusion}.
31 |
--------------------------------------------------------------------------------
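The concordance correlation coefficient (CCC) cited above is not restated in the paper text; under its usual definition (Lin's CCC), with means $\mu_x, \mu_y$, standard deviations $\sigma_x, \sigma_y$, and Pearson correlation $\rho$ between predictions and labels, it reads:

```latex
\mathrm{CCC} = \frac{2\,\rho\,\sigma_x \sigma_y}{\sigma_x^2 + \sigma_y^2 + (\mu_x - \mu_y)^2}
```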
/CVPR_Workshop_ABAW_2024/sec/02_Related_Work.tex:
--------------------------------------------------------------------------------
1 | \section{Related Work}
2 | \label{sec:relatedwork}
3 |
4 | % In this chapter, we go into the existing research and methodologies relevant to emotion guessing via facial expressions.
5 | In the field of affective computing, in particular expression inference, the integration of \val{}/\aro{} regression with discrete emotion classification has emerged as a promising approach to enhance the performance and applicability across diverse datasets. In the following, we discuss existing works in this domain.
6 |
7 | \subsection{Datasets for Expression Inference}
8 |
9 | In the domain of expression inference, several datasets exist. However, these datasets vary significantly in both the data they offer and their popularity.
10 | Among the most widely used datasets are FER2013~\cite{goodfellow_challenges_2013} and FERPlus~\cite{barsoum_training_2016}, which provide annotated 48$\times$48 pixel black-and-white facial images classified into seven (FER) or eight (FER+) discrete emotional states.
11 | % While these datasets have contributed significantly to the advancement of emotion state research, they may have limitations in capturing the complexity and nuances of human emotions due to limited data labeling. In our approach, we have therefore chosen the \affectnet{}~\cite{mollahosseini2017affectnet} and \emotic{}~\cite{emotic_pami2019} datasets.
12 | While these datasets have been the foundation for numerous research contributions, they have been expanded in various ways over the past years. Notable examples in this context are the \emotic{}~\cite{kosti_emotic_2017} and \affectnet{}~\cite{mollahosseini2017affectnet} datasets, which both contain high-resolution RGB images.
13 | \affectnet{} is a large-scale database containing around 0.4 million facial images labeled by 12 annotators. Each image is annotated with categorical emotions, mirroring those used in the FER+ dataset, in addition to \va{} values. This approach offers a more refined representation of emotions compared to categorical labels only.
14 |
15 | The \emotic{} (\textit{Emotions in Context}) dataset provides a more nuanced perspective on affective states. Unlike earlier datasets focused solely on facial expressions, \emotic{} captures individuals in full-body shots within their surrounding context. \emotic{} features bounding boxes that encompass each individual's entire body, eliminating the need for a visible face. Furthermore, it categorizes emotions into 26 discrete categories, allowing for multiple labels per individual. In addition, the dataset expands these discrete values with continuous measures of \va{} as well as \dom{} that measures the level of control a person feels during a situation, ranging from submissive / non-control to dominant / in-control~\cite{emotic_pami2019}.
16 |
17 | While there are at least 28 datasets such as CK+~\cite{5543262}, RAF-DB~\cite{Li_2017_CVPR} or Aff-Wild2~\cite{kollias2023abaw2, kollias2023multi, kollias2022abaw, kollias2023abaw, kollias2021analysing, kollias2021affect, kollias2021distribution, kollias2020analysing, kollias_expression_2019, kollias2019deep, kollias2019face, zafeiriou2017aff, kollias2019affwild2} focusing specifically on \textit{facial expression recognition/inference} featuring continuous and/or discrete measures, we chose to focus on the two mentioned above, since we are interested in both discrete emotion labeling on an individual basis as well as continuous measures of \va{}.
18 | \affectnet{}~\cite{mollahosseini2017affectnet} is arguably the most widely represented dataset in the current research field.
19 | \emotic{}, on the other hand, although not the most utilized dataset, offers the most refined representation of measures while still focusing on a combination of discrete and continuous variables to define an individual's emotion.
20 | % Außerdem hier noch Related work angeben was schon gemacht wurde im Sinne vergleich? Gibt es Paper die unser Thema schon genauer anschauen? Gibt es Vergleich zwischen den Datensätzen oder zwischen FER und den einzelnen?! Hier einbinden
21 | % Elicit research machen
22 |
23 | \begin{table}[t]
24 | \centering
25 | \begin{tabular}{r | c | c }
26 | \hline
27 | \textbf{Method} & \textbf{Accuracy [\%]} & \textbf{Date [mm-yy]} \\
28 | \hline
29 | DDAMFN~\cite{electronics12173595} & 64.25 & 08-23 \\
30 | POSTER++~\cite{mao2023poster} &63.77 & 01-23 \\
31 | S2D~\cite{chen2023static}&63.06 & 12-22 \\
32 | MT EffNet-B2~\cite{9815154} & 63.03 & 07-22 \\
33 | MT-ArcRes~\cite{kollias_expression_2019} & 63.00 & 09-19 \\ \hline
34 | \end{tabular}
35 | \caption{Top five models on \affectnet{}-8 benchmark~\cite{paperswithcodeaff}.}
36 | \label{tab:relatedworkaffectnet8}
37 | \end{table}
38 |
39 | \begin{table}[t]
40 | \centering
41 | \begin{tabular}{r | c | c }
42 | \hline
43 | \textbf{Method} & \textbf{Accuracy [\%]} & \textbf{Date [mm-yy]} \\
44 | \hline
45 | S2D~\cite{chen2023static}&67.62 & 12-22 \\
46 | POSTER++~\cite{mao2023poster} &67.49 & 01-23 \\
47 | DDAMFN~\cite{electronics12173595} & 67.03 & 08-23 \\
48 | Emo\affectnet{}~\cite{RYUMINA2022435} & 66.49 & 12-22 \\
49 | Emotion-GCN~\cite{Antoniadis_2021} & 66.46 & 07-21 \\\hline
50 | \end{tabular}
51 | \caption{Top five models on \affectnet{}-7 benchmark~\cite{paperswithcodeaff}.}
52 | \label{tab:relatedworkaffectnet7}
53 | \end{table}
54 |
55 | \subsection{Expression Inference Models}
56 |
57 | Expression inference on datasets like \affectnet{} has been addressed in numerous publications.
58 | According to Paperswithcode~\cite{paperswithcodeaff}, 207 \affectnet{}-related papers have been published since 2020. Tables~\ref{tab:relatedworkaffectnet8} and~\ref{tab:relatedworkaffectnet7} show the five best models on the leaderboards for the \affectnet{}-8 and \affectnet{}-7 test benchmarks as of 01.01.2024. As the initial FER dataset does not contain the emotion \textit{Contempt}, there is also an \affectnet{}-7 benchmark omitting this emotion.
59 | So far, the best-performing models for expression inference have been almost exclusively based on convolutional neural networks (CNNs), e.g. ResNet-18~\cite{he2016deep}. Although CNNs are still competitive as shown by Savchenko \etal~\cite{9815154}, more recent architectures like the POSTER++~\cite{mao2023poster} facilitate hybrid facial expression inference via networks that combine CNNs for feature extraction with vision transformer elements for efficient multi-scale feature integration and attention-based cross-fusion, achieving state-of-the-art performance with reduced computational cost.
60 | Because \emotic{} allows multiple discrete labels for each individual, a general accuracy score is less applicable. Instead, Khan \etal~\cite{khan2024focusclip} suggest that \textit{top-k accuracy} can provide more insight. Utilizing a multi-modal approach leveraging region-of-interest heatmaps, a vision encoder, and a text encoder, they achieve a top-3 accuracy of 13.73\%.
61 | % Far less popular is \emotic{}, cited by 63 papers since 2020 according to Paperswithcode (fig.~\ref{tab:relatedworkemotic})
62 | % \begin{table}[htbp]
63 | % \centering
64 | % \begin{tabular}{r | c | c }
65 | % \textbf{Method} & \textbf{mAP} & \textbf{Date [mm-yy]} \\
66 | % \hline
67 | % EmotiCon~\cite{mittal2020emoticon} &35.48 & 03-20 \\
68 | % EmotiCon (GCN)~\cite{mittal2020emoticon} & 32.03 & 03-20 \\
69 | % Fusion Model 1~\cite{Kosti_2019} & 29.45 & 03-20 \\
70 | % Fusion Model 2~\cite{Kosti_2019} & 27.70 & 03-20 \\
71 | % CAER-Net~\cite{Lee_2019_ICCV} & 20.84 & 10-19 \\
72 | % \end{tabular}
73 | % \caption{Comparison Top-5 \emotic{} Benchmarks~\cite{paperswithcodeemo} }
74 | % \label{tab:relatedworkemotic}
75 | % \end{table}
76 | Khor Wen Hwooi \etal~\cite{hwooi_deep_2022} suggested extracting features with CNNs and then applying regression with a CultureNet~\cite{rudovic2018culturenet} for the continuous prediction of affect from facial expression images within the \va{} space. The best results were achieved with DenseNet201~\cite{huang2017densely} for feature extraction. The work demonstrates superior performance in predicting \va{} levels, particularly on the \affectnet{} dataset.
77 | %The authors highlight their model's ability to generalize across unseen datasets by testing on the Aff-Wild2~\cite{kollias_expression_2019} dataset.
78 |
--------------------------------------------------------------------------------
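The \textit{top-k accuracy} discussed in the section above counts a sample as correct when the true label appears among the $k$ highest-scoring classes. A minimal PyTorch sketch for the single-label case (function and variable names are illustrative, not from the repository):

```python
import torch

def top_k_accuracy(logits: torch.Tensor, targets: torch.Tensor, k: int = 3) -> float:
    """Fraction of samples whose true class is among the k highest-scoring classes."""
    topk_idx = logits.topk(k, dim=1).indices               # (N, k) predicted class indices
    hits = (topk_idx == targets.unsqueeze(1)).any(dim=1)   # (N,) is the true label in the top k?
    return hits.float().mean().item()

# Toy example: 4 samples, 5 classes
logits = torch.randn(4, 5)
targets = torch.tensor([0, 2, 1, 4])
print(f"top-3 accuracy: {top_k_accuracy(logits, targets):.2f}")
```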
/CVPR_Workshop_ABAW_2024/sec/06_Conclusion.tex:
--------------------------------------------------------------------------------
1 | \section{Conclusion \& Outlook}
2 | \label{sec:conclusion}
3 |
4 | In this paper, we assessed the capabilities of discrete classifier approaches against multi-task learning models for inferring emotional expressions.
5 | We trained our models on two prominent datasets that provide both discrete expression labels and values based on the circumplex model of affect.
6 |
7 | \textbf{Firstly}, we performed an in-depth analysis of the datasets. We observed that while test datasets are often balanced with respect to emotional expressions, the balance is not maintained for \va{}; models trained solely on \va{} therefore tend to minimize errors by favoring the over-represented regions of the label space. Additionally, the intricate distribution of the \emotic{} dataset deserves further study, especially how it varies with the number of classes in the train and test sets.
8 |
9 | \textbf{Secondly}, we proposed to use the MaxViT model architecture and described the training and evaluation protocol for both datasets. The proposed approach significantly improved model accuracy. Even in cases of misclassification, the predicted \va{} values often remained accurate. Establishing a threshold for correct prediction of \va{} poses an interesting challenge for future work, as it involves considering factors such as human error and the inherent complexity of emotional expression perception. Furthermore, our model based on \affectnet{} demonstrated robust performance in \va{} estimation via cross-validation. This suggests the potential for it to serve as a well-generalized model. Conversely, the performance of our \emotic{}-based approach was less conclusive, possibly due to insufficient data or other factors.
10 |
11 | In conclusion, our research underscores the effectiveness of continuous value approaches within multi-task learning frameworks for emotional expression inference. Further exploration and refinement of these methodologies could yield even more accurate and robust models in the future.
12 |
--------------------------------------------------------------------------------
/CVPR_Workshop_ABAW_2024/sec/X_suppl.tex:
--------------------------------------------------------------------------------
1 | \clearpage
2 | \setcounter{page}{1}
3 | \maketitlesupplementary
4 |
5 |
6 | \section{Rationale}
7 | \label{sec:rationale}
8 | %
9 | Having the supplementary compiled together with the main paper means that:
10 | %
11 | \begin{itemize}
12 | \item The supplementary can back-reference sections of the main paper, for example, we can refer to \cref{sec:intro};
13 | \item The main paper can forward reference sub-sections within the supplementary explicitly (e.g. referring to a particular experiment);
14 | \item When submitted to arXiv, the supplementary will already be included at the end of the paper.
15 | \end{itemize}
16 | %
17 | To split the supplementary pages from the main paper, you can use \href{https://support.apple.com/en-ca/guide/preview/prvw11793/mac#:~:text=Delete%20a%20page%20from%20a,or%20choose%20Edit%20%3E%20Delete).}{Preview (on macOS)}, \href{https://www.adobe.com/acrobat/how-to/delete-pages-from-pdf.html#:~:text=Choose%20%E2%80%9CTools%E2%80%9D%20%3E%20%E2%80%9COrganize,or%20pages%20from%20the%20file.}{Adobe Acrobat} (on all OSs), as well as \href{https://superuser.com/questions/517986/is-it-possible-to-delete-some-pages-of-a-pdf-document}{command line tools}.
--------------------------------------------------------------------------------
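As a concrete example of the command-line route mentioned at the end of the file above (a sketch: it assumes `qpdf` is installed, and the 1-8 page range for the main paper is hypothetical):

```bash
qpdf compiled.pdf --pages . 1-8 -- main_paper_only.pdf
```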
/CVPR_Workshop_ABAW_2024/todos.txt:
--------------------------------------------------------------------------------
1 | - The references for Aff-Wild2 database are not right, please use the ones found here:
2 | https://affective-behavior-analysis-in-the-wild.github.io/6th/index.html -
3 | - emotion guessing? facial expression inference
4 | - cite: Distribution Matching for Multi-Task Learning of Classification Tasks:
5 | a Large-Scale Study on Faces & Beyond
6 | - lines 114-119: Aff-Wild2 also contains both discrete categories (7 basic expressions), as well as continuous valence-arousal (it also contains action units)
7 | - more citations for Aff-Wild2
8 | - submit IEEE copyright
9 | - Repo: https://github.com/wagner-niklas/CAGE-CircumplexAffectGuidedExpressionInference
10 |
11 | - results for swin transformer
12 |
13 |
14 | - Subsection 5.1.3
--------------------------------------------------------------------------------
/Honnold_inference.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/Honnold_inference.gif
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Niklas Wagner
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Circumplex Affect Guided Expression Inference (CAGE)
2 |
3 | ## Realtime Expression Inference Supported By The Circumplex Model
4 |
5 | ### Keywords: User experience, Expression Inference, FER, Expression Recognition, Emotion Recognition, Supervised Learning, Computer Vision, Data Set Comparison, Autonomous driving
6 |
7 | [](https://paperswithcode.com/sota/arousal-estimation-on-affectnet?p=cage-circumplex-affect-guided-expression)
8 | [](https://paperswithcode.com/sota/valence-estimation-on-affectnet?p=cage-circumplex-affect-guided-expression)
9 | [](https://paperswithcode.com/sota/dominance-estimation-on-emotic?p=cage-circumplex-affect-guided-expression)
10 | [](https://paperswithcode.com/sota/arousal-estimation-on-emotic?p=cage-circumplex-affect-guided-expression)
11 | [](https://paperswithcode.com/sota/valence-estimation-on-emotic?p=cage-circumplex-affect-guided-expression)
12 | [](https://paperswithcode.com/sota/emotion-recognition-on-emotic?p=cage-circumplex-affect-guided-expression)
13 | [](https://paperswithcode.com/sota/facial-expression-recognition-on-affectnet?p=cage-circumplex-affect-guided-expression)
14 |
15 | ### Citation
16 | If you use this repository or any of its contents, please consider citing our paper:
17 | [CAGE: Circumplex Affect Guided Expression Inference](https://arxiv.org/abs/2404.14975)
18 | ```
19 | @InProceedings{Wagner_2024_CVPR,
20 | author = {Wagner, Niklas and M\"atzler, Felix and Vossberg, Samed R. and Schneider, Helen and Pavlitska, Svetlana and Z\"ollner, J. Marius},
21 | title = {CAGE: Circumplex Affect Guided Expression Inference},
22 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
23 | month = {June},
24 | year = {2024},
25 | pages = {4683-4692}
26 | }
27 | ```
27 |
28 | ### Abstract:
29 | Understanding expressions and emotions is a task of interest across multiple disciplines, especially for improving user experiences. Contrary to the common perception, it has been shown that expressions are not discrete entities but instead exist along a continuum. People understand discrete expressions differently due to a variety of factors, including cultural background, individual experiences, and cognitive biases. Therefore, most approaches to expression understanding, particularly those relying on discrete categories, are inherently biased. In this paper, we present a comparative in-depth analysis of two common datasets (AffectNet and EMOTIC) equipped with the components of the circumplex model of affect. Further, we propose a model for the prediction of facial expressions tailored for lightweight applications. Using a small-scale MaxViT-based model architecture, we evaluate the impact of discrete expression category labels in training with the continuous valence and arousal labels. We show that considering valence and arousal in addition to discrete category labels helps to significantly improve expression prediction. The proposed model outperforms the current state-of-the-art models on AffectNet, establishing it as the best-performing model for inferring valence and arousal, achieving a 7% lower RMSE.
30 |
31 | ### Model inference on a video:
32 | 
33 |
34 |
35 | ### Usage:
36 | To run the version with our best-performing model, cd into the project directory and set up the environment first.
37 | Install requirements:
38 | ```
39 | pip install -r requirements.txt
40 | ```
41 |
42 | If you want to train / alter the models you can run one of the python scripts in the directory.
43 | To run the train scripts, make sure you have the EMOTIC[[1]](#1) and AffectNet[[2]](#2) datasets downloaded and saved in the right directory.
44 | The datasets are not publicly available; access has to be requested ([EMOTIC, 2019](https://s3.sunai.uoc.edu/emotic/download.html)) ([AffectNet, 2017](http://mohammadmahoor.com/affectnet/))
45 |
46 |
47 |
48 | [1]
49 | R. Kosti, J.M. Álvarez, A. Recasens and A. Lapedriza, "Context based emotion recognition using emotic dataset", IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 2019.
50 |
51 | [2]
52 | Ali Mollahosseini, Behzad Hasani and Mohammad H. Mahoor, "AffectNet: A Database for Facial Expression, Valence, and Arousal Computing in the Wild," in IEEE Transactions on Affective Computing, vol. 10, no. 1, pp. 18-31, 1 Jan.-March 2019, doi: 10.1109/TAFFC.2017.2740923.
53 |
54 | ### Tasks of this project:
55 |
56 | [1] Implement live video expression inference discrete
57 |
58 | [2] Extend code to infer the continuous values of the circumplex model of affect
59 |
60 | [3] Test model performance on AffectNet and EMOTIC
61 |
62 | [4] Live test expression inference
63 |
64 | [5] Research methods for validating and improving results for future work
65 |
--------------------------------------------------------------------------------
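A minimal usage sketch for the webcam demo described in the README above; it assumes a connected webcam and the pretrained weights at `models/AffectNet8_Maxvit_Combined/model.pt` (the path hard-coded in `inference_on_webcam.py`):

```bash
pip install -r requirements.txt
python inference_on_webcam.py   # press 'q' to quit
```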
/inference_on_webcam.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import torchvision.models as models
3 | import torch.nn as nn
4 | import torch
5 | import numpy as np
6 | from PIL import Image
7 | from torchvision import transforms
8 | from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights, EfficientNet
9 | import torchvision
10 | import re
11 |
12 | DEVICE = torch.device(
13 | "cuda" if torch.cuda.is_available() else "cpu"
14 | ) # For Macbook, use mps
15 |
16 |
17 | def draw_valence_bar(frame, valence, x, y, w, bar_height=20):
18 | if valence >= 0.25:
19 | color = (0, 255, 0) # Green for positive valence
20 | elif valence <= -0.25:
21 | color = (0, 0, 255) # Red for negative valence
22 | else:
23 | color = (255, 255, 0)  # Cyan for neutral valence
24 |
25 | bar_width = int(w)
26 | cv2.rectangle(frame, (x, y - bar_height), (x + bar_width, y), (100, 100, 100), -1)
27 | cursor_x = x + int(bar_width * (valence + 1) / 2)
28 | cv2.line(frame, (cursor_x, y - bar_height), (cursor_x, y), color, 2)
29 |
30 | valence_text = f"Valence: {valence:.2f}"
31 | cv2.putText(
32 | frame,
33 | valence_text,
34 | (x, y - 30),
35 | cv2.FONT_HERSHEY_SIMPLEX,
36 | 0.5,
37 | (255, 255, 255),
38 | 1,
39 | cv2.LINE_AA,
40 | )
41 |
42 |
43 | def draw_arousal_bar(frame, arousal, x, y, h, bar_width=20):
44 | if arousal >= 0.25:
45 | color = (0, 255, 0)  # Green for high arousal
46 | elif arousal <= -0.25:
47 | color = (0, 0, 255)  # Red for low arousal
48 | else:
49 | color = (255, 255, 0)  # Cyan for neutral arousal
50 |
51 | bar_height = int(h)
52 | cv2.rectangle(frame, (x, y), (x + bar_width, y + bar_height), (100, 100, 100), -1)
53 | cursor_y = y + int(bar_height * (-arousal + 1) / 2)
54 | cv2.line(frame, (x, cursor_y), (x + bar_width, cursor_y), color, 2)
55 |
56 | arousal_text = f"Arousal: {arousal:.2f}"
57 | cv2.putText(
58 | frame,
59 | arousal_text,
60 | (x + 30, y + 10),
61 | cv2.FONT_HERSHEY_SIMPLEX,
62 | 0.5,
63 | (255, 255, 255),
64 | 1,
65 | cv2.LINE_AA,
66 | )
67 |
68 |
69 | def get_emotion(outputs_cls):
70 | emotions = [
71 | "Neutral",
72 | "Happy",
73 | "Sad",
74 | "Surprise",
75 | "Fear",
76 | "Disgust",
77 | "Angry",
78 | "Contempt", # AffectNet8 has 8 classes, when using the AffectNet7 model, remove this class
79 | ]
80 |
81 | max_indices = outputs_cls.argmax(dim=1)
82 | emotions_batch = [emotions[idx.item()] for idx in max_indices]
83 | return emotions_batch
84 |
96 |
97 |
98 | cap = cv2.VideoCapture(0) # 0 is usually the default camera (webcam)
99 |
100 | # Load the model
101 | MODEL = models.maxvit_t(weights="DEFAULT")
102 | block_channels = MODEL.classifier[3].in_features
103 | MODEL.classifier = nn.Sequential(
104 | nn.AdaptiveAvgPool2d(1),
105 | nn.Flatten(),
106 | nn.LayerNorm(block_channels),
107 | nn.Linear(block_channels, block_channels),
108 | nn.Tanh(),
109 | nn.Linear(
110 | block_channels, 10, bias=False
111 | ), # Change the number of output classes, e.g. for AffectNet7 combined use 9 output neurons
112 | )
113 | MODEL.load_state_dict(
114 | torch.load(
115 | "models/AffectNet8_Maxvit_Combined/model.pt", map_location=torch.device(DEVICE)
116 | )
117 | )
118 | MODEL.eval()
119 | MODEL.to(DEVICE)
120 |
121 | test_transform = transforms.Compose(
122 | [
123 | transforms.ToPILImage(),
124 | transforms.Resize((224, 224)),
125 | transforms.ToTensor(),
126 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
127 | ]
128 | )
129 | # Initialize the face classifier
130 | face_classifier = cv2.CascadeClassifier(
131 | cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
132 | )
133 |
134 | # ***** Access the webcam *****
135 |
136 | if not cap.isOpened():
137 | print("Error: Could not open webcam.")
138 | else:
139 | while True:
140 | ret, frame = cap.read()
141 | text = "Press 'q' to quit"
142 | cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
143 |
144 | faces = face_classifier.detectMultiScale(
145 | frame, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
146 | )
147 | # Loop over multiple detected faces
148 | for x, y, w, h in faces:
149 | # Cut out the face from the frame
150 | face_roi = frame[y : y + h, x : x + w]
151 |
152 | img = test_transform(cv2.cvtColor(face_roi, cv2.COLOR_BGR2RGB))  # OpenCV frames are BGR; the ImageNet-normalized pipeline expects RGB
153 | img = img.unsqueeze(0) # Add a batch dimension for the model
154 | outputs = MODEL(img.to(DEVICE))
155 | outputs_cls = outputs[:, :8]  # first 8 outputs are the AffectNet8 class logits (use :7 with the AffectNet7 model)
156 | valence = outputs[:, 8:9].item()
157 | arousal = outputs[:, 9:].item()
158 |
159 | # Draw the valence bar over the face
160 | draw_valence_bar(frame, valence, x, y, w)
161 | draw_arousal_bar(frame, arousal, x + w, y, h)
162 |
163 | emotion = get_emotion(outputs_cls)[0]  # one face per crop; take the single batch entry
164 | emotion_text = f"Emotion: {emotion}"
165 | text_size = cv2.getTextSize(emotion_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[
166 | 0
167 | ]
168 | cv2.putText(
169 | frame,
170 | emotion_text,
171 | (x - text_size[0] - 10, y + text_size[1] // 2),
172 | cv2.FONT_HERSHEY_SIMPLEX,
173 | 0.5,
174 | (255, 255, 255),
175 | 1,
176 | cv2.LINE_AA,
177 | )
178 |
179 | cv2.imshow("Webcam", frame)
180 | if cv2.waitKey(1) & 0xFF == ord("q"):
181 | break
182 |
183 | cap.release()
184 | cv2.destroyAllWindows()
185 |
--------------------------------------------------------------------------------
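For offline testing without a webcam, the same combined model can score a single image file. A sketch under the same assumptions as the script above (10-output AffectNet8 combined head, weights at the hard-coded repository path); the input file `face.jpg` is a hypothetical placeholder:

```python
import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image
from torchvision import transforms

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the classifier head exactly as in inference_on_webcam.py
model = models.maxvit_t(weights="DEFAULT")
block_channels = model.classifier[3].in_features
model.classifier = nn.Sequential(
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.LayerNorm(block_channels),
    nn.Linear(block_channels, block_channels),
    nn.Tanh(),
    nn.Linear(block_channels, 10, bias=False),  # 8 class logits + valence + arousal
)
model.load_state_dict(
    torch.load("models/AffectNet8_Maxvit_Combined/model.pt", map_location=DEVICE)
)
model.eval().to(DEVICE)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

EMOTIONS = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry", "Contempt"]

img = Image.open("face.jpg").convert("RGB")  # hypothetical pre-cropped face image
with torch.no_grad():
    out = model(transform(img).unsqueeze(0).to(DEVICE))
cls_logits = out[:, :8]
valence, arousal = out[0, 8].item(), out[0, 9].item()
print(EMOTIONS[cls_logits.argmax(dim=1).item()], f"valence={valence:.2f}", f"arousal={arousal:.2f}")
```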
/models/AffectNet7_Efficientnet_Combined/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the annotations for training and validation from separate CSV files
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
18 | # Set parameters
19 | BATCHSIZE = 128
20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 |
22 |
23 | # **** Create dataset and data loaders ****
24 | class CustomDataset(Dataset):
25 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
26 | self.dataframe = dataframe
27 | self.transform = transform
28 | self.root_dir = root_dir
29 | self.balance = balance
30 |
31 | if self.balance:
32 | self.dataframe = self.balance_dataset()
33 |
34 | def __len__(self):
35 | return len(self.dataframe)
36 |
37 | def __getitem__(self, idx):
38 | image_path = os.path.join(
39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
40 | )
41 | if os.path.exists(image_path):
42 | image = Image.open(image_path)
43 | else:
44 | image = Image.new(
45 | "RGB", (224, 224), color="white"
46 | ) # Handle missing image file
47 |
48 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
49 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
50 |
51 | if self.transform:
52 | image = self.transform(image)
53 |
54 | return image, classes, labels
55 |
56 | def balance_dataset(self):
57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
59 | )
60 | return balanced_df
61 |
62 |
63 | transform_valid = transforms.Compose(
64 | [
65 | transforms.ToTensor(),
66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
67 | ]
68 | )
69 |
70 | valid_dataset = CustomDataset(
71 | dataframe=valid_annotations_df,
72 | root_dir=IMAGE_FOLDER_TEST,
73 | transform=transform_valid,
74 | balance=False,
75 | )
76 | valid_loader = DataLoader(
77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
78 | )
79 |
80 | # ***** Define the model *****
81 |
82 | # Initialize the model (EfficientNetV2-S, as in this folder's train.py, so model.pt loads correctly)
83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
84 | num_features = MODEL.classifier[1].in_features
85 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=9)
86 | MODEL.to(DEVICE)
86 |
87 | # **** Test the model performance for classification ****
88 |
89 | # Set the model to evaluation mode
90 | MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))
91 | MODEL.to(DEVICE)
92 | MODEL.eval()
93 |
94 | all_labels_cls = []
95 | all_predicted_cls = []
96 |
97 | all_true_val = []
98 | all_pred_val = []
99 | all_true_aro = []
100 | all_pred_aro = []
101 |
102 | # Start inference on test set
103 | with torch.no_grad():
104 | for images, classes, labels in iter(valid_loader):
105 | images, classes, labels = (
106 | images.to(DEVICE),
107 | classes.to(DEVICE),
108 | labels.to(DEVICE),
109 | )
110 |
111 | outputs = MODEL(images)
112 | outputs_cls = outputs[:, :7]
113 | outputs_reg = outputs[:, 7:]
114 | val_pred = outputs_reg[:, 0]
115 | aro_pred = outputs_reg[:, 1]
116 |
117 | _, predicted_cls = torch.max(outputs_cls, 1)
118 |
119 | all_labels_cls.extend(classes.cpu().numpy())
120 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
121 | val_true = labels[:, 0]
122 | aro_true = labels[:, 1]
123 |
124 | all_true_val.extend(val_true.cpu().numpy())
125 | all_true_aro.extend(aro_true.cpu().numpy())
126 | all_pred_val.extend(val_pred.cpu().numpy())
127 | all_pred_aro.extend(aro_pred.cpu().numpy())
128 |
129 | df = pd.DataFrame(
130 | {
131 | "cat_pred": all_predicted_cls,
132 | "cat_true": all_labels_cls,
133 | "val_pred": all_pred_val,
134 | "val_true": all_true_val,
135 | "aro_pred": all_pred_aro,
136 | "aro_true": all_true_aro,
137 | }
138 | )
139 | df.to_csv("inference.csv", index=False)
140 |
--------------------------------------------------------------------------------
/models/AffectNet7_Efficientnet_Combined/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | from torch.optim import lr_scheduler
11 | from tqdm import tqdm
12 |
13 | # Load the annotations for training and validation from separate CSV files
14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
16 | train_annotations_path = (
17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
18 | )
19 | valid_annotations_path = (
20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
21 | )
22 | train_annotations_df = pd.read_csv(train_annotations_path)
23 | valid_annotations_df = pd.read_csv(valid_annotations_path)
24 |
25 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7]
26 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
27 |
28 | # Set parameters
29 | BATCHSIZE = 128
30 | NUM_EPOCHS = 20
31 | LR = 4e-5
32 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
33 |
34 |
35 | # **** Create dataset and data loaders ****
36 | class CustomDataset(Dataset):
37 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
38 | self.dataframe = dataframe
39 | self.transform = transform
40 | self.root_dir = root_dir
41 | self.balance = balance
42 |
43 | if self.balance:
44 | self.dataframe = self.balance_dataset()
45 |
46 | def __len__(self):
47 | return len(self.dataframe)
48 |
49 | def __getitem__(self, idx):
50 | image_path = os.path.join(
51 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
52 | )
53 | if os.path.exists(image_path):
54 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
55 | else:
56 | image = Image.new(
57 | "RGB", (224, 224), color="white"
58 | ) # Handle missing image file
59 |
60 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
61 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
62 |
63 | if self.transform:
64 | image = self.transform(image)
65 |
66 | return image, classes, labels
67 |
68 | def balance_dataset(self):
69 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
70 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
71 | )
72 | return balanced_df
73 |
74 |
75 | transform = transforms.Compose(
76 | [
77 | transforms.RandomHorizontalFlip(0.5),
78 | transforms.RandomGrayscale(0.01),
79 | transforms.RandomRotation(10),
80 | transforms.ColorJitter(
81 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
82 | ), # makes the model more robust to changes in lighting conditions
83 | transforms.RandomPerspective(
84 | distortion_scale=0.2, p=0.5
85 | ), # can be helpful if the input images have varying perspectives
86 | transforms.ToTensor(), # converts the image to a tensor (scales pixel values to [0, 1])
87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
88 | transforms.RandomErasing(
89 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
90 | ), # TEST: should help reduce overfitting
91 | ]
92 | )
93 |
94 | transform_valid = transforms.Compose(
95 | [
96 | transforms.ToTensor(),
97 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
98 | ]
99 | )
100 |
101 | train_dataset = CustomDataset(
102 | dataframe=train_annotations_df,
103 | root_dir=IMAGE_FOLDER,
104 | transform=transform,
105 | balance=False,
106 | )
107 | valid_dataset = CustomDataset(
108 | dataframe=valid_annotations_df,
109 | root_dir=IMAGE_FOLDER_TEST,
110 | transform=transform_valid,
111 | balance=False,
112 | )
113 | train_loader = DataLoader(
114 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
115 | )
116 | valid_loader = DataLoader(
117 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
118 | )
119 |
120 | # ***** Define the model *****
121 |
122 | # Initialize the model
123 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
124 | num_features = MODEL.classifier[1].in_features
125 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=9)
126 | MODEL.to(DEVICE)
127 | # Define (weighted) loss function
128 | weights7 = torch.tensor(
129 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006]
130 | )
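131 | # Per-class weights (larger for rarer expressions) to counter AffectNet's class imbalance.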
131 | criterion_cls = nn.CrossEntropyLoss(weights7.to(DEVICE))
132 | criterion_cls_val = (
133 | nn.CrossEntropyLoss()
134 | ) # Use two loss functions, as the validation dataset is balanced
135 | criterion_reg = nn.MSELoss()
136 |
137 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
138 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
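139 | # The scheduler is stepped once per batch below, so T_max counts optimizer steps rather than epochs.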
139 |
140 | # ***** Train the model *****
141 | print("--- Start training ---")
142 | scaler = torch.cuda.amp.GradScaler()
143 | best_valid_loss = 100
144 |
145 | for epoch in range(NUM_EPOCHS):
146 | MODEL.train()
147 | total_train_correct = 0
148 | total_train_samples = 0
149 | for images, classes, labels in tqdm(
150 | train_loader, desc="Epoch train_loader progress"
151 | ):
152 | images, classes, labels = (
153 | images.to(DEVICE),
154 | classes.to(DEVICE),
155 | labels.to(DEVICE),
156 | )
157 | optimizer.zero_grad()
158 | with torch.autocast(device_type="cuda", dtype=torch.float16):
159 | outputs = MODEL(images)
160 | outputs_cls = outputs[:, :7]
161 | outputs_reg = outputs[:, 7:]
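162 | # Joint loss: weighted cross-entropy on the class logits plus 5x MSE on the valence/arousal pair.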
162 | loss = criterion_cls(
163 | outputs_cls.cuda(), classes.cuda()
164 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
165 | scaler.scale(loss).backward()
166 | scaler.step(optimizer)
167 | scaler.update()
168 | scheduler.step()
169 | current_lr = optimizer.param_groups[0]["lr"]
170 |
171 | _, train_predicted = torch.max(outputs_cls, 1)
172 | total_train_samples += classes.size(0)
173 | total_train_correct += (train_predicted == classes).sum().item()
174 |
175 | train_accuracy = (total_train_correct / total_train_samples) * 100
176 |
177 | MODEL.eval()
178 | valid_loss = 0.0
179 | correct = 0
180 | total = 0
181 | with torch.no_grad():
182 | for images, classes, labels in valid_loader:
183 | images, classes, labels = (
184 | images.to(DEVICE),
185 | classes.to(DEVICE),
186 | labels.to(DEVICE),
187 | )
188 | outputs = MODEL(images)
189 | outputs_cls = outputs[:, :7]
190 | outputs_reg = outputs[:, 7:]
191 | loss = criterion_cls_val(
192 | outputs_cls.cuda(), classes.cuda()
193 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
194 | valid_loss += loss.item()
195 | _, predicted = torch.max(outputs_cls, 1)
196 | total += classes.size(0)
197 | correct += (predicted == classes).sum().item()
198 |
199 | print(
200 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
201 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
202 | f"Validation Accuracy: {(correct/total)*100:.2f}%"
203 | f", Training Accuracy: {train_accuracy:.2f}%, "
204 | )
205 |
206 | if valid_loss < best_valid_loss:
207 | best_valid_loss = valid_loss
208 | print(f"Saving model at epoch {epoch+1}")
209 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
210 |
--------------------------------------------------------------------------------
/models/AffectNet7_Efficientnet_Discrete/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the annotations for training and validation from separate CSV files
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
18 | # Set parameters
19 | BATCHSIZE = 128
20 |
21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22 |
23 |
24 | # **** Create dataset and data loaders ****
25 | class CustomDataset(Dataset):
26 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
27 | self.dataframe = dataframe
28 | self.transform = transform
29 | self.root_dir = root_dir
30 | self.balance = balance
31 |
32 | if self.balance:
33 | self.dataframe = self.balance_dataset()
34 |
35 | def __len__(self):
36 | return len(self.dataframe)
37 |
38 | def __getitem__(self, idx):
39 | image_path = os.path.join(
40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
41 | )
42 | if os.path.exists(image_path):
43 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
44 | else:
45 | image = Image.new(
46 | "RGB", (224, 224), color="white"
47 | ) # Handle missing image file
48 |
49 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
50 |
51 | if self.transform:
52 | image = self.transform(image)
53 |
54 | return image, label
55 |
56 | def balance_dataset(self):
57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
59 | )
60 | return balanced_df
61 |
62 |
63 | transform_valid = transforms.Compose(
64 | [
65 | transforms.ToTensor(),
66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
67 | ]
68 | )
69 | valid_dataset = CustomDataset(
70 | dataframe=valid_annotations_df,
71 | root_dir=IMAGE_FOLDER_TEST,
72 | transform=transform_valid,
73 | balance=False,
74 | )
75 | valid_loader = DataLoader(
76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
77 | )
78 | # ***** Define the model *****
79 |
80 | # Initialize the model
81 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
82 | num_features = MODEL.classifier[1].in_features
83 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=7)
84 | MODEL.to(DEVICE)
85 |
86 | # Set the model to evaluation mode
87 | MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))
88 | MODEL.to(DEVICE)
89 | MODEL.eval()
90 |
91 | all_labels_cls = []
92 | all_predicted_cls = []
93 |
94 | # Start inference on test set
95 | with torch.no_grad():
96 | for images, labels_cls in iter(valid_loader):
97 | images = images.to(DEVICE)
98 | labels_cls = labels_cls.to(DEVICE)
99 |
100 | outputs = MODEL(images)
101 |
102 | _, predicted_cls = torch.max(outputs, 1)
103 |
104 | all_labels_cls.extend(labels_cls.cpu().numpy())
105 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
106 |
107 |
108 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls})
109 | df.to_csv("inference.csv", index=False)
110 |
--------------------------------------------------------------------------------
/models/AffectNet7_Efficientnet_Discrete/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | import torchvision
11 | from torch.optim import lr_scheduler
12 | import re
13 | from tqdm import tqdm
14 |
15 | # Load the annotations for training and validation from separate CSV files
16 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
17 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
18 | train_annotations_path = (
19 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
20 | )
21 | valid_annotations_path = (
22 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
23 | )
24 | train_annotations_df = pd.read_csv(train_annotations_path)
25 | valid_annotations_df = pd.read_csv(valid_annotations_path)
26 |
27 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7]
28 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
29 |
30 | # Set parameters
31 | BATCHSIZE = 128
32 | NUM_EPOCHS = 20
33 | LR = 4e-5
34 |
35 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36 |
37 |
38 | # **** Create dataset and data loaders ****
39 | class CustomDataset(Dataset):
40 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
41 | self.dataframe = dataframe
42 | self.transform = transform
43 | self.root_dir = root_dir
44 | self.balance = balance
45 |
46 | if self.balance:
47 | self.dataframe = self.balance_dataset()
48 |
49 | def __len__(self):
50 | return len(self.dataframe)
51 |
52 | def __getitem__(self, idx):
53 | image_path = os.path.join(
54 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
55 | )
56 | if os.path.exists(image_path):
57 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
58 | else:
59 | image = Image.new(
60 | "RGB", (224, 224), color="white"
61 | ) # Handle missing image file
62 |
63 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
64 |
65 | if self.transform:
66 | image = self.transform(image)
67 |
68 | return image, label
69 |
70 | def balance_dataset(self):
71 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
72 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
73 | )
74 | return balanced_df
75 |
76 |
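77 | # Note: this discrete-only script uses a heavier augmentation recipe (elastic warp, blur, vertical flip) than the combined scripts.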
77 | transform = transforms.Compose(
78 | [
79 | transforms.ElasticTransform(alpha=5.0, sigma=5.0),
80 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
81 | transforms.RandomGrayscale(p=0.1),
82 | transforms.RandomRotation(degrees=15),
83 | transforms.RandomVerticalFlip(),
84 | transforms.ColorJitter(0.15, 0.15, 0.15),
85 | torchvision.transforms.RandomAutocontrast(p=0.4),
86 | transforms.ToTensor(),
87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
88 | ]
89 | )
90 |
91 | transform_valid = transforms.Compose(
92 | [
93 | transforms.ToTensor(),
94 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
95 | ]
96 | )
97 |
98 | train_dataset = CustomDataset(
99 | dataframe=train_annotations_df,
100 | root_dir=IMAGE_FOLDER,
101 | transform=transform,
102 | balance=False,
103 | )
104 | valid_dataset = CustomDataset(
105 | dataframe=valid_annotations_df,
106 | root_dir=IMAGE_FOLDER_TEST,
107 | transform=transform_valid,
108 | balance=False,
109 | )
110 | train_loader = DataLoader(
111 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
112 | )
113 | valid_loader = DataLoader(
114 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
115 | )
116 |
117 | # ***** Define the model *****
118 |
119 | # Initialize the model
120 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
121 | num_features = MODEL.classifier[1].in_features
122 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=7)
123 | MODEL.to(DEVICE)
124 |
125 | # Define (weighted) loss function
126 | # weights = torch.tensor([0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821])
127 | weights7 = torch.tensor(
128 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006]
129 | )
130 | criterion = nn.CrossEntropyLoss(weights7.to(DEVICE))
131 | criterion_val = (
132 | nn.CrossEntropyLoss()
133 | ) # Use two loss functions, as the validation dataset is balanced
134 |
135 |
136 | # Filter parameters for weight decay and no weight decay and create optimizer/scheduler
137 | def filter_params(params, include_patterns, exclude_patterns):
138 | included_params = []
139 | excluded_params = []
140 | for name, param in params:
141 | if any(re.search(pattern, name) for pattern in include_patterns):
142 | included_params.append(param)
143 | elif not any(re.search(pattern, name) for pattern in exclude_patterns):
144 | excluded_params.append(param)
145 | return included_params, excluded_params
146 |
147 |
148 | include_patterns = [
149 | r"^(?!.*\.bn)"
150 | ] # Match any parameter name that does NOT contain '.bn', i.e. the non-BatchNorm parameters
151 | exclude_patterns = [r".*\.bn.*"] # Match the BatchNorm parameters
152 | params_to_decay, params_not_to_decay = filter_params(
153 | MODEL.named_parameters(), include_patterns, exclude_patterns
154 | )
155 |
156 | # optimizer = optim.AdamW([
157 | # {'params': params_to_decay, 'weight_decay': ADAMW_WEIGHT_DECAY}, # Apply weight decay to these parameters
158 | # {'params': params_not_to_decay, 'weight_decay': 0.0} # Exclude weight decay for these parameters = 0.0
159 | # ], lr=LR)
160 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
161 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
162 |
163 | # ***** Train the model *****
164 | print("--- Start training ---")
165 | scaler = torch.cuda.amp.GradScaler()
166 | best_valid_loss = 100
167 |
168 | for epoch in range(NUM_EPOCHS):
169 | MODEL.train()
170 | total_train_correct = 0
171 | total_train_samples = 0
172 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"):
173 | images, labels = images.to(DEVICE), labels.to(DEVICE)
174 | optimizer.zero_grad()
175 | with torch.autocast(device_type="cuda", dtype=torch.float16):
176 | output = MODEL(images)
177 | loss = criterion(output.cuda(), labels.cuda())
178 | scaler.scale(loss).backward()
179 | scaler.step(optimizer)
180 | scaler.update()
181 | scheduler.step()
182 | current_lr = optimizer.param_groups[0]["lr"]
183 |
184 | _, train_predicted = torch.max(output, 1)
185 | total_train_samples += labels.size(0)
186 | total_train_correct += (train_predicted == labels).sum().item()
187 |
188 | train_accuracy = (total_train_correct / total_train_samples) * 100
189 |
190 | MODEL.eval()
191 | valid_loss = 0.0
192 | correct = 0
193 | total = 0
194 | with torch.no_grad():
195 | for images, labels in valid_loader:
196 | images, labels = images.to(DEVICE), labels.to(DEVICE)
197 | outputs = MODEL(images)
198 | loss = criterion_val(outputs.cuda(), labels.cuda())
199 | valid_loss += loss.item()
200 | _, predicted = torch.max(outputs, 1)
201 | total += labels.size(0)
202 | correct += (predicted == labels).sum().item()
203 |
204 | print(
205 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
206 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
207 | f"Validation Accuracy: {(correct/total)*100:.2f}%"
208 | f", Training Accuracy: {train_accuracy:.2f}%, "
209 | )
210 | # TBD: Overwrite the valid loss, then save the model as below when it is smaller than before
211 |
212 | if valid_loss < best_valid_loss:
213 | best_valid_loss = valid_loss
214 | print(f"Saving model at epoch {epoch+1}")
215 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
216 |
--------------------------------------------------------------------------------
/models/AffectNet7_Efficientnet_VA/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 |
11 | # Load the annotations for training and validation from separate CSV files
12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
14 |
15 | valid_annotations_path = (
16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
17 | )
18 | valid_annotations_df = pd.read_csv(valid_annotations_path)
19 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
20 | # Set parameters
21 | BATCHSIZE = 128
22 |
23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24 |
25 |
26 | # **** Create dataset and data loaders ****
27 | class CustomDataset(Dataset):
28 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
29 | self.dataframe = dataframe
30 | self.transform = transform
31 | self.root_dir = root_dir
32 | self.balance = balance
33 |
34 | if self.balance:
35 | self.dataframe = self.balance_dataset()
36 |
37 | def __len__(self):
38 | return len(self.dataframe)
39 |
40 | def __getitem__(self, idx):
41 | image_path = os.path.join(
42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
43 | )
44 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
45 |
46 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
47 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
48 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)
49 |
50 | if self.transform:
51 | image = self.transform(image)
52 |
53 | return image, classes, valence, arousal
54 |
55 | def balance_dataset(self):
56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
58 | )
59 | return balanced_df
60 |
61 |
62 | transform_valid = transforms.Compose(
63 | [
64 | transforms.ToTensor(),
65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
66 | ]
67 | )
68 |
69 | valid_dataset = CustomDataset(
70 | dataframe=valid_annotations_df,
71 | root_dir=IMAGE_FOLDER_TEST,
72 | transform=transform_valid,
73 | balance=False,
74 | )
75 |
76 | valid_loader = DataLoader(
77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
78 | )
79 |
80 | # ***** Define the model *****
81 |
82 | # Initialize the model
83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
84 | num_features = MODEL.classifier[1].in_features
85 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=2)
86 | MODEL.to(DEVICE)
87 |
88 |
89 | # **** Test the model performance for classification ****
90 |
91 | # Set the model to evaluation mode
92 | MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))
93 | MODEL.to(DEVICE)
94 | MODEL.eval()
95 |
96 | all_val_true_values = []
97 | all_val_predicted_values = []
98 | all_aro_true_values = []
99 | all_aro_predicted_values = []
100 |
101 | # Start inference on test set
102 | with torch.no_grad():
103 | for images, _, val_true, aro_true in valid_loader:
104 | images, val_true, aro_true = (
105 | images.to(DEVICE),
106 | val_true.to(DEVICE),
107 | aro_true.to(DEVICE),
108 | )
109 |
110 | outputs = MODEL(images)
111 | val_pred = outputs[:, 0]
112 | aro_pred = outputs[:, 1]
113 |
114 | # Append to the lists --> Regression
115 | true_val_values = val_true.cpu().numpy()
116 | true_aro_values = aro_true.cpu().numpy()
117 | pred_val_values = val_pred.cpu().numpy()
118 | pred_aro_values = aro_pred.cpu().numpy()
119 | all_val_true_values.extend(true_val_values)
120 | all_aro_true_values.extend(true_aro_values)
121 | all_val_predicted_values.extend(pred_val_values)
122 | all_aro_predicted_values.extend(pred_aro_values)
123 |
124 | df = pd.DataFrame(
125 | {
126 | "val_pred": all_val_predicted_values,
127 | "val_true": all_val_true_values,
128 | "aro_pred": all_aro_predicted_values,
129 | "aro_true": all_aro_true_values,
130 | }
131 | )
132 | df.to_csv("inference.csv", index=False)
133 |
--------------------------------------------------------------------------------
/models/AffectNet7_Maxvit_Combined/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 |
11 | # Load the annotations for training and validation from separate CSV files
12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
14 |
15 | valid_annotations_path = (
16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
17 | )
18 | valid_annotations_df = pd.read_csv(valid_annotations_path)
19 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
20 |
21 | exp_counts_valid = valid_annotations_df["exp"].value_counts().sort_index()
22 |
23 | # Set parameters
24 | BATCHSIZE = 128
25 | MODEL = models.maxvit_t(weights="DEFAULT")
26 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27 |
28 |
29 | # **** Create dataset and data loaders ****
30 | class CustomDataset(Dataset):
31 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
32 | self.dataframe = dataframe
33 | self.transform = transform
34 | self.root_dir = root_dir
35 | self.balance = balance
36 |
37 | if self.balance:
38 | self.dataframe = self.balance_dataset()
39 |
40 | def __len__(self):
41 | return len(self.dataframe)
42 |
43 | def __getitem__(self, idx):
44 | image_path = os.path.join(
45 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
46 | )
47 | if os.path.exists(image_path):
48 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
49 | else:
50 | image = Image.new(
51 | "RGB", (224, 224), color="white"
52 | ) # Handle missing image file
53 |
54 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
55 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
56 |
57 | if self.transform:
58 | image = self.transform(image)
59 |
60 | return image, classes, labels
61 |
62 | def balance_dataset(self):
63 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
64 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
65 | )
66 | return balanced_df
67 |
68 |
69 | transform_valid = transforms.Compose(
70 | [
71 | transforms.ToTensor(),
72 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
73 | ]
74 | )
75 | valid_dataset = CustomDataset(
76 | dataframe=valid_annotations_df,
77 | root_dir=IMAGE_FOLDER_TEST,
78 | transform=transform_valid,
79 | balance=False,
80 | )
81 | valid_loader = DataLoader(
82 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
83 | )
84 |
85 | # ***** Define the model *****
86 |
87 | # Initialize the model
88 | block_channels = MODEL.classifier[3].in_features
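89 | # Mirror torchvision's maxvit classifier structure, widening the final Linear to
90 | # 9 outputs (7 expression logits + valence + arousal) so model.pt loads cleanly.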
89 | MODEL.classifier = nn.Sequential(
90 | nn.AdaptiveAvgPool2d(1),
91 | nn.Flatten(),
92 | nn.LayerNorm(block_channels),
93 | nn.Linear(block_channels, block_channels),
94 | nn.Tanh(),
95 | nn.Linear(block_channels, 9, bias=False),
96 | )
97 | MODEL.to(DEVICE)  # Move the model to the selected device
98 |
99 | # **** Test the model performance for classification ****
100 |
101 | # Set the model to evaluation mode
102 | MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))
103 | MODEL.to(DEVICE)
104 | MODEL.eval()
105 |
106 | all_labels_cls = []
107 | all_predicted_cls = []
108 |
109 | all_true_val = []
110 | all_pred_val = []
111 | all_true_aro = []
112 | all_pred_aro = []
113 |
114 | # Start inference on test set
115 | with torch.no_grad():
116 | for images, classes, labels in iter(valid_loader):
117 | images, classes, labels = (
118 | images.to(DEVICE),
119 | classes.to(DEVICE),
120 | labels.to(DEVICE),
121 | )
122 |
123 | outputs = MODEL(images)
124 | outputs_cls = outputs[:, :7]
125 | outputs_reg = outputs[:, 7:]
126 | val_pred = outputs_reg[:, 0]
127 | aro_pred = outputs_reg[:, 1]
128 |
129 | _, predicted_cls = torch.max(outputs_cls, 1)
130 |
131 | all_labels_cls.extend(classes.cpu().numpy())
132 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
133 | val_true = labels[:, 0]
134 | aro_true = labels[:, 1]
135 |
136 | all_true_val.extend(val_true.cpu().numpy())
137 | all_true_aro.extend(aro_true.cpu().numpy())
138 | all_pred_val.extend(val_pred.cpu().numpy())
139 | all_pred_aro.extend(aro_pred.cpu().numpy())
140 |
141 | df = pd.DataFrame(
142 | {
143 | "cat_pred": all_predicted_cls,
144 | "cat_true": all_labels_cls,
145 | "val_pred": all_pred_val,
146 | "val_true": all_true_val,
147 | "aro_pred": all_pred_aro,
148 | "aro_true": all_true_aro,
149 | }
150 | )
151 | df.to_csv("inference.csv", index=False)
152 |
--------------------------------------------------------------------------------
/models/AffectNet7_Maxvit_Combined/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | from torch.optim import lr_scheduler
11 | from tqdm import tqdm
12 |
13 | # Load the annotations for training and validation from separate CSV files
14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
16 | train_annotations_path = (
17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
18 | )
19 | valid_annotations_path = (
20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
21 | )
22 | train_annotations_df = pd.read_csv(train_annotations_path)
23 | valid_annotations_df = pd.read_csv(valid_annotations_path)
24 |
25 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7]
26 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
27 |
28 | # Set parameters
29 | BATCHSIZE = 128
30 | NUM_EPOCHS = 25
31 | LR = 4e-5
32 | MODEL = models.maxvit_t(weights="DEFAULT")
33 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34 |
35 |
36 | # **** Create dataset and data loaders ****
37 | class CustomDataset(Dataset):
38 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
39 | self.dataframe = dataframe
40 | self.transform = transform
41 | self.root_dir = root_dir
42 | self.balance = balance
43 |
44 | if self.balance:
45 | self.dataframe = self.balance_dataset()
46 |
47 | def __len__(self):
48 | return len(self.dataframe)
49 |
50 | def __getitem__(self, idx):
51 | image_path = os.path.join(
52 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
53 | )
54 | if os.path.exists(image_path):
55 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
56 | else:
57 | image = Image.new(
58 | "RGB", (224, 224), color="white"
59 | ) # Handle missing image file
60 |
61 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
62 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
63 |
64 | if self.transform:
65 | image = self.transform(image)
66 |
67 | return image, classes, labels
68 |
69 | def balance_dataset(self):
70 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
71 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
72 | )
73 | return balanced_df
74 |
75 |
76 | transform = transforms.Compose(
77 | [
78 | transforms.RandomHorizontalFlip(0.5),
79 | transforms.RandomGrayscale(0.01),
80 | transforms.RandomRotation(10),
81 | transforms.ColorJitter(
82 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
83 | ), # makes the model more robust to changes in lighting conditions
84 | transforms.RandomPerspective(
85 | distortion_scale=0.2, p=0.5
86 | ), # can be helpful if the input images have varying perspectives
87 | transforms.ToTensor(), # converts the image to a tensor (scales pixel values to [0, 1])
88 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
89 | transforms.RandomErasing(
90 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
91 | ), # TEST: should help reduce overfitting
92 | ]
93 | )
94 |
95 | transform_valid = transforms.Compose(
96 | [
97 | transforms.ToTensor(),
98 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
99 | ]
100 | )
101 |
102 | train_dataset = CustomDataset(
103 | dataframe=train_annotations_df,
104 | root_dir=IMAGE_FOLDER,
105 | transform=transform,
106 | balance=False,
107 | )
108 | valid_dataset = CustomDataset(
109 | dataframe=valid_annotations_df,
110 | root_dir=IMAGE_FOLDER_TEST,
111 | transform=transform_valid,
112 | balance=False,
113 | )
114 | train_loader = DataLoader(
115 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
116 | )
117 | valid_loader = DataLoader(
118 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
119 | )
120 |
121 | # ***** Define the model *****
122 |
123 | # Initialize the model
124 | block_channels = MODEL.classifier[3].in_features
125 | MODEL.classifier = nn.Sequential(
126 | nn.AdaptiveAvgPool2d(1),
127 | nn.Flatten(),
128 | nn.LayerNorm(block_channels),
129 | nn.Linear(block_channels, block_channels),
130 | nn.Tanh(),
131 | nn.Linear(block_channels, 9, bias=False),
132 | )
133 | MODEL.to(DEVICE)  # Move the model to the selected device
134 |
135 | # Define (weighted) loss function
136 | # weights = torch.tensor([0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821])
137 | weights7 = torch.tensor(
138 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006]
139 | )
140 | criterion_cls = nn.CrossEntropyLoss(weights7.to(DEVICE))
141 | criterion_cls_val = (
142 | nn.CrossEntropyLoss()
143 | ) # Use two loss functions, as the validation dataset is balanced
144 | criterion_reg = nn.MSELoss()
145 |
146 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
147 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
148 |
149 | # ***** Train the model *****
150 | print("--- Start training ---")
151 | scaler = torch.cuda.amp.GradScaler()
152 | best_valid_loss = 100
153 |
154 | for epoch in range(NUM_EPOCHS):
155 | MODEL.train()
156 | total_train_correct = 0
157 | total_train_samples = 0
158 | for images, classes, labels in tqdm(
159 | train_loader, desc="Epoch train_loader progress"
160 | ):
161 | images, classes, labels = (
162 | images.to(DEVICE),
163 | classes.to(DEVICE),
164 | labels.to(DEVICE),
165 | )
166 | optimizer.zero_grad()
167 | with torch.autocast(device_type="cuda", dtype=torch.float16):
168 | outputs = MODEL(images)
169 | outputs_cls = outputs[:, :7]
170 | outputs_reg = outputs[:, 7:]
171 | loss = criterion_cls(
172 | outputs_cls.cuda(), classes.cuda()
173 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
174 | scaler.scale(loss).backward()
175 | scaler.step(optimizer)
176 | scaler.update()
177 | scheduler.step()
178 | current_lr = optimizer.param_groups[0]["lr"]
179 |
180 | _, train_predicted = torch.max(outputs_cls, 1)
181 | total_train_samples += classes.size(0)
182 | total_train_correct += (train_predicted == classes).sum().item()
183 |
184 | train_accuracy = (total_train_correct / total_train_samples) * 100
185 |
186 | MODEL.eval()
187 | valid_loss = 0.0
188 | correct = 0
189 | total = 0
190 | with torch.no_grad():
191 | for images, classes, labels in valid_loader:
192 | images, classes, labels = (
193 | images.to(DEVICE),
194 | classes.to(DEVICE),
195 | labels.to(DEVICE),
196 | )
197 | outputs = MODEL(images)
198 | outputs_cls = outputs[:, :7]
199 | outputs_reg = outputs[:, 7:]
200 | loss = criterion_cls_val(
201 | outputs_cls.cuda(), classes.cuda()
202 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
203 | valid_loss += loss.item()
204 | _, predicted = torch.max(outputs_cls, 1)
205 | total += classes.size(0)
206 | correct += (predicted == classes).sum().item()
207 |
208 | print(
209 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
210 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
211 | f"Validation Accuracy: {(correct/total)*100:.2f}%"
212 | f", Training Accuracy: {train_accuracy:.2f}%, "
213 | )
214 |
215 | if valid_loss < best_valid_loss:
216 | best_valid_loss = valid_loss
217 | print(f"Saving model at epoch {epoch+1}")
218 | torch.save(MODEL.state_dict(), "model.pt")  # Save the best model under the name generate_csv.py loads
219 |
--------------------------------------------------------------------------------
/models/AffectNet7_Maxvit_Discrete/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the annotations for training and validation from separate CSV files
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
18 | # Set parameters
19 | BATCHSIZE = 128
20 | MODEL = models.maxvit_t(weights="DEFAULT")
21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22 |
23 |
24 | # **** Create dataset and data loaders ****
25 | class CustomDataset(Dataset):
26 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
27 | self.dataframe = dataframe
28 | self.transform = transform
29 | self.root_dir = root_dir
30 | self.balance = balance
31 |
32 | if self.balance:
33 | self.dataframe = self.balance_dataset()
34 |
35 | def __len__(self):
36 | return len(self.dataframe)
37 |
38 | def __getitem__(self, idx):
39 | image_path = os.path.join(
40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
41 | )
42 | if os.path.exists(image_path):
43 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
44 | else:
45 | image = Image.new(
46 | "RGB", (224, 224), color="white"
47 | ) # Handle missing image file
48 |
49 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
50 |
51 | if self.transform:
52 | image = self.transform(image)
53 |
54 | return image, label
55 |
56 | def balance_dataset(self):
57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
59 | )
60 | return balanced_df
61 |
62 |
63 | transform_valid = transforms.Compose(
64 | [
65 | transforms.ToTensor(),
66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
67 | ]
68 | )
69 | valid_dataset = CustomDataset(
70 | dataframe=valid_annotations_df,
71 | root_dir=IMAGE_FOLDER_TEST,
72 | transform=transform_valid,
73 | balance=False,
74 | )
75 | valid_loader = DataLoader(
76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
77 | )
78 | # ***** Define the model *****
79 |
80 | # Initialize the model
81 | block_channels = MODEL.classifier[3].in_features
82 | MODEL.classifier = nn.Sequential(
83 | nn.AdaptiveAvgPool2d(1),
84 | nn.Flatten(),
85 | nn.LayerNorm(block_channels),
86 | nn.Linear(block_channels, block_channels),
87 | nn.Tanh(),
88 | nn.Linear(block_channels, 7, bias=False),
89 | )
90 | MODEL.to(DEVICE)  # Move the model to the selected device
91 |
92 | # Set the model to evaluation mode
93 | MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))
94 | MODEL.to(DEVICE)
95 | MODEL.eval()
96 |
97 | all_labels_cls = []
98 | all_predicted_cls = []
99 |
100 | # Start inference on test set
101 | with torch.no_grad():
102 | for images, labels_cls in iter(valid_loader):
103 | images = images.to(DEVICE)
104 | labels_cls = labels_cls.to(DEVICE)
105 |
106 | outputs = MODEL(images)
107 |
108 | _, predicted_cls = torch.max(outputs, 1)
109 |
110 | all_labels_cls.extend(labels_cls.cpu().numpy())
111 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
112 |
113 |
114 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls})
115 | df.to_csv("inference.csv", index=False)
116 |
--------------------------------------------------------------------------------
/models/AffectNet7_Maxvit_VA/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the annotations for training and validation from separate CSV files
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
18 | # Set parameters
19 | BATCHSIZE = 128
20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 |
22 |
23 | # **** Create dataset and data loaders ****
24 | class CustomDataset(Dataset):
25 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
26 | self.dataframe = dataframe
27 | self.transform = transform
28 | self.root_dir = root_dir
29 | self.balance = balance
30 |
31 | if self.balance:
32 | self.dataframe = self.balance_dataset()
33 |
34 | def __len__(self):
35 | return len(self.dataframe)
36 |
37 | def __getitem__(self, idx):
38 | image_path = os.path.join(
39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
40 | )
41 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
42 |
43 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
44 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
45 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)
46 |
47 | if self.transform:
48 | image = self.transform(image)
49 |
50 | return image, classes, valence, arousal
51 |
52 | def balance_dataset(self):
53 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
54 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
55 | )
56 | return balanced_df
57 |
58 |
59 | transform_valid = transforms.Compose(
60 | [
61 | transforms.ToTensor(),
62 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
63 | ]
64 | )
65 |
66 | valid_dataset = CustomDataset(
67 | dataframe=valid_annotations_df,
68 | root_dir=IMAGE_FOLDER_TEST,
69 | transform=transform_valid,
70 | balance=False,
71 | )
72 | valid_loader = DataLoader(
73 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
74 | )
75 |
76 | # ***** Define the model *****
77 |
78 | # Initialize the model
79 | MODEL = models.maxvit_t(weights="DEFAULT")
80 | block_channels = MODEL.classifier[3].in_features
81 | MODEL.classifier = nn.Sequential(
82 | nn.AdaptiveAvgPool2d(1),
83 | nn.Flatten(),
84 | nn.LayerNorm(block_channels),
85 | nn.Linear(block_channels, block_channels),
86 | nn.Tanh(),
87 | nn.Dropout(0.3),
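88 | # Dropout is inactive in eval mode; it is kept so the module indices match the trained checkpoint's state_dict keys.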
88 | nn.Linear(block_channels, 2, bias=False),
89 | )
90 | MODEL.to(DEVICE)
91 |
92 | # **** Test the model performance for classification ****
93 |
94 | # Set the model to evaluation mode
95 | MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))
96 | MODEL.to(DEVICE)
97 | MODEL.eval()
98 |
99 | all_val_true_values = []
100 | all_val_predicted_values = []
101 | all_aro_true_values = []
102 | all_aro_predicted_values = []
103 |
104 | # Start inference on test set
105 | with torch.no_grad():
106 | for images, _, val_true, aro_true in valid_loader:
107 | images, val_true, aro_true = (
108 | images.to(DEVICE),
109 | val_true.to(DEVICE),
110 | aro_true.to(DEVICE),
111 | )
112 |
113 | outputs = MODEL(images)
114 | val_pred = outputs[:, 0]
115 | aro_pred = outputs[:, 1]
116 |
117 | # Append to the lists --> Regression
118 | true_val_values = val_true.cpu().numpy()
119 | true_aro_values = aro_true.cpu().numpy()
120 | pred_val_values = val_pred.cpu().numpy()
121 | pred_aro_values = aro_pred.cpu().numpy()
122 | all_val_true_values.extend(true_val_values)
123 | all_aro_true_values.extend(true_aro_values)
124 | all_val_predicted_values.extend(pred_val_values)
125 | all_aro_predicted_values.extend(pred_aro_values)
126 | df = pd.DataFrame(
127 | {
128 | "val_pred": all_val_predicted_values,
129 | "val_true": all_val_true_values,
130 | "aro_pred": all_aro_predicted_values,
131 | "aro_true": all_aro_true_values,
132 | }
133 | )
134 | df.to_csv("inference.csv", index=False)
135 |
--------------------------------------------------------------------------------
/models/AffectNet7_Swin_Combined/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the annotations for training and validation from separate CSV files
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
18 | # Set parameters
19 | BATCHSIZE = 128
20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 |
22 |
23 | # **** Create dataset and data loaders ****
24 | class CustomDataset(Dataset):
25 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
26 | self.dataframe = dataframe
27 | self.transform = transform
28 | self.root_dir = root_dir
29 | self.balance = balance
30 |
31 | if self.balance:
32 | self.dataframe = self.balance_dataset()
33 |
34 | def __len__(self):
35 | return len(self.dataframe)
36 |
37 | def __getitem__(self, idx):
38 | image_path = os.path.join(
39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
40 | )
41 | if os.path.exists(image_path):
42 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
43 | else:
44 | image = Image.new(
45 | "RGB", (224, 224), color="white"
46 | ) # Handle missing image file
47 |
48 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
49 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
50 |
51 | if self.transform:
52 | image = self.transform(image)
53 |
54 | return image, classes, labels
55 |
56 | def balance_dataset(self):
57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
59 | )
60 | return balanced_df
61 |
62 |
63 | transform_valid = transforms.Compose(
64 | [
65 | transforms.ToTensor(),
66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
67 | ]
68 | )
69 |
70 | valid_dataset = CustomDataset(
71 | dataframe=valid_annotations_df,
72 | root_dir=IMAGE_FOLDER_TEST,
73 | transform=transform_valid,
74 | balance=False,
75 | )
76 | valid_loader = DataLoader(
77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
78 | )
79 |
80 | # * Define the model *
81 |
82 | # Initialize the model
83 | MODEL = models.swin_v2_t(weights="DEFAULT")
84 | MODEL.head = torch.nn.Linear(in_features=768, out_features=9, bias=True)
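85 | # swin_v2_t pools to 768 features; the 9 outputs are 7 class logits plus valence and arousal.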
85 | MODEL.to(DEVICE)
86 |
87 | # **** Test the model performance for classification ****
88 |
89 | # Set the model to evaluation mode
90 | MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))
91 | MODEL.to(DEVICE)
92 | MODEL.eval()
93 |
94 | all_labels_cls = []
95 | all_predicted_cls = []
96 |
97 | all_true_val = []
98 | all_pred_val = []
99 | all_true_aro = []
100 | all_pred_aro = []
101 |
102 | # Start inference on test set
103 | with torch.no_grad():
104 | for images, classes, labels in iter(valid_loader):
105 | images, classes, labels = (
106 | images.to(DEVICE),
107 | classes.to(DEVICE),
108 | labels.to(DEVICE),
109 | )
110 |
111 | outputs = MODEL(images)
112 | outputs_cls = outputs[:, :7]
113 | outputs_reg = outputs[:, 7:]
114 | val_pred = outputs_reg[:, 0]
115 | aro_pred = outputs_reg[:, 1]
116 |
117 | _, predicted_cls = torch.max(outputs_cls, 1)
118 |
119 | all_labels_cls.extend(classes.cpu().numpy())
120 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
121 | val_true = labels[:, 0]
122 | aro_true = labels[:, 1]
123 |
124 | all_true_val.extend(val_true.cpu().numpy())
125 | all_true_aro.extend(aro_true.cpu().numpy())
126 | all_pred_val.extend(val_pred.cpu().numpy())
127 | all_pred_aro.extend(aro_pred.cpu().numpy())
128 |
129 | df = pd.DataFrame(
130 | {
131 | "cat_pred": all_predicted_cls,
132 | "cat_true": all_labels_cls,
133 | "val_pred": all_pred_val,
134 | "val_true": all_true_val,
135 | "aro_pred": all_pred_aro,
136 | "aro_true": all_true_aro,
137 | }
138 | )
139 | df.to_csv("inference.csv", index=False)
140 |
--------------------------------------------------------------------------------
/models/AffectNet7_Swin_Combined/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | from torch.optim import lr_scheduler
11 | from tqdm import tqdm
12 |
13 | # Load the annotations for training and validation from separate CSV files
14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
16 | train_annotations_path = (
17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
18 | )
19 | valid_annotations_path = (
20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
21 | )
22 | train_annotations_df = pd.read_csv(train_annotations_path)
23 | valid_annotations_df = pd.read_csv(valid_annotations_path)
24 |
25 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7]
26 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
27 |
28 | # Set parameters
29 | BATCHSIZE = 128
30 | NUM_EPOCHS = 20
31 | LR = 4e-5
32 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
33 |
34 |
35 | # **** Create dataset and data loaders ****
36 | class CustomDataset(Dataset):
37 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
38 | self.dataframe = dataframe
39 | self.transform = transform
40 | self.root_dir = root_dir
41 | self.balance = balance
42 |
43 | if self.balance:
44 | self.dataframe = self.balance_dataset()
45 |
46 | def __len__(self):
47 | return len(self.dataframe)
48 |
49 | def __getitem__(self, idx):
50 | image_path = os.path.join(
51 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
52 | )
53 | if os.path.exists(image_path):
54 | image = Image.open(image_path).convert("RGB")  # Ensure 3 channels for the 3-channel Normalize
55 | else:
56 | image = Image.new(
57 | "RGB", (224, 224), color="white"
58 | ) # Handle missing image file
59 |
60 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
61 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
62 |
63 | if self.transform:
64 | image = self.transform(image)
65 |
66 | return image, classes, labels
67 |
68 | def balance_dataset(self):
69 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
70 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
71 | )
72 | return balanced_df
73 |
74 |
75 | transform = transforms.Compose(
76 | [
77 | transforms.RandomHorizontalFlip(0.5),
78 | transforms.RandomGrayscale(0.01),
79 | transforms.RandomRotation(10),
80 | transforms.ColorJitter(
81 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
82 | ), # makes the model more robust to changes in lighting conditions
83 | transforms.RandomPerspective(
84 | distortion_scale=0.2, p=0.5
85 | ), # can be helpful if the input images have varying perspectives
86 | transforms.ToTensor(), # converts the image to a tensor (scales pixel values to [0, 1])
87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
88 | transforms.RandomErasing(
89 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
90 | ), # TEST: should help reduce overfitting
91 | ]
92 | )
93 |
94 | transform_valid = transforms.Compose(
95 | [
96 | transforms.ToTensor(),
97 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
98 | ]
99 | )
100 |
101 | train_dataset = CustomDataset(
102 | dataframe=train_annotations_df,
103 | root_dir=IMAGE_FOLDER,
104 | transform=transform,
105 | balance=False,
106 | )
107 | valid_dataset = CustomDataset(
108 | dataframe=valid_annotations_df,
109 | root_dir=IMAGE_FOLDER_TEST,
110 | transform=transform_valid,
111 | balance=False,
112 | )
113 | train_loader = DataLoader(
114 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
115 | )
116 | valid_loader = DataLoader(
117 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
118 | )
119 |
120 | # * Define the model *
121 |
122 | # Initialize the model
123 | MODEL = models.swin_v2_t(weights="DEFAULT")
124 | MODEL.head = torch.nn.Linear(in_features=768, out_features=9, bias=True)
125 | MODEL.to(DEVICE)
126 | # Define (weighted) loss function
127 | weights7 = torch.tensor(
128 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006]
129 | )
130 | criterion_cls = nn.CrossEntropyLoss(weights7.to(DEVICE))
131 | criterion_cls_val = (
132 | nn.CrossEntropyLoss()
133 | ) # Use two loss functions, as the validation dataset is balanced
134 | criterion_reg = nn.MSELoss()
135 |
136 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
137 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
138 |
139 | # ***** Train the model *****
140 | print("--- Start training ---")
141 | scaler = torch.cuda.amp.GradScaler()
142 | best_valid_loss = 100
143 |
144 | for epoch in range(NUM_EPOCHS):
145 | MODEL.train()
146 | total_train_correct = 0
147 | total_train_samples = 0
148 | for images, classes, labels in tqdm(
149 | train_loader, desc="Epoch train_loader progress"
150 | ):
151 | images, classes, labels = (
152 | images.to(DEVICE),
153 | classes.to(DEVICE),
154 | labels.to(DEVICE),
155 | )
156 | optimizer.zero_grad()
157 | with torch.autocast(device_type="cuda", dtype=torch.float16):
158 | outputs = MODEL(images)
159 | outputs_cls = outputs[:, :7]
160 | outputs_reg = outputs[:, 7:]
161 | loss = criterion_cls(
162 | outputs_cls.cuda(), classes.cuda()
163 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
164 | scaler.scale(loss).backward()
165 | scaler.step(optimizer)
166 | scaler.update()
167 | scheduler.step()
168 | current_lr = optimizer.param_groups[0]["lr"]
169 |
170 | _, train_predicted = torch.max(outputs_cls, 1)
171 | total_train_samples += classes.size(0)
172 | total_train_correct += (train_predicted == classes).sum().item()
173 |
174 | train_accuracy = (total_train_correct / total_train_samples) * 100
175 |
176 | MODEL.eval()
177 | valid_loss = 0.0
178 | correct = 0
179 | total = 0
180 | with torch.no_grad():
181 | for images, classes, labels in valid_loader:
182 | images, classes, labels = (
183 | images.to(DEVICE),
184 | classes.to(DEVICE),
185 | labels.to(DEVICE),
186 | )
187 | outputs = MODEL(images)
188 | outputs_cls = outputs[:, :7]
189 | outputs_reg = outputs[:, 7:]
190 |             loss = criterion_cls_val(
191 |                 outputs_cls, classes
192 |             ) + 5 * criterion_reg(outputs_reg, labels)
193 | valid_loss += loss.item()
194 | _, predicted = torch.max(outputs_cls, 1)
195 | total += classes.size(0)
196 | correct += (predicted == classes).sum().item()
197 |
198 | print(
199 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
200 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
201 |         f"Validation Accuracy: {(correct/total)*100:.2f}%, "
202 |         f"Training Accuracy: {train_accuracy:.2f}%"
203 | )
204 |
205 | if valid_loss < best_valid_loss:
206 | best_valid_loss = valid_loss
207 | print(f"Saving model at epoch {epoch+1}")
208 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
209 |
--------------------------------------------------------------------------------
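The script above trains a single 9-way head: the first 7 outputs are expression logits scored with weighted cross-entropy, the last 2 are valence/arousal values regressed with MSE, and the two terms are combined as CE + 5 * MSE. A minimal standalone sketch of that joint loss on dummy tensors (batch size 4 is arbitrary):

    import torch
    import torch.nn as nn

    ce, mse = nn.CrossEntropyLoss(), nn.MSELoss()

    outputs = torch.randn(4, 9)          # dummy 9-dim head outputs
    classes = torch.randint(0, 7, (4,))  # dummy expression labels (0-6)
    va = torch.rand(4, 2) * 2 - 1        # dummy valence/arousal targets in [-1, 1]

    # split the head output and combine the two objectives as in train.py
    loss = ce(outputs[:, :7], classes) + 5 * mse(outputs[:, 7:], va)
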
/models/AffectNet7_Swin_Discrete/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from the CSV file
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
18 | # Set parameters
19 | BATCHSIZE = 128
20 |
21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22 |
23 |
24 | # **** Create dataset and data loaders ****
25 | class CustomDataset(Dataset):
26 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
27 | self.dataframe = dataframe
28 | self.transform = transform
29 | self.root_dir = root_dir
30 | self.balance = balance
31 |
32 | if self.balance:
33 | self.dataframe = self.balance_dataset()
34 |
35 | def __len__(self):
36 | return len(self.dataframe)
37 |
38 | def __getitem__(self, idx):
39 | image_path = os.path.join(
40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
41 | )
42 | if os.path.exists(image_path):
43 | image = Image.open(image_path)
44 | else:
45 | image = Image.new(
46 | "RGB", (224, 224), color="white"
47 | ) # Handle missing image file
48 |
49 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
50 |
51 | if self.transform:
52 | image = self.transform(image)
53 |
54 | return image, label
55 |
56 | def balance_dataset(self):
57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
59 | )
60 | return balanced_df
61 |
62 |
63 | transform_valid = transforms.Compose(
64 | [
65 | transforms.ToTensor(),
66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
67 | ]
68 | )
69 | valid_dataset = CustomDataset(
70 | dataframe=valid_annotations_df,
71 | root_dir=IMAGE_FOLDER_TEST,
72 | transform=transform_valid,
73 | balance=False,
74 | )
75 | valid_loader = DataLoader(
76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
77 | )
78 | # * Define the model *
79 |
80 | # Initialize the model
81 | MODEL = models.swin_v2_t(weights="DEFAULT")
82 | MODEL.head = torch.nn.Linear(in_features=768, out_features=7, bias=True)
83 | MODEL.to(DEVICE)
84 |
85 | # Load the trained weights and set the model to evaluation mode
86 | MODEL.load_state_dict(torch.load("model.pt"))
87 | MODEL.to(DEVICE)
88 | MODEL.eval()
89 |
90 | all_labels_cls = []
91 | all_predicted_cls = []
92 |
93 | # Start inference on test set
94 | with torch.no_grad():
95 |     for images, labels_cls in valid_loader:
96 | images = images.to(DEVICE)
97 | labels_cls = labels_cls.to(DEVICE)
98 |
99 | outputs = MODEL(images)
100 |
101 | _, predicted_cls = torch.max(outputs, 1)
102 |
103 | all_labels_cls.extend(labels_cls.cpu().numpy())
104 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
105 |
106 |
107 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls})
108 | df.to_csv("inference.csv", index=False)
109 |
--------------------------------------------------------------------------------
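The CustomDataset above carries a balance option (unused here, balance=False) that subsamples every expression class down to the size of the rarest one. A small illustration of what that groupby/sample combination does on a toy frame:

    import pandas as pd

    df = pd.DataFrame({"exp": [0, 0, 0, 1, 1, 2]})
    balanced = df.groupby("exp", group_keys=False).apply(
        lambda x: x.sample(df["exp"].value_counts().min())
    )
    print(balanced["exp"].value_counts())  # every class now appears exactly once
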
/models/AffectNet7_Swin_Discrete/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | import torchvision
11 | from torch.optim import lr_scheduler
12 | import re
13 | from tqdm import tqdm
14 |
15 | # Load the annotations for training and validation from separate CSV files
16 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
17 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
18 | train_annotations_path = (
19 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
20 | )
21 | valid_annotations_path = (
22 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
23 | )
24 | train_annotations_df = pd.read_csv(train_annotations_path)
25 | valid_annotations_df = pd.read_csv(valid_annotations_path)
26 |
27 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7]
28 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
29 |
30 | # Set parameters
31 | BATCHSIZE = 128
32 | NUM_EPOCHS = 20
33 | LR = 4e-5
34 |
35 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36 |
37 |
38 | # **** Create dataset and data loaders ****
39 | class CustomDataset(Dataset):
40 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
41 | self.dataframe = dataframe
42 | self.transform = transform
43 | self.root_dir = root_dir
44 | self.balance = balance
45 |
46 | if self.balance:
47 | self.dataframe = self.balance_dataset()
48 |
49 | def __len__(self):
50 | return len(self.dataframe)
51 |
52 | def __getitem__(self, idx):
53 | image_path = os.path.join(
54 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
55 | )
56 | if os.path.exists(image_path):
57 | image = Image.open(image_path)
58 | else:
59 | image = Image.new(
60 | "RGB", (224, 224), color="white"
61 | ) # Handle missing image file
62 |
63 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
64 |
65 | if self.transform:
66 | image = self.transform(image)
67 |
68 | return image, label
69 |
70 | def balance_dataset(self):
71 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
72 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
73 | )
74 | return balanced_df
75 |
76 |
77 | transform = transforms.Compose(
78 | [
79 | transforms.ElasticTransform(alpha=5.0, sigma=5.0),
80 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
81 | transforms.RandomGrayscale(p=0.1),
82 | transforms.RandomRotation(degrees=15),
83 | transforms.RandomVerticalFlip(),
84 | transforms.ColorJitter(0.15, 0.15, 0.15),
85 | torchvision.transforms.RandomAutocontrast(p=0.4),
86 | transforms.ToTensor(),
87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
88 | ]
89 | )
90 |
91 | transform_valid = transforms.Compose(
92 | [
93 | transforms.ToTensor(),
94 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
95 | ]
96 | )
97 |
98 | train_dataset = CustomDataset(
99 | dataframe=train_annotations_df,
100 | root_dir=IMAGE_FOLDER,
101 | transform=transform,
102 | balance=False,
103 | )
104 | valid_dataset = CustomDataset(
105 | dataframe=valid_annotations_df,
106 | root_dir=IMAGE_FOLDER_TEST,
107 | transform=transform_valid,
108 | balance=False,
109 | )
110 | train_loader = DataLoader(
111 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
112 | )
113 | valid_loader = DataLoader(
114 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
115 | )
116 |
117 | # * Define the model *
118 |
119 | # Initialize the model
120 | MODEL = models.swin_v2_t(weights="DEFAULT")
121 | MODEL.head = torch.nn.Linear(in_features=768, out_features=7, bias=True)
122 | MODEL.to(DEVICE)
123 |
124 | # Define (weighted) loss function
125 | # weights = torch.tensor([0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821])
126 | weights7 = torch.tensor(
127 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006]
128 | )
129 | criterion = nn.CrossEntropyLoss(weights7.to(DEVICE))
130 | criterion_val = (
131 | nn.CrossEntropyLoss()
132 | ) # unweighted loss for validation, since the validation set is balanced
133 |
134 |
135 | # Filter parameters for weight decay and no weight decay and create optimizer/scheduler
136 | def filter_params(params, include_patterns, exclude_patterns):
137 | included_params = []
138 | excluded_params = []
139 | for name, param in params:
140 | if any(re.search(pattern, name) for pattern in include_patterns):
141 | included_params.append(param)
142 | elif not any(re.search(pattern, name) for pattern in exclude_patterns):
143 | excluded_params.append(param)
144 | return included_params, excluded_params
145 |
146 |
147 | include_patterns = [
148 | r"^(?!.*\.bn)"
149 | ] # Match any layer name that doesn't contain '.bn' = BatchNorm parameters
150 | exclude_patterns = [r".*\.bn.*"] # Vice versa
151 | params_to_decay, params_not_to_decay = filter_params(
152 | MODEL.named_parameters(), include_patterns, exclude_patterns
153 | )
154 |
155 | # optimizer = optim.AdamW([
156 | # {'params': params_to_decay, 'weight_decay': ADAMW_WEIGHT_DECAY}, # Apply weight decay to these parameters
157 | # {'params': params_not_to_decay, 'weight_decay': 0.0} # Exclude weight decay for these parameters = 0.0
158 | # ], lr=LR)
159 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
160 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
161 |
162 | # ***** Train the model *****
163 | print("--- Start training ---")
164 | scaler = torch.cuda.amp.GradScaler()
165 | best_valid_loss = float("inf")  # lowest summed validation loss seen so far
166 |
167 | for epoch in range(NUM_EPOCHS):
168 | MODEL.train()
169 | total_train_correct = 0
170 | total_train_samples = 0
171 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"):
172 | images, labels = images.to(DEVICE), labels.to(DEVICE)
173 | optimizer.zero_grad()
174 | with torch.autocast(device_type="cuda", dtype=torch.float16):
175 | output = MODEL(images)
176 |             loss = criterion(output, labels)
177 | scaler.scale(loss).backward()
178 | scaler.step(optimizer)
179 | scaler.update()
180 |         scheduler.step()
181 | current_lr = optimizer.param_groups[0]["lr"]
182 |
183 | _, train_predicted = torch.max(output, 1)
184 | total_train_samples += labels.size(0)
185 | total_train_correct += (train_predicted == labels).sum().item()
186 |
187 | train_accuracy = (total_train_correct / total_train_samples) * 100
188 |
189 | MODEL.eval()
190 | valid_loss = 0.0
191 | correct = 0
192 | total = 0
193 | with torch.no_grad():
194 | for images, labels in valid_loader:
195 | images, labels = images.to(DEVICE), labels.to(DEVICE)
196 | outputs = MODEL(images)
197 |             loss = criterion_val(outputs, labels)
198 | valid_loss += loss.item()
199 | _, predicted = torch.max(outputs, 1)
200 | total += labels.size(0)
201 | correct += (predicted == labels).sum().item()
202 |
203 | print(
204 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
205 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
206 |         f"Validation Accuracy: {(correct/total)*100:.2f}%, "
207 |         f"Training Accuracy: {train_accuracy:.2f}%"
208 | )
209 |     # TBD: Overwrite the valid loss, then save the model as below whenever it is smaller than before
210 |
211 | if valid_loss < best_valid_loss:
212 | best_valid_loss = valid_loss
213 | print(f"Saving model at epoch {epoch+1}")
214 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
215 |
--------------------------------------------------------------------------------
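The hard-coded weights7 tensor above is consistent with normalized inverse class frequencies of the 7-class training split; a sketch of how such weights can be derived from the annotation CSV (assuming the exp column holds the class index, with class 7, contempt, dropped for the 7-class setup):

    import pandas as pd
    import torch

    df = pd.read_csv("../../affectnet_annotations/train_set_annotation_without_lnd.csv")
    df = df[df["exp"] != 7]                # keep the 7-class subset
    counts = df["exp"].value_counts().sort_index()

    inv = 1.0 / counts                     # rarer classes get larger weights
    weights7 = torch.tensor((inv / inv.sum()).to_numpy(), dtype=torch.float32)
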
/models/AffectNet7_Swin_VA/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 |
11 | # Load the validation annotations from the CSV file
12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
14 |
15 | valid_annotations_path = (
16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
17 | )
18 | valid_annotations_df = pd.read_csv(valid_annotations_path)
19 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7]
20 | # Set parameters
21 | BATCHSIZE = 128
22 |
23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24 |
25 |
26 | # **** Create dataset and data loaders ****
27 | class CustomDataset(Dataset):
28 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
29 | self.dataframe = dataframe
30 | self.transform = transform
31 | self.root_dir = root_dir
32 | self.balance = balance
33 |
34 | if self.balance:
35 | self.dataframe = self.balance_dataset()
36 |
37 | def __len__(self):
38 | return len(self.dataframe)
39 |
40 | def __getitem__(self, idx):
41 | image_path = os.path.join(
42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
43 | )
44 | image = Image.open(image_path)
45 |
46 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
47 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
48 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)
49 |
50 | if self.transform:
51 | image = self.transform(image)
52 |
53 | return image, classes, valence, arousal
54 |
55 | def balance_dataset(self):
56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
58 | )
59 | return balanced_df
60 |
61 |
62 | transform_valid = transforms.Compose(
63 | [
64 | transforms.ToTensor(),
65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
66 | ]
67 | )
68 |
69 | valid_dataset = CustomDataset(
70 | dataframe=valid_annotations_df,
71 | root_dir=IMAGE_FOLDER_TEST,
72 | transform=transform_valid,
73 | balance=False,
74 | )
75 |
76 | valid_loader = DataLoader(
77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
78 | )
79 |
80 | # * Define the model *
81 |
82 | # Initialize the model
83 | MODEL = models.swin_v2_t(weights="DEFAULT")
84 | MODEL.head = torch.nn.Linear(in_features=768, out_features=2, bias=True)
85 | MODEL.to(DEVICE)
86 |
87 |
88 | # **** Test the model performance for valence/arousal regression ****
89 |
90 | # Load the trained weights and set the model to evaluation mode
91 | MODEL.load_state_dict(torch.load("model.pt"))
92 | MODEL.to(DEVICE)
93 | MODEL.eval()
94 |
95 | all_val_true_values = []
96 | all_val_predicted_values = []
97 | all_aro_true_values = []
98 | all_aro_predicted_values = []
99 |
100 | # Start inference on test set
101 | with torch.no_grad():
102 | for images, _, val_true, aro_true in valid_loader:
103 | images, val_true, aro_true = (
104 | images.to(DEVICE),
105 | val_true.to(DEVICE),
106 | aro_true.to(DEVICE),
107 | )
108 |
109 | outputs = MODEL(images)
110 | val_pred = outputs[:, 0]
111 | aro_pred = outputs[:, 1]
112 |
113 | # Append to the lists --> Regression
114 | true_val_values = val_true.cpu().numpy()
115 | true_aro_values = aro_true.cpu().numpy()
116 | pred_val_values = val_pred.cpu().numpy()
117 | pred_aro_values = aro_pred.cpu().numpy()
118 | all_val_true_values.extend(true_val_values)
119 | all_aro_true_values.extend(true_aro_values)
120 | all_val_predicted_values.extend(pred_val_values)
121 | all_aro_predicted_values.extend(pred_aro_values)
122 |
123 | df = pd.DataFrame(
124 | {
125 | "val_pred": all_val_predicted_values,
126 | "val_true": all_val_true_values,
127 | "aro_pred": all_aro_predicted_values,
128 | "aro_true": all_aro_true_values,
129 | }
130 | )
131 | df.to_csv("inference.csv", index=False)
132 |
--------------------------------------------------------------------------------
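The inference.csv written above can be scored with standard valence/arousal regression metrics; a sketch computing RMSE and the concordance correlation coefficient (CCC) per dimension, both common for this task:

    import numpy as np
    import pandas as pd

    def ccc(x, y):
        # concordance correlation coefficient between two 1-D arrays
        cov = ((x - x.mean()) * (y - y.mean())).mean()
        return 2 * cov / (x.var() + y.var() + (x.mean() - y.mean()) ** 2)

    df = pd.read_csv("inference.csv")
    for dim in ("val", "aro"):
        t, p = df[f"{dim}_true"].to_numpy(), df[f"{dim}_pred"].to_numpy()
        rmse = np.sqrt(np.mean((p - t) ** 2))
        print(f"{dim}: RMSE={rmse:.4f}, CCC={ccc(t, p):.4f}")
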
/models/AffectNet8_Efficientnet_Combined/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from the CSV file
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 |
18 | # Set parameters
19 | BATCHSIZE = 128
20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 |
22 |
23 | # **** Create dataset and data loaders ****
24 | class CustomDataset(Dataset):
25 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
26 | self.dataframe = dataframe
27 | self.transform = transform
28 | self.root_dir = root_dir
29 | self.balance = balance
30 |
31 | if self.balance:
32 | self.dataframe = self.balance_dataset()
33 |
34 | def __len__(self):
35 | return len(self.dataframe)
36 |
37 | def __getitem__(self, idx):
38 | image_path = os.path.join(
39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
40 | )
41 | if os.path.exists(image_path):
42 | image = Image.open(image_path)
43 | else:
44 | image = Image.new(
45 | "RGB", (224, 224), color="white"
46 | ) # Handle missing image file
47 |
48 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
49 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
50 |
51 | if self.transform:
52 | image = self.transform(image)
53 |
54 | return image, classes, labels
55 |
56 | def balance_dataset(self):
57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
59 | )
60 | return balanced_df
61 |
62 |
63 | transform_valid = transforms.Compose(
64 | [
65 | transforms.ToTensor(),
66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
67 | ]
68 | )
69 |
70 | valid_dataset = CustomDataset(
71 | dataframe=valid_annotations_df,
72 | root_dir=IMAGE_FOLDER_TEST,
73 | transform=transform_valid,
74 | balance=False,
75 | )
76 | valid_loader = DataLoader(
77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
78 | )
79 |
80 | # ***** Define the model *****
81 |
82 | # Initialize the model
83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
84 | num_features = MODEL.classifier[1].in_features
85 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=10)
86 | MODEL.to(DEVICE)
87 |
88 | # **** Test the model performance for classification and regression ****
89 |
90 | # Load the trained weights and set the model to evaluation mode
91 | MODEL.load_state_dict(torch.load("model.pt"))
92 | MODEL.to(DEVICE)
93 | MODEL.eval()
94 |
95 | all_labels_cls = []
96 | all_predicted_cls = []
97 |
98 | all_true_val = []
99 | all_pred_val = []
100 | all_true_aro = []
101 | all_pred_aro = []
102 |
103 | # Start inference on test set
104 | with torch.no_grad():
105 |     for images, classes, labels in valid_loader:
106 | images, classes, labels = (
107 | images.to(DEVICE),
108 | classes.to(DEVICE),
109 | labels.to(DEVICE),
110 | )
111 |
112 | outputs = MODEL(images)
113 | outputs_cls = outputs[:, :8]
114 | outputs_reg = outputs[:, 8:]
115 | val_pred = outputs_reg[:, 0]
116 | aro_pred = outputs_reg[:, 1]
117 |
118 | _, predicted_cls = torch.max(outputs_cls, 1)
119 |
120 | all_labels_cls.extend(classes.cpu().numpy())
121 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
122 | val_true = labels[:, 0]
123 | aro_true = labels[:, 1]
124 |
125 | all_true_val.extend(val_true.cpu().numpy())
126 | all_true_aro.extend(aro_true.cpu().numpy())
127 | all_pred_val.extend(val_pred.cpu().numpy())
128 | all_pred_aro.extend(aro_pred.cpu().numpy())
129 |
130 | df = pd.DataFrame(
131 | {
132 | "cat_pred": all_predicted_cls,
133 | "cat_true": all_labels_cls,
134 | "val_pred": all_pred_val,
135 | "val_true": all_true_val,
136 | "aro_pred": all_pred_aro,
137 | "aro_true": all_true_aro,
138 | }
139 | )
140 | df.to_csv("inference.csv", index=False)
141 |
--------------------------------------------------------------------------------
/models/AffectNet8_Efficientnet_Combined/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | from torch.optim import lr_scheduler
11 | from tqdm import tqdm
12 |
13 | # Load the annotations for training and validation from separate CSV files
14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
16 | train_annotations_path = (
17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
18 | )
19 | valid_annotations_path = (
20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
21 | )
22 | train_annotations_df = pd.read_csv(train_annotations_path)
23 | valid_annotations_df = pd.read_csv(valid_annotations_path)
24 |
25 |
26 | # Set parameters
27 | BATCHSIZE = 128
28 | NUM_EPOCHS = 20
29 | LR = 4e-5
30 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31 |
32 |
33 | # **** Create dataset and data loaders ****
34 | class CustomDataset(Dataset):
35 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
36 | self.dataframe = dataframe
37 | self.transform = transform
38 | self.root_dir = root_dir
39 | self.balance = balance
40 |
41 | if self.balance:
42 | self.dataframe = self.balance_dataset()
43 |
44 | def __len__(self):
45 | return len(self.dataframe)
46 |
47 | def __getitem__(self, idx):
48 | image_path = os.path.join(
49 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
50 | )
51 | if os.path.exists(image_path):
52 | image = Image.open(image_path)
53 | else:
54 | image = Image.new(
55 | "RGB", (224, 224), color="white"
56 | ) # Handle missing image file
57 |
58 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
59 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
60 |
61 | if self.transform:
62 | image = self.transform(image)
63 |
64 | return image, classes, labels
65 |
66 | def balance_dataset(self):
67 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
68 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
69 | )
70 | return balanced_df
71 |
72 |
73 | transform = transforms.Compose(
74 | [
75 | transforms.RandomHorizontalFlip(0.5),
76 | transforms.RandomGrayscale(0.01),
77 | transforms.RandomRotation(10),
78 |         transforms.ColorJitter(
79 |             brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
80 |         ), # makes the model more robust to changes in lighting conditions
81 |         transforms.RandomPerspective(
82 |             distortion_scale=0.2, p=0.5
83 |         ), # helps when images are captured from varying perspectives
84 |         transforms.ToTensor(), # converts the PIL image to a float tensor in [0, 1]
85 |         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
86 |         transforms.RandomErasing(
87 |             p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
88 |         ), # TEST: should help reduce overfitting
89 | ]
90 | )
91 |
92 | transform_valid = transforms.Compose(
93 | [
94 | transforms.ToTensor(),
95 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
96 | ]
97 | )
98 |
99 | train_dataset = CustomDataset(
100 | dataframe=train_annotations_df,
101 | root_dir=IMAGE_FOLDER,
102 | transform=transform,
103 | balance=False,
104 | )
105 | valid_dataset = CustomDataset(
106 | dataframe=valid_annotations_df,
107 | root_dir=IMAGE_FOLDER_TEST,
108 | transform=transform_valid,
109 | balance=False,
110 | )
111 | train_loader = DataLoader(
112 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
113 | )
114 | valid_loader = DataLoader(
115 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
116 | )
117 |
118 | # ***** Define the model *****
119 |
120 | # Initialize the model
121 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
122 | num_features = MODEL.classifier[1].in_features
123 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=10)
124 | MODEL.to(DEVICE)
125 | # Define (weighted) loss function
126 | weights = torch.tensor(
127 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]
128 | )
129 | criterion_cls = nn.CrossEntropyLoss(weights.to(DEVICE))
130 | criterion_cls_val = (
131 | nn.CrossEntropyLoss()
132 | ) # unweighted loss for validation, since the validation set is balanced
133 | criterion_reg = nn.MSELoss()
134 |
135 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
136 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
137 |
138 | # ***** Train the model *****
139 | print("--- Start training ---")
140 | scaler = torch.cuda.amp.GradScaler()
141 | best_valid_loss = float("inf")  # lowest summed validation loss seen so far
142 |
143 | for epoch in range(NUM_EPOCHS):
144 | MODEL.train()
145 | total_train_correct = 0
146 | total_train_samples = 0
147 | for images, classes, labels in tqdm(
148 | train_loader, desc="Epoch train_loader progress"
149 | ):
150 | images, classes, labels = (
151 | images.to(DEVICE),
152 | classes.to(DEVICE),
153 | labels.to(DEVICE),
154 | )
155 | optimizer.zero_grad()
156 | with torch.autocast(device_type="cuda", dtype=torch.float16):
157 | outputs = MODEL(images)
158 | outputs_cls = outputs[:, :8]
159 | outputs_reg = outputs[:, 8:]
160 |             loss = criterion_cls(
161 |                 outputs_cls, classes
162 |             ) + 5 * criterion_reg(outputs_reg, labels)
163 | scaler.scale(loss).backward()
164 | scaler.step(optimizer)
165 | scaler.update()
166 |         scheduler.step()
167 | current_lr = optimizer.param_groups[0]["lr"]
168 |
169 | _, train_predicted = torch.max(outputs_cls, 1)
170 | total_train_samples += classes.size(0)
171 | total_train_correct += (train_predicted == classes).sum().item()
172 |
173 | train_accuracy = (total_train_correct / total_train_samples) * 100
174 |
175 | MODEL.eval()
176 | valid_loss = 0.0
177 | correct = 0
178 | total = 0
179 | with torch.no_grad():
180 | for images, classes, labels in valid_loader:
181 | images, classes, labels = (
182 | images.to(DEVICE),
183 | classes.to(DEVICE),
184 | labels.to(DEVICE),
185 | )
186 | outputs = MODEL(images)
187 | outputs_cls = outputs[:, :8]
188 | outputs_reg = outputs[:, 8:]
189 |             loss = criterion_cls_val(
190 |                 outputs_cls, classes
191 |             ) + 5 * criterion_reg(outputs_reg, labels)
192 | valid_loss += loss.item()
193 | _, predicted = torch.max(outputs_cls, 1)
194 | total += classes.size(0)
195 | correct += (predicted == classes).sum().item()
196 |
197 | print(
198 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
199 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
200 |         f"Validation Accuracy: {(correct/total)*100:.2f}%, "
201 |         f"Training Accuracy: {train_accuracy:.2f}%"
202 | )
203 |
204 | if valid_loss < best_valid_loss:
205 | best_valid_loss = valid_loss
206 | print(f"Saving model at epoch {epoch+1}")
207 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
208 |
--------------------------------------------------------------------------------
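One detail worth noting in the loop above: scheduler.step() runs once per batch, and CosineAnnealingLR's T_max counts those steps, so T_max=BATCHSIZE * NUM_EPOCHS fixes the cosine period at 2,560 steps regardless of dataset size. A sketch of the variant that stretches a single decay over the whole run (steps_per_epoch stands in for len(train_loader); the value is hypothetical):

    import torch
    from torch import nn, optim
    from torch.optim import lr_scheduler

    model = nn.Linear(8, 2)                  # stand-in model
    opt = optim.AdamW(model.parameters(), lr=4e-5)

    steps_per_epoch = 2240                   # hypothetical len(train_loader)
    sched = lr_scheduler.CosineAnnealingLR(opt, T_max=steps_per_epoch * 20)
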
/models/AffectNet8_Efficientnet_Discrete/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from the CSV file
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 |
17 | valid_annotations_df = pd.read_csv(valid_annotations_path)
18 |
19 |
20 | # Set parameters
21 | BATCHSIZE = 128
22 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only
23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24 |
25 |
26 | # **** Create dataset and data loaders ****
27 | class CustomDataset(Dataset):
28 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
29 | self.dataframe = dataframe
30 | self.transform = transform
31 | self.root_dir = root_dir
32 | self.balance = balance
33 |
34 | if self.balance:
35 | self.dataframe = self.balance_dataset()
36 |
37 | def __len__(self):
38 | return len(self.dataframe)
39 |
40 | def __getitem__(self, idx):
41 | image_path = os.path.join(
42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
43 | )
44 | if os.path.exists(image_path):
45 | image = Image.open(image_path)
46 | else:
47 | image = Image.new(
48 | "RGB", (224, 224), color="white"
49 | ) # Handle missing image file
50 |
51 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
52 |
53 | if self.transform:
54 | image = self.transform(image)
55 |
56 | return image, label
57 |
58 | def balance_dataset(self):
59 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
60 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
61 | )
62 | return balanced_df
63 |
64 |
65 | transform_valid = transforms.Compose(
66 | [
67 | transforms.ToTensor(),
68 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
69 | ]
70 | )
71 |
72 |
73 | valid_dataset = CustomDataset(
74 | dataframe=valid_annotations_df,
75 | root_dir=IMAGE_FOLDER_TEST,
76 | transform=transform_valid,
77 | balance=False,
78 | )
79 |
80 | valid_loader = DataLoader(
81 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
82 | )
83 |
84 | # ***** Define the model *****
85 |
86 | # Initialize the model
87 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
88 | num_features = MODEL.classifier[1].in_features
89 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=8)
90 | MODEL.to(DEVICE)
91 |
92 | # **** Test the model performance for classification ****
93 |
94 | # Load the trained weights and set the model to evaluation mode
95 | MODEL.load_state_dict(torch.load("model.pt"))
96 | MODEL.to(DEVICE)
97 | MODEL.eval()
98 |
99 | all_labels_cls = []
100 | all_predicted_cls = []
101 |
102 | # Start inference on test set
103 | with torch.no_grad():
104 |     for images, labels_cls in valid_loader:
105 | images = images.to(DEVICE)
106 | labels_cls = labels_cls.to(DEVICE)
107 |
108 | outputs = MODEL(images)
109 |
110 | _, predicted_cls = torch.max(outputs, 1)
111 |
112 | all_labels_cls.extend(labels_cls.cpu().numpy())
113 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
114 |
115 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls})
116 | df.to_csv("inference.csv", index=False)
117 |
--------------------------------------------------------------------------------
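The categorical inference.csv above pairs predicted and true class indices; a sketch turning it into an overall accuracy and a confusion matrix (scikit-learn is an extra dependency assumed here, not one the script itself needs):

    import pandas as pd
    from sklearn.metrics import accuracy_score, confusion_matrix

    df = pd.read_csv("inference.csv")
    print("accuracy:", accuracy_score(df["cat_true"], df["cat_pred"]))
    print(confusion_matrix(df["cat_true"], df["cat_pred"]))
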
/models/AffectNet8_Efficientnet_Discrete/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | import torchvision
11 | from torch.optim import lr_scheduler
12 | from tqdm import tqdm
13 |
14 | # Load the annotations for training and validation from separate CSV files
15 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
16 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
17 | train_annotations_path = (
18 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
19 | )
20 | valid_annotations_path = (
21 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
22 | )
23 | train_annotations_df = pd.read_csv(train_annotations_path)
24 | valid_annotations_df = pd.read_csv(valid_annotations_path)
25 |
26 | # Set parameters
27 | BATCHSIZE = 128
28 | NUM_EPOCHS = 20
29 | LR = 4e-5
30 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only
31 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32 |
33 |
34 | # **** Create dataset and data loaders ****
35 | class CustomDataset(Dataset):
36 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
37 | self.dataframe = dataframe
38 | self.transform = transform
39 | self.root_dir = root_dir
40 | self.balance = balance
41 |
42 | if self.balance:
43 | self.dataframe = self.balance_dataset()
44 |
45 | def __len__(self):
46 | return len(self.dataframe)
47 |
48 | def __getitem__(self, idx):
49 | image_path = os.path.join(
50 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
51 | )
52 | if os.path.exists(image_path):
53 | image = Image.open(image_path)
54 | else:
55 | image = Image.new(
56 | "RGB", (224, 224), color="white"
57 | ) # Handle missing image file
58 |
59 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
60 |
61 | if self.transform:
62 | image = self.transform(image)
63 |
64 | return image, label
65 |
66 | def balance_dataset(self):
67 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
68 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
69 | )
70 | return balanced_df
71 |
72 |
73 | transform = transforms.Compose(
74 | [
75 | transforms.ElasticTransform(alpha=5.0, sigma=5.0),
76 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
77 | transforms.RandomGrayscale(p=0.1),
78 | transforms.RandomRotation(degrees=15),
79 | transforms.RandomVerticalFlip(),
80 | transforms.ColorJitter(0.15, 0.15, 0.15),
81 | torchvision.transforms.RandomAutocontrast(p=0.4),
82 | transforms.ToTensor(),
83 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
84 | ]
85 | )
86 |
87 |
88 | transform_valid = transforms.Compose(
89 | [
90 | transforms.ToTensor(),
91 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
92 | ]
93 | )
94 |
95 | train_dataset = CustomDataset(
96 | dataframe=train_annotations_df,
97 | root_dir=IMAGE_FOLDER,
98 | transform=transform,
99 | balance=False,
100 | )
101 | valid_dataset = CustomDataset(
102 | dataframe=valid_annotations_df,
103 | root_dir=IMAGE_FOLDER_TEST,
104 | transform=transform_valid,
105 | balance=False,
106 | )
107 | train_loader = DataLoader(
108 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
109 | )
110 | valid_loader = DataLoader(
111 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
112 | )
113 |
114 | # ***** Define the model *****
115 |
116 | # Initialize the model
117 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
118 | num_features = MODEL.classifier[1].in_features
119 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=8)
120 | MODEL.to(DEVICE)
121 | # Define (weighted) loss function
122 | weights = torch.tensor(
123 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]
124 | )
125 | criterion = nn.CrossEntropyLoss(weights.to(DEVICE))
126 | criterion_val = (
127 | nn.CrossEntropyLoss()
128 | ) # unweighted loss for validation, since the validation set is balanced
129 |
130 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
131 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
132 |
133 | # ***** Train the model *****
134 | print("--- Start training ---")
135 | scaler = torch.cuda.amp.GradScaler()
136 | best_valid_loss = float("inf")  # lowest summed validation loss seen so far
137 |
138 | for epoch in range(NUM_EPOCHS):
139 | MODEL.train()
140 | total_train_correct = 0
141 | total_train_samples = 0
142 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"):
143 | images, labels = images.to(DEVICE), labels.to(DEVICE)
144 | optimizer.zero_grad()
145 | with torch.autocast(device_type="cuda", dtype=torch.float16):
146 | output = MODEL(images)
147 |             loss = criterion(output, labels)
148 | scaler.scale(loss).backward()
149 | scaler.step(optimizer)
150 | scaler.update()
151 |         scheduler.step()
152 | current_lr = optimizer.param_groups[0]["lr"]
153 |
154 | _, train_predicted = torch.max(output, 1)
155 | total_train_samples += labels.size(0)
156 | total_train_correct += (train_predicted == labels).sum().item()
157 |
158 | train_accuracy = (total_train_correct / total_train_samples) * 100
159 |
160 | MODEL.eval()
161 | valid_loss = 0.0
162 | correct = 0
163 | total = 0
164 | with torch.no_grad():
165 | for images, labels in valid_loader:
166 | images, labels = images.to(DEVICE), labels.to(DEVICE)
167 | outputs = MODEL(images)
168 |             loss = criterion_val(outputs, labels)
169 | valid_loss += loss.item()
170 | _, predicted = torch.max(outputs, 1)
171 | total += labels.size(0)
172 | correct += (predicted == labels).sum().item()
173 |
174 | print(
175 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
176 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
177 |         f"Validation Accuracy: {(correct/total)*100:.2f}%, "
178 |         f"Training Accuracy: {train_accuracy:.2f}%"
179 | )
180 |
181 | if valid_loss < best_valid_loss:
182 | best_valid_loss = valid_loss
183 | print(f"Saving model at epoch {epoch+1}")
184 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
185 |
--------------------------------------------------------------------------------
/models/AffectNet8_Efficientnet_VA/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 |
11 | # Load the validation annotations from the CSV file
12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
14 |
15 | valid_annotations_path = (
16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
17 | )
18 | valid_annotations_df = pd.read_csv(valid_annotations_path)
19 |
20 | # Set parameters
21 | BATCHSIZE = 128
22 |
23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24 |
25 |
26 | # **** Create dataset and data loaders ****
27 | class CustomDataset(Dataset):
28 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
29 | self.dataframe = dataframe
30 | self.transform = transform
31 | self.root_dir = root_dir
32 | self.balance = balance
33 |
34 | if self.balance:
35 | self.dataframe = self.balance_dataset()
36 |
37 | def __len__(self):
38 | return len(self.dataframe)
39 |
40 | def __getitem__(self, idx):
41 | image_path = os.path.join(
42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
43 | )
44 | image = Image.open(image_path)
45 |
46 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
47 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
48 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)
49 |
50 | if self.transform:
51 | image = self.transform(image)
52 |
53 | return image, classes, valence, arousal
54 |
55 | def balance_dataset(self):
56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
58 | )
59 | return balanced_df
60 |
61 |
62 | transform_valid = transforms.Compose(
63 | [
64 | transforms.ToTensor(),
65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
66 | ]
67 | )
68 |
69 | valid_dataset = CustomDataset(
70 | dataframe=valid_annotations_df,
71 | root_dir=IMAGE_FOLDER_TEST,
72 | transform=transform_valid,
73 | balance=False,
74 | )
75 |
76 | valid_loader = DataLoader(
77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
78 | )
79 |
80 | # ***** Define the model *****
81 |
82 | # Initialize the model
83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT")
84 | num_features = MODEL.classifier[1].in_features
85 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=2)
86 | MODEL.to(DEVICE)
87 |
88 |
89 | # **** Test the model performance for valence/arousal regression ****
90 |
91 | # Load the trained weights and set the model to evaluation mode
92 | MODEL.load_state_dict(torch.load("model.pt"))
93 | MODEL.to(DEVICE)
94 | MODEL.eval()
95 |
96 | all_val_true_values = []
97 | all_val_predicted_values = []
98 | all_aro_true_values = []
99 | all_aro_predicted_values = []
100 |
101 | # Start inference on test set
102 | with torch.no_grad():
103 | for images, _, val_true, aro_true in valid_loader:
104 | images, val_true, aro_true = (
105 | images.to(DEVICE),
106 | val_true.to(DEVICE),
107 | aro_true.to(DEVICE),
108 | )
109 |
110 | outputs = MODEL(images)
111 | val_pred = outputs[:, 0]
112 | aro_pred = outputs[:, 1]
113 |
114 | # Append to the lists --> Regression
115 | true_val_values = val_true.cpu().numpy()
116 | true_aro_values = aro_true.cpu().numpy()
117 | pred_val_values = val_pred.cpu().numpy()
118 | pred_aro_values = aro_pred.cpu().numpy()
119 | all_val_true_values.extend(true_val_values)
120 | all_aro_true_values.extend(true_aro_values)
121 | all_val_predicted_values.extend(pred_val_values)
122 | all_aro_predicted_values.extend(pred_aro_values)
123 |
124 | df = pd.DataFrame(
125 | {
126 | "val_pred": all_val_predicted_values,
127 | "val_true": all_val_true_values,
128 | "aro_pred": all_aro_predicted_values,
129 | "aro_true": all_aro_true_values,
130 | }
131 | )
132 | df.to_csv("inference.csv", index=False)
133 |
--------------------------------------------------------------------------------
/models/AffectNet8_Maxvit_Combined/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from the CSV file
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 |
18 | # Set parameters
19 | BATCHSIZE = 128
20 | MODEL = models.maxvit_t(weights="DEFAULT")
21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22 |
23 |
24 | # **** Create dataset and data loaders ****
25 | class CustomDataset(Dataset):
26 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
27 | self.dataframe = dataframe
28 | self.transform = transform
29 | self.root_dir = root_dir
30 | self.balance = balance
31 |
32 | if self.balance:
33 | self.dataframe = self.balance_dataset()
34 |
35 | def __len__(self):
36 | return len(self.dataframe)
37 |
38 | def __getitem__(self, idx):
39 | image_path = os.path.join(
40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
41 | )
42 | if os.path.exists(image_path):
43 | image = Image.open(image_path)
44 | else:
45 | image = Image.new(
46 | "RGB", (224, 224), color="white"
47 | ) # Handle missing image file
48 |
49 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
50 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
51 |
52 | if self.transform:
53 | image = self.transform(image)
54 |
55 | return image, classes, labels
56 |
57 | def balance_dataset(self):
58 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
59 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
60 | )
61 | return balanced_df
62 |
63 |
64 | transform_valid = transforms.Compose(
65 | [
66 | transforms.ToTensor(),
67 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
68 | ]
69 | )
70 |
71 | valid_dataset = CustomDataset(
72 | dataframe=valid_annotations_df,
73 | root_dir=IMAGE_FOLDER_TEST,
74 | transform=transform_valid,
75 | balance=False,
76 | )
77 | valid_loader = DataLoader(
78 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
79 | )
80 |
81 | # ***** Define the model *****
82 |
83 | # Initialize the model
84 | block_channels = MODEL.classifier[3].in_features
85 | MODEL.classifier = nn.Sequential(
86 | nn.AdaptiveAvgPool2d(1),
87 | nn.Flatten(),
88 | nn.LayerNorm(block_channels),
89 | nn.Linear(block_channels, block_channels),
90 | nn.Tanh(),
91 | nn.Linear(block_channels, 10, bias=False),
92 | )
93 | MODEL.to(DEVICE) # Put the model to the GPU
94 |
95 | # Load the trained weights and set the model to evaluation mode
96 | MODEL.load_state_dict(torch.load("model.pt"))
97 | MODEL.to(DEVICE)
98 | MODEL.eval()
99 |
100 | all_labels_cls = []
101 | all_predicted_cls = []
102 |
103 | all_true_val = []
104 | all_pred_val = []
105 | all_true_aro = []
106 | all_pred_aro = []
107 |
108 | # Start inference on test set
109 | with torch.no_grad():
110 |     for images, classes, labels in valid_loader:
111 | images, classes, labels = (
112 | images.to(DEVICE),
113 | classes.to(DEVICE),
114 | labels.to(DEVICE),
115 | )
116 |
117 | outputs = MODEL(images)
118 | outputs_cls = outputs[:, :8]
119 | outputs_reg = outputs[:, 8:]
120 | val_pred = outputs_reg[:, 0]
121 | aro_pred = outputs_reg[:, 1]
122 |
123 | _, predicted_cls = torch.max(outputs_cls, 1)
124 |
125 | all_labels_cls.extend(classes.cpu().numpy())
126 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
127 | val_true = labels[:, 0]
128 | aro_true = labels[:, 1]
129 |
130 | all_true_val.extend(val_true.cpu().numpy())
131 | all_true_aro.extend(aro_true.cpu().numpy())
132 | all_pred_val.extend(val_pred.cpu().numpy())
133 | all_pred_aro.extend(aro_pred.cpu().numpy())
134 |
135 | df = pd.DataFrame(
136 | {
137 | "cat_pred": all_predicted_cls,
138 | "cat_true": all_labels_cls,
139 | "val_pred": all_pred_val,
140 | "val_true": all_true_val,
141 | "aro_pred": all_pred_aro,
142 | "aro_true": all_true_aro,
143 | }
144 | )
145 | df.to_csv("inference.csv", index=False)
146 |
--------------------------------------------------------------------------------
/models/AffectNet8_Maxvit_Combined/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | from torch.optim import lr_scheduler
11 | from tqdm import tqdm
12 |
13 | # Load the annotations for training and validation from separate CSV files
14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
16 | train_annotations_path = (
17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
18 | )
19 | valid_annotations_path = (
20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
21 | )
22 | train_annotations_df = pd.read_csv(train_annotations_path)
23 | valid_annotations_df = pd.read_csv(valid_annotations_path)
24 |
25 |
26 | # Set parameters
27 | BATCHSIZE = 128
28 | NUM_EPOCHS = 20
29 | LR = 4e-5
30 | MODEL = models.maxvit_t(weights="DEFAULT")
31 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32 |
33 |
34 | # **** Create dataset and data loaders ****
35 | class CustomDataset(Dataset):
36 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
37 | self.dataframe = dataframe
38 | self.transform = transform
39 | self.root_dir = root_dir
40 | self.balance = balance
41 |
42 | if self.balance:
43 | self.dataframe = self.balance_dataset()
44 |
45 | def __len__(self):
46 | return len(self.dataframe)
47 |
48 | def __getitem__(self, idx):
49 | image_path = os.path.join(
50 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
51 | )
52 | if os.path.exists(image_path):
53 | image = Image.open(image_path)
54 | else:
55 | image = Image.new(
56 | "RGB", (224, 224), color="white"
57 | ) # Handle missing image file
58 |
59 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
60 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
61 |
62 | if self.transform:
63 | image = self.transform(image)
64 |
65 | return image, classes, labels
66 |
67 | def balance_dataset(self):
68 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
69 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
70 | )
71 | return balanced_df
72 |
73 |
74 | transform = transforms.Compose(
75 | [
76 | transforms.RandomHorizontalFlip(0.5),
77 | transforms.RandomGrayscale(0.01),
78 | transforms.RandomRotation(10),
79 |         transforms.ColorJitter(
80 |             brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
81 |         ), # makes the model more robust to changes in lighting conditions
82 |         transforms.RandomPerspective(
83 |             distortion_scale=0.2, p=0.5
84 |         ), # helps when images are captured from varying perspectives
85 |         transforms.ToTensor(), # converts the PIL image to a float tensor in [0, 1]
86 |         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
87 |         transforms.RandomErasing(
88 |             p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
89 |         ), # TEST: should help reduce overfitting
90 | ]
91 | )
92 |
93 | transform_valid = transforms.Compose(
94 | [
95 | transforms.ToTensor(),
96 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
97 | ]
98 | )
99 |
100 | train_dataset = CustomDataset(
101 | dataframe=train_annotations_df,
102 | root_dir=IMAGE_FOLDER,
103 | transform=transform,
104 | balance=False,
105 | )
106 | valid_dataset = CustomDataset(
107 | dataframe=valid_annotations_df,
108 | root_dir=IMAGE_FOLDER_TEST,
109 | transform=transform_valid,
110 | balance=False,
111 | )
112 | train_loader = DataLoader(
113 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
114 | )
115 | valid_loader = DataLoader(
116 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
117 | )
118 |
119 | # ***** Define the model *****
120 |
121 | # Initialize the model
122 | block_channels = MODEL.classifier[3].in_features
123 | MODEL.classifier = nn.Sequential(
124 | nn.AdaptiveAvgPool2d(1),
125 | nn.Flatten(),
126 | nn.LayerNorm(block_channels),
127 | nn.Linear(block_channels, block_channels),
128 | nn.Tanh(),
129 | nn.Linear(block_channels, 10, bias=False),
130 | )
131 | MODEL.to(DEVICE) # Put the model to the GPU
132 |
133 | # Define (weighted) loss function
134 | weights = torch.tensor(
135 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]
136 | )
137 | criterion_cls = nn.CrossEntropyLoss(weights.to(DEVICE))
138 | criterion_cls_val = (
139 | nn.CrossEntropyLoss()
140 | ) # unweighted loss for validation, since the validation set is balanced
141 | criterion_reg = nn.MSELoss()
142 |
143 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
144 | scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
145 |
146 | # ***** Train the model *****
147 | print("--- Start training ---")
148 | scaler = torch.cuda.amp.GradScaler()
149 | best_valid_loss = float("inf")  # lowest summed validation loss seen so far
150 |
151 | for epoch in range(NUM_EPOCHS):
152 | MODEL.train()
153 | total_train_correct = 0
154 | total_train_samples = 0
155 | for images, classes, labels in tqdm(
156 | train_loader, desc="Epoch train_loader progress"
157 | ):
158 | images, classes, labels = (
159 | images.to(DEVICE),
160 | classes.to(DEVICE),
161 | labels.to(DEVICE),
162 | )
163 | optimizer.zero_grad()
164 | with torch.autocast(device_type="cuda", dtype=torch.float16):
165 | outputs = MODEL(images)
166 | outputs_cls = outputs[:, :8]
167 | outputs_reg = outputs[:, 8:]
168 |             loss = criterion_cls(
169 |                 outputs_cls, classes
170 |             ) + 5 * criterion_reg(outputs_reg, labels)
171 | scaler.scale(loss).backward()
172 | scaler.step(optimizer)
173 | scaler.update()
174 |         scheduler.step()
175 | current_lr = optimizer.param_groups[0]["lr"]
176 |
177 | _, train_predicted = torch.max(outputs_cls, 1)
178 | total_train_samples += classes.size(0)
179 | total_train_correct += (train_predicted == classes).sum().item()
180 |
181 | train_accuracy = (total_train_correct / total_train_samples) * 100
182 |
183 | MODEL.eval()
184 | valid_loss = 0.0
185 | correct = 0
186 | total = 0
187 | with torch.no_grad():
188 | for images, classes, labels in valid_loader:
189 | images, classes, labels = (
190 | images.to(DEVICE),
191 | classes.to(DEVICE),
192 | labels.to(DEVICE),
193 | )
194 | outputs = MODEL(images)
195 | outputs_cls = outputs[:, :8]
196 | outputs_reg = outputs[:, 8:]
197 |             loss = criterion_cls_val(
198 |                 outputs_cls, classes
199 |             ) + 5 * criterion_reg(outputs_reg, labels)
200 | valid_loss += loss.item()
201 | _, predicted = torch.max(outputs_cls, 1)
202 | total += classes.size(0)
203 | correct += (predicted == classes).sum().item()
204 |
205 | print(
206 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
207 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
208 |         f"Validation Accuracy: {(correct/total)*100:.2f}%, "
209 |         f"Training Accuracy: {train_accuracy:.2f}%"
210 | )
211 |
212 | if valid_loss < best_valid_loss:
213 | best_valid_loss = valid_loss
214 | print(f"Saving model at epoch {epoch+1}")
215 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
216 |
--------------------------------------------------------------------------------
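The replacement classifier above mirrors the structure of torchvision's stock maxvit_t head (pool, flatten, LayerNorm, hidden Linear, Tanh, final Linear) and only widens the last layer to 10 outputs: 8 expression logits plus valence and arousal. A quick shape check on a dummy batch:

    import torch
    from torch import nn
    from torchvision import models

    m = models.maxvit_t(weights=None)  # random weights are enough for a shape check
    c = m.classifier[3].in_features
    m.classifier = nn.Sequential(
        nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.LayerNorm(c),
        nn.Linear(c, c), nn.Tanh(), nn.Linear(c, 10, bias=False),
    )
    print(m(torch.randn(2, 3, 224, 224)).shape)  # torch.Size([2, 10])
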
/models/AffectNet8_Maxvit_Discrete/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from the CSV file
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 |
17 | valid_annotations_df = pd.read_csv(valid_annotations_path)
18 |
19 |
20 | # Set parameters
21 | BATCHSIZE = 128
22 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only
23 | MODEL = models.maxvit_t(weights="DEFAULT")
24 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25 |
26 |
27 | # **** Create dataset and data loaders ****
28 | class CustomDataset(Dataset):
29 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
30 | self.dataframe = dataframe
31 | self.transform = transform
32 | self.root_dir = root_dir
33 | self.balance = balance
34 |
35 | if self.balance:
36 | self.dataframe = self.balance_dataset()
37 |
38 | def __len__(self):
39 | return len(self.dataframe)
40 |
41 | def __getitem__(self, idx):
42 | image_path = os.path.join(
43 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
44 | )
45 | if os.path.exists(image_path):
46 | image = Image.open(image_path)
47 | else:
48 | image = Image.new(
49 | "RGB", (224, 224), color="white"
50 | ) # Handle missing image file
51 |
52 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
53 |
54 | if self.transform:
55 | image = self.transform(image)
56 |
57 | return image, label
58 |
59 | def balance_dataset(self):
60 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
61 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
62 | )
63 | return balanced_df
64 |
65 |
66 | transform_valid = transforms.Compose(
67 | [
68 | transforms.ToTensor(),
69 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
70 | ]
71 | )
72 |
73 |
74 | valid_dataset = CustomDataset(
75 | dataframe=valid_annotations_df,
76 | root_dir=IMAGE_FOLDER_TEST,
77 | transform=transform_valid,
78 | balance=False,
79 | )
80 |
81 | valid_loader = DataLoader(
82 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
83 | )
84 |
85 | # ***** Define the model *****
86 |
87 | # Initialize the model
88 | block_channels = MODEL.classifier[3].in_features
89 | MODEL.classifier = nn.Sequential(
90 | nn.AdaptiveAvgPool2d(1),
91 | nn.Flatten(),
92 | nn.LayerNorm(block_channels),
93 | nn.Linear(block_channels, block_channels),
94 | nn.Tanh(),
95 | nn.Linear(block_channels, 8, bias=False),
96 | )
97 | MODEL.to(DEVICE) # Put the model to the GPU
98 |
99 | # **** Test the model performance for classification ****
100 |
101 | # Set the model to evaluation mode
102 | MODEL.load_state_dict(torch.load("model.pt"))
103 | MODEL.to(DEVICE)
104 | MODEL.eval()
105 |
106 | all_labels_cls = []
107 | all_predicted_cls = []
108 |
109 | # Start inference on test set
110 | with torch.no_grad():
111 | for images, labels_cls in iter(valid_loader):
112 | images = images.to(DEVICE)
113 | labels_cls = labels_cls.to(DEVICE)
114 |
115 | outputs = MODEL(images)
116 |
117 | _, predicted_cls = torch.max(outputs, 1)
118 |
119 | all_labels_cls.extend(labels_cls.cpu().numpy())
120 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
121 |
122 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls})
123 | df.to_csv("inference.csv", index=False)
124 |
--------------------------------------------------------------------------------
/models/AffectNet8_Maxvit_Discrete/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | import torchvision
11 | from torch.optim import lr_scheduler
12 | from tqdm import tqdm
13 |
14 | # Load the annotations for training and validation from separate CSV files
15 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
16 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
17 | train_annotations_path = (
18 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
19 | )
20 | valid_annotations_path = (
21 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
22 | )
23 | train_annotations_df = pd.read_csv(train_annotations_path)
24 | valid_annotations_df = pd.read_csv(valid_annotations_path)
25 |
26 | # Set parameters
27 | BATCHSIZE = 128
28 | NUM_EPOCHS = 20
29 | LR = 4e-5
30 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only
31 | MODEL = models.maxvit_t(weights="DEFAULT")
32 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
33 |
34 |
35 | # **** Create dataset and data loaders ****
36 | class CustomDataset(Dataset):
37 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
38 | self.dataframe = dataframe
39 | self.transform = transform
40 | self.root_dir = root_dir
41 | self.balance = balance
42 |
43 | if self.balance:
44 | self.dataframe = self.balance_dataset()
45 |
46 | def __len__(self):
47 | return len(self.dataframe)
48 |
49 | def __getitem__(self, idx):
50 | image_path = os.path.join(
51 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
52 | )
53 | if os.path.exists(image_path):
54 | image = Image.open(image_path)
55 | else:
56 | image = Image.new(
57 | "RGB", (224, 224), color="white"
58 | ) # Handle missing image file
59 |
60 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
61 |
62 | if self.transform:
63 | image = self.transform(image)
64 |
65 | return image, label
66 |
67 | def balance_dataset(self):
68 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
69 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
70 | )
71 | return balanced_df
72 |
73 |
74 | transform = transforms.Compose(
75 | [
76 | transforms.ElasticTransform(alpha=5.0, sigma=5.0),
77 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
78 | transforms.RandomGrayscale(p=0.1),
79 | transforms.RandomRotation(degrees=15),
80 | transforms.RandomVerticalFlip(),
81 | transforms.ColorJitter(0.15, 0.15, 0.15),
82 | torchvision.transforms.RandomAutocontrast(p=0.4),
83 | transforms.ToTensor(),
84 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
85 | ]
86 | )
87 |
88 |
89 | transform_valid = transforms.Compose(
90 | [
91 | transforms.ToTensor(),
92 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
93 | ]
94 | )
95 |
96 | train_dataset = CustomDataset(
97 | dataframe=train_annotations_df,
98 | root_dir=IMAGE_FOLDER,
99 | transform=transform,
100 | balance=False,
101 | )
102 | valid_dataset = CustomDataset(
103 | dataframe=valid_annotations_df,
104 | root_dir=IMAGE_FOLDER_TEST,
105 | transform=transform_valid,
106 | balance=False,
107 | )
108 | train_loader = DataLoader(
109 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
110 | )
111 | valid_loader = DataLoader(
112 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
113 | )
114 |
115 | # ***** Define the model *****
116 |
117 | # Initialize the model
118 | block_channels = MODEL.classifier[3].in_features
119 | MODEL.classifier = nn.Sequential(
120 | nn.AdaptiveAvgPool2d(1),
121 | nn.Flatten(),
122 | nn.LayerNorm(block_channels),
123 | nn.Linear(block_channels, block_channels),
124 | nn.Tanh(),
125 | nn.Linear(block_channels, 8, bias=False),
126 | )
127 | MODEL.to(DEVICE) # Put the model to the GPU
128 |
129 | # Define (weighted) loss function
130 | weights = torch.tensor(
131 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]
132 | )
133 | criterion = nn.CrossEntropyLoss(weights.to(DEVICE))
134 | criterion_val = (
135 | nn.CrossEntropyLoss()
136 | )  # Unweighted loss for validation, since the validation set is balanced
137 |
138 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
139 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
140 |
141 | # ***** Train the model *****
142 | print("--- Start training ---")
143 | scaler = torch.cuda.amp.GradScaler()
144 | best_valid_loss = 100
145 |
146 | for epoch in range(NUM_EPOCHS):
147 | MODEL.train()
148 | total_train_correct = 0
149 | total_train_samples = 0
150 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"):
151 | images, labels = images.to(DEVICE), labels.to(DEVICE)
152 | optimizer.zero_grad()
153 | with torch.autocast(device_type="cuda", dtype=torch.float16):
154 | output = MODEL(images)
155 | loss = criterion(output.cuda(), labels.cuda())
156 | scaler.scale(loss).backward()
157 | scaler.step(optimizer)
158 | scaler.update()
159 | lr_scheduler.step()
160 | current_lr = optimizer.param_groups[0]["lr"]
161 |
162 | _, train_predicted = torch.max(output, 1)
163 | total_train_samples += labels.size(0)
164 | total_train_correct += (train_predicted == labels).sum().item()
165 |
166 | train_accuracy = (total_train_correct / total_train_samples) * 100
167 |
168 | MODEL.eval()
169 | valid_loss = 0.0
170 | correct = 0
171 | total = 0
172 | with torch.no_grad():
173 | for images, labels in valid_loader:
174 | images, labels = images.to(DEVICE), labels.to(DEVICE)
175 | outputs = MODEL(images)
176 | loss = criterion_val(outputs.cuda(), labels.cuda())
177 | valid_loss += loss.item()
178 | _, predicted = torch.max(outputs, 1)
179 | total += labels.size(0)
180 | correct += (predicted == labels).sum().item()
181 |
182 | print(
183 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
184 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
185 | f"Validation Accuracy: {(correct/total)*100:.2f}%"
186 | f", Training Accuracy: {train_accuracy:.2f}%, "
187 | )
188 |
189 | if valid_loss < best_valid_loss:
190 | best_valid_loss = valid_loss
191 | print(f"Saving model at epoch {epoch+1}")
192 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
193 |
--------------------------------------------------------------------------------
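The hard-coded class weights passed to CrossEntropyLoss above are normalized inverse class frequencies, so rare expressions such as Contempt contribute more to the loss than frequent ones such as Happy. A sketch of deriving such weights, with illustrative per-class counts chosen so the result lands near the hard-coded values (the counts are not taken from the repo):

```python
import torch
import torch.nn as nn

# Illustrative per-class training counts (Neutral ... Contempt)
counts = torch.tensor(
    [74874.0, 134415.0, 25459.0, 14090.0, 6378.0, 3803.0, 24882.0, 3750.0]
)

weights = 1.0 / counts
weights = weights / weights.sum()  # normalize so the weights sum to 1
criterion = nn.CrossEntropyLoss(weight=weights)
```
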
/models/AffectNet8_Maxvit_VA/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from CSV (inference only needs the validation split)
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 |
18 | # Set parameters
19 | BATCHSIZE = 128
20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 |
22 |
23 | # **** Create dataset and data loaders ****
24 | class CustomDataset(Dataset):
25 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
26 | self.dataframe = dataframe
27 | self.transform = transform
28 | self.root_dir = root_dir
29 | self.balance = balance
30 |
31 | if self.balance:
32 | self.dataframe = self.balance_dataset()
33 |
34 | def __len__(self):
35 | return len(self.dataframe)
36 |
37 | def __getitem__(self, idx):
38 | image_path = os.path.join(
39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
40 | )
41 | image = Image.open(image_path)
42 |
43 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
44 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
45 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)
46 |
47 | if self.transform:
48 | image = self.transform(image)
49 |
50 | return image, classes, valence, arousal
51 |
52 | def balance_dataset(self):
53 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
54 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
55 | )
56 | return balanced_df
57 |
58 |
59 | transform_valid = transforms.Compose(
60 | [
61 | transforms.ToTensor(),
62 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
63 | ]
64 | )
65 |
66 | valid_dataset = CustomDataset(
67 | dataframe=valid_annotations_df,
68 | root_dir=IMAGE_FOLDER_TEST,
69 | transform=transform_valid,
70 | balance=False,
71 | )
72 | valid_loader = DataLoader(
73 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
74 | )
75 |
76 | # ***** Define the model *****
77 |
78 | # Initialize the model
79 | MODEL = models.maxvit_t(weights="DEFAULT")
80 | block_channels = MODEL.classifier[3].in_features
81 | MODEL.classifier = nn.Sequential(
82 | nn.AdaptiveAvgPool2d(1),
83 | nn.Flatten(),
84 | nn.LayerNorm(block_channels),
85 | nn.Linear(block_channels, block_channels),
86 | nn.Tanh(),
87 | nn.Dropout(0.3),
88 | nn.Linear(block_channels, 2, bias=False),
89 | )
90 | MODEL.to(DEVICE)
91 |
92 | # **** Test the model performance for classification ****
93 |
94 | # Set the model to evaluation mode
95 | MODEL.load_state_dict(torch.load("model.pt"))
96 | MODEL.to(DEVICE)
97 | MODEL.eval()
98 |
99 | all_val_true_values = []
100 | all_val_predicted_values = []
101 | all_aro_true_values = []
102 | all_aro_predicted_values = []
103 |
104 | # Start inference on test set
105 | with torch.no_grad():
106 | for images, _, val_true, aro_true in valid_loader:
107 | images, val_true, aro_true = (
108 | images.to(DEVICE),
109 | val_true.to(DEVICE),
110 | aro_true.to(DEVICE),
111 | )
112 |
113 | outputs = MODEL(images)
114 | val_pred = outputs[:, 0]
115 | aro_pred = outputs[:, 1]
116 |
117 | # Append to the lists --> Regression
118 | true_val_values = val_true.cpu().numpy()
119 | true_aro_values = aro_true.cpu().numpy()
120 | pred_val_values = val_pred.cpu().numpy()
121 | pred_aro_values = aro_pred.cpu().numpy()
122 | all_val_true_values.extend(true_val_values)
123 | all_aro_true_values.extend(true_aro_values)
124 | all_val_predicted_values.extend(pred_val_values)
125 | all_aro_predicted_values.extend(pred_aro_values)
126 | df = pd.DataFrame(
127 | {
128 | "val_pred": all_val_predicted_values,
129 | "val_true": all_val_true_values,
130 | "aro_pred": all_aro_predicted_values,
131 | "aro_true": all_aro_true_values,
132 | }
133 | )
134 | df.to_csv("inference.csv", index=False)
135 |
--------------------------------------------------------------------------------
/models/AffectNet8_Swin_Combined/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from CSV (inference only needs the validation split)
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | # Set parameters
18 | BATCHSIZE = 128
19 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20 |
21 |
22 | # **** Create dataset and data loaders ****
23 | class CustomDataset(Dataset):
24 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
25 | self.dataframe = dataframe
26 | self.transform = transform
27 | self.root_dir = root_dir
28 | self.balance = balance
29 |
30 | if self.balance:
31 | self.dataframe = self.balance_dataset()
32 |
33 | def __len__(self):
34 | return len(self.dataframe)
35 |
36 | def __getitem__(self, idx):
37 | image_path = os.path.join(
38 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
39 | )
40 | if os.path.exists(image_path):
41 | image = Image.open(image_path)
42 | else:
43 | image = Image.new(
44 | "RGB", (224, 224), color="white"
45 | ) # Handle missing image file
46 |
47 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
48 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
49 |
50 | if self.transform:
51 | image = self.transform(image)
52 |
53 | return image, classes, labels
54 |
55 | def balance_dataset(self):
56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
58 | )
59 | return balanced_df
60 |
61 |
62 | transform_valid = transforms.Compose(
63 | [
64 | transforms.ToTensor(),
65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
66 | ]
67 | )
68 |
69 | valid_dataset = CustomDataset(
70 | dataframe=valid_annotations_df,
71 | root_dir=IMAGE_FOLDER_TEST,
72 | transform=transform_valid,
73 | balance=False,
74 | )
75 | valid_loader = DataLoader(
76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
77 | )
78 |
79 | # * Define the model *
80 |
81 | # Initialize the model
82 | MODEL = models.swin_v2_t(weights="DEFAULT")
83 | MODEL.head = torch.nn.Linear(in_features=768, out_features=10, bias=True)
84 | MODEL.to(DEVICE)
85 |
86 | # **** Test the model performance for classification ****
87 |
88 | # Set the model to evaluation mode
89 | MODEL.load_state_dict(torch.load("model.pt"))
90 | MODEL.to(DEVICE)
91 | MODEL.eval()
92 |
93 | all_labels_cls = []
94 | all_predicted_cls = []
95 |
96 | all_true_val = []
97 | all_pred_val = []
98 | all_true_aro = []
99 | all_pred_aro = []
100 |
101 | # Start inference on test set
102 | with torch.no_grad():
103 | for images, classes, labels in iter(valid_loader):
104 | images, classes, labels = (
105 | images.to(DEVICE),
106 | classes.to(DEVICE),
107 | labels.to(DEVICE),
108 | )
109 |
110 | outputs = MODEL(images)
111 |         outputs_cls = outputs[:, :8]  # first 8 outputs are class logits (matches train.py)
112 |         outputs_reg = outputs[:, 8:]  # last 2 outputs are valence/arousal
113 | val_pred = outputs_reg[:, 0]
114 | aro_pred = outputs_reg[:, 1]
115 |
116 | _, predicted_cls = torch.max(outputs_cls, 1)
117 |
118 | all_labels_cls.extend(classes.cpu().numpy())
119 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
120 | val_true = labels[:, 0]
121 | aro_true = labels[:, 1]
122 |
123 | all_true_val.extend(val_true.cpu().numpy())
124 | all_true_aro.extend(aro_true.cpu().numpy())
125 | all_pred_val.extend(val_pred.cpu().numpy())
126 | all_pred_aro.extend(aro_pred.cpu().numpy())
127 |
128 | df = pd.DataFrame(
129 | {
130 | "cat_pred": all_predicted_cls,
131 | "cat_true": all_labels_cls,
132 | "val_pred": all_pred_val,
133 | "val_true": all_true_val,
134 | "aro_pred": all_pred_aro,
135 | "aro_true": all_true_aro,
136 | }
137 | )
138 | df.to_csv("inference.csv", index=False)
139 |
--------------------------------------------------------------------------------
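The 8/2 split of the combined head's outputs is repeated by index in both train.py and generate_csv.py, which makes it easy for the two scripts to drift apart. One defensive option is to derive both slices from a single shared constant; NUM_CLASSES below is an assumed name, not from the original scripts:

```python
import torch

NUM_CLASSES = 8  # assumed constant shared by train.py and generate_csv.py

outputs = torch.randn(4, NUM_CLASSES + 2)  # hypothetical batch of combined-head outputs
outputs_cls = outputs[:, :NUM_CLASSES]     # expression logits
outputs_reg = outputs[:, NUM_CLASSES:]     # valence and arousal
assert outputs_reg.shape[1] == 2
```
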
/models/AffectNet8_Swin_Combined/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | from torch.optim import lr_scheduler
11 | from tqdm import tqdm
12 |
13 | # Load the annotations for training and validation from separate CSV files
14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
16 | train_annotations_path = (
17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
18 | )
19 | valid_annotations_path = (
20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
21 | )
22 | train_annotations_df = pd.read_csv(train_annotations_path)
23 | valid_annotations_df = pd.read_csv(valid_annotations_path)
24 |
25 |
26 | # Set parameters
27 | BATCHSIZE = 128
28 | NUM_EPOCHS = 20
29 | LR = 4e-5
30 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31 |
32 |
33 | # **** Create dataset and data loaders ****
34 | class CustomDataset(Dataset):
35 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
36 | self.dataframe = dataframe
37 | self.transform = transform
38 | self.root_dir = root_dir
39 | self.balance = balance
40 |
41 | if self.balance:
42 | self.dataframe = self.balance_dataset()
43 |
44 | def __len__(self):
45 | return len(self.dataframe)
46 |
47 | def __getitem__(self, idx):
48 | image_path = os.path.join(
49 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
50 | )
51 | if os.path.exists(image_path):
52 | image = Image.open(image_path)
53 | else:
54 | image = Image.new(
55 | "RGB", (224, 224), color="white"
56 | ) # Handle missing image file
57 |
58 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
59 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32)
60 |
61 | if self.transform:
62 | image = self.transform(image)
63 |
64 | return image, classes, labels
65 |
66 | def balance_dataset(self):
67 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
68 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
69 | )
70 | return balanced_df
71 |
72 |
73 | transform = transforms.Compose(
74 | [
75 | transforms.RandomHorizontalFlip(0.5),
76 | transforms.RandomGrayscale(0.01),
77 | transforms.RandomRotation(10),
78 | transforms.ColorJitter(
79 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
80 | ), # model more robust to changes in lighting conditions.
81 | transforms.RandomPerspective(
82 | distortion_scale=0.2, p=0.5
83 | ), # can be helpful if your images might have varying perspectives.
84 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255)
85 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
86 | transforms.RandomErasing(
87 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
88 | ), # TEST: Should help overfitting
89 | ]
90 | )
91 |
92 | transform_valid = transforms.Compose(
93 | [
94 | transforms.ToTensor(),
95 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
96 | ]
97 | )
98 |
99 | train_dataset = CustomDataset(
100 | dataframe=train_annotations_df,
101 | root_dir=IMAGE_FOLDER,
102 | transform=transform,
103 | balance=False,
104 | )
105 | valid_dataset = CustomDataset(
106 | dataframe=valid_annotations_df,
107 | root_dir=IMAGE_FOLDER_TEST,
108 | transform=transform_valid,
109 | balance=False,
110 | )
111 | train_loader = DataLoader(
112 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
113 | )
114 | valid_loader = DataLoader(
115 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
116 | )
117 |
118 | # * Define the model *
119 |
120 | # Initialize the model
121 | MODEL = models.swin_v2_t(weights="DEFAULT")
122 | MODEL.head = torch.nn.Linear(in_features=768, out_features=10, bias=True)
123 | MODEL.to(DEVICE)
124 | # Define (weighted) loss function
125 | weights = torch.tensor(
126 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]
127 | )
128 | criterion_cls = nn.CrossEntropyLoss(weights.to(DEVICE))
129 | criterion_cls_val = (
130 | nn.CrossEntropyLoss()
131 | )  # Unweighted classification loss for validation, since the validation set is balanced
132 | criterion_reg = nn.MSELoss()
133 |
134 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
135 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
136 |
137 | # ***** Train the model *****
138 | print("--- Start training ---")
139 | scaler = torch.cuda.amp.GradScaler()
140 | best_valid_loss = 100
141 |
142 | for epoch in range(NUM_EPOCHS):
143 | MODEL.train()
144 | total_train_correct = 0
145 | total_train_samples = 0
146 | for images, classes, labels in tqdm(
147 | train_loader, desc="Epoch train_loader progress"
148 | ):
149 | images, classes, labels = (
150 | images.to(DEVICE),
151 | classes.to(DEVICE),
152 | labels.to(DEVICE),
153 | )
154 | optimizer.zero_grad()
155 | with torch.autocast(device_type="cuda", dtype=torch.float16):
156 | outputs = MODEL(images)
157 | outputs_cls = outputs[:, :8]
158 | outputs_reg = outputs[:, 8:]
159 | loss = criterion_cls(
160 | outputs_cls.cuda(), classes.cuda()
161 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
162 | scaler.scale(loss).backward()
163 | scaler.step(optimizer)
164 | scaler.update()
165 | lr_scheduler.step()
166 | current_lr = optimizer.param_groups[0]["lr"]
167 |
168 | _, train_predicted = torch.max(outputs_cls, 1)
169 | total_train_samples += classes.size(0)
170 | total_train_correct += (train_predicted == classes).sum().item()
171 |
172 | train_accuracy = (total_train_correct / total_train_samples) * 100
173 |
174 | MODEL.eval()
175 | valid_loss = 0.0
176 | correct = 0
177 | total = 0
178 | with torch.no_grad():
179 | for images, classes, labels in valid_loader:
180 | images, classes, labels = (
181 | images.to(DEVICE),
182 | classes.to(DEVICE),
183 | labels.to(DEVICE),
184 | )
185 | outputs = MODEL(images)
186 | outputs_cls = outputs[:, :8]
187 | outputs_reg = outputs[:, 8:]
188 | loss = criterion_cls_val(
189 | outputs_cls.cuda(), classes.cuda()
190 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda())
191 | valid_loss += loss.item()
192 | _, predicted = torch.max(outputs_cls, 1)
193 | total += classes.size(0)
194 | correct += (predicted == classes).sum().item()
195 |
196 | print(
197 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
198 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
199 | f"Validation Accuracy: {(correct/total)*100:.2f}%"
200 | f", Training Accuracy: {train_accuracy:.2f}%, "
201 | )
202 |
203 | if valid_loss < best_valid_loss:
204 | best_valid_loss = valid_loss
205 | print(f"Saving model at epoch {epoch+1}")
206 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
207 |
--------------------------------------------------------------------------------
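Replacing MODEL.head hard-codes 768 input features, which is correct for swin_v2_t but would silently break for wider Swin variants. A small sketch of the same head swap with the width read off the pretrained head instead:

```python
import torch.nn as nn
import torchvision.models as models

model = models.swin_v2_t(weights="DEFAULT")
in_features = model.head.in_features      # 768 for swin_v2_t, but not hard-coded
model.head = nn.Linear(in_features, 10)   # 8 expression logits + valence/arousal
```
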
/models/AffectNet8_Swin_Discrete/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 | # Load the validation annotations from CSV (inference only needs the validation split)
11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
13 | valid_annotations_path = (
14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
15 | )
16 | valid_annotations_df = pd.read_csv(valid_annotations_path)
17 | # Set parameters
18 | BATCHSIZE = 128
19 |
20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 |
22 |
23 | # **** Create dataset and data loaders ****
24 | class CustomDataset(Dataset):
25 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
26 | self.dataframe = dataframe
27 | self.transform = transform
28 | self.root_dir = root_dir
29 | self.balance = balance
30 |
31 | if self.balance:
32 | self.dataframe = self.balance_dataset()
33 |
34 | def __len__(self):
35 | return len(self.dataframe)
36 |
37 | def __getitem__(self, idx):
38 | image_path = os.path.join(
39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
40 | )
41 | if os.path.exists(image_path):
42 | image = Image.open(image_path)
43 | else:
44 | image = Image.new(
45 | "RGB", (224, 224), color="white"
46 | ) # Handle missing image file
47 |
48 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
49 |
50 | if self.transform:
51 | image = self.transform(image)
52 |
53 | return image, label
54 |
55 | def balance_dataset(self):
56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
58 | )
59 | return balanced_df
60 |
61 |
62 | transform_valid = transforms.Compose(
63 | [
64 | transforms.ToTensor(),
65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
66 | ]
67 | )
68 | valid_dataset = CustomDataset(
69 | dataframe=valid_annotations_df,
70 | root_dir=IMAGE_FOLDER_TEST,
71 | transform=transform_valid,
72 | balance=False,
73 | )
74 | valid_loader = DataLoader(
75 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
76 | )
77 | # * Define the model *
78 |
79 | # Initialize the model
80 | MODEL = models.swin_v2_t(weights="DEFAULT")
81 | MODEL.head = torch.nn.Linear(in_features=768, out_features=8, bias=True)
82 | MODEL.to(DEVICE)
83 |
84 | # Set the model to evaluation mode
85 | MODEL.load_state_dict(torch.load("model.pt"))
86 | MODEL.to(DEVICE)
87 | MODEL.eval()
88 |
89 | all_labels_cls = []
90 | all_predicted_cls = []
91 |
92 | # Start inference on test set
93 | with torch.no_grad():
94 | for images, labels_cls in iter(valid_loader):
95 | images = images.to(DEVICE)
96 | labels_cls = labels_cls.to(DEVICE)
97 |
98 | outputs = MODEL(images)
99 |
100 | _, predicted_cls = torch.max(outputs, 1)
101 |
102 | all_labels_cls.extend(labels_cls.cpu().numpy())
103 | all_predicted_cls.extend(predicted_cls.cpu().numpy())
104 |
105 |
106 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls})
107 | df.to_csv("inference.csv", index=False)
108 |
--------------------------------------------------------------------------------
/models/AffectNet8_Swin_Discrete/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | import torchvision
11 | from torch.optim import lr_scheduler
12 | import re
13 | from tqdm import tqdm
14 |
15 | # Load the annotations for training and validation from separate CSV files
16 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
17 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
18 | train_annotations_path = (
19 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
20 | )
21 | valid_annotations_path = (
22 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
23 | )
24 | train_annotations_df = pd.read_csv(train_annotations_path)
25 | valid_annotations_df = pd.read_csv(valid_annotations_path)
26 |
27 |
28 | # Set parameters
29 | BATCHSIZE = 128
30 | NUM_EPOCHS = 20
31 | LR = 4e-5
32 |
33 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34 |
35 |
36 | # **** Create dataset and data loaders ****
37 | class CustomDataset(Dataset):
38 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
39 | self.dataframe = dataframe
40 | self.transform = transform
41 | self.root_dir = root_dir
42 | self.balance = balance
43 |
44 | if self.balance:
45 | self.dataframe = self.balance_dataset()
46 |
47 | def __len__(self):
48 | return len(self.dataframe)
49 |
50 | def __getitem__(self, idx):
51 | image_path = os.path.join(
52 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
53 | )
54 | if os.path.exists(image_path):
55 | image = Image.open(image_path)
56 | else:
57 | image = Image.new(
58 | "RGB", (224, 224), color="white"
59 | ) # Handle missing image file
60 |
61 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long)
62 |
63 | if self.transform:
64 | image = self.transform(image)
65 |
66 | return image, label
67 |
68 | def balance_dataset(self):
69 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
70 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
71 | )
72 | return balanced_df
73 |
74 |
75 | transform = transforms.Compose(
76 | [
77 | transforms.ElasticTransform(alpha=5.0, sigma=5.0),
78 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
79 | transforms.RandomGrayscale(p=0.1),
80 | transforms.RandomRotation(degrees=15),
81 | transforms.RandomVerticalFlip(),
82 | transforms.ColorJitter(0.15, 0.15, 0.15),
83 | torchvision.transforms.RandomAutocontrast(p=0.4),
84 | transforms.ToTensor(),
85 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
86 | ]
87 | )
88 |
89 | transform_valid = transforms.Compose(
90 | [
91 | transforms.ToTensor(),
92 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
93 | ]
94 | )
95 |
96 | train_dataset = CustomDataset(
97 | dataframe=train_annotations_df,
98 | root_dir=IMAGE_FOLDER,
99 | transform=transform,
100 | balance=False,
101 | )
102 | valid_dataset = CustomDataset(
103 | dataframe=valid_annotations_df,
104 | root_dir=IMAGE_FOLDER_TEST,
105 | transform=transform_valid,
106 | balance=False,
107 | )
108 | train_loader = DataLoader(
109 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
110 | )
111 | valid_loader = DataLoader(
112 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
113 | )
114 |
115 | # * Define the model *
116 |
117 | # Initialize the model
118 | MODEL = models.swin_v2_t(weights="DEFAULT")
119 | MODEL.head = torch.nn.Linear(in_features=768, out_features=8, bias=True)
120 | MODEL.to(DEVICE)
121 |
122 | # Define (weighted) loss function
123 | weights = torch.tensor(
124 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]
125 | )
126 |
127 | criterion = nn.CrossEntropyLoss(weights.to(DEVICE))
128 | criterion_val = (
129 | nn.CrossEntropyLoss()
130 | )  # Unweighted loss for validation, since the validation set is balanced
131 |
132 |
133 | # Filter parameters for weight decay and no weight decay and create optimizer/scheduler
134 | def filter_params(params, include_patterns, exclude_patterns):
135 | included_params = []
136 | excluded_params = []
137 | for name, param in params:
138 | if any(re.search(pattern, name) for pattern in include_patterns):
139 | included_params.append(param)
140 |         elif any(re.search(pattern, name) for pattern in exclude_patterns):
141 |             excluded_params.append(param)  # e.g. BatchNorm parameters, excluded from weight decay
142 | return included_params, excluded_params
143 |
144 |
145 | include_patterns = [
146 |     r"^(?!.*\.bn)"
147 | ]  # Match any parameter name that does NOT contain '.bn' (non-BatchNorm parameters)
148 | exclude_patterns = [r".*\.bn.*"]  # Match names that DO contain '.bn' (BatchNorm parameters)
149 | params_to_decay, params_not_to_decay = filter_params(
150 | MODEL.named_parameters(), include_patterns, exclude_patterns
151 | )
152 |
153 | # optimizer = optim.AdamW([
154 | # {'params': params_to_decay, 'weight_decay': ADAMW_WEIGHT_DECAY}, # Apply weight decay to these parameters
155 | # {'params': params_not_to_decay, 'weight_decay': 0.0} # Exclude weight decay for these parameters = 0.0
156 | # ], lr=LR)
157 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
158 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
159 |
160 | # ***** Train the model *****
161 | print("--- Start training ---")
162 | scaler = torch.cuda.amp.GradScaler()
163 | best_valid_loss = 100
164 |
165 | for epoch in range(NUM_EPOCHS):
166 | MODEL.train()
167 | total_train_correct = 0
168 | total_train_samples = 0
169 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"):
170 | images, labels = images.to(DEVICE), labels.to(DEVICE)
171 | optimizer.zero_grad()
172 | with torch.autocast(device_type="cuda", dtype=torch.float16):
173 | output = MODEL(images)
174 | loss = criterion(output.cuda(), labels.cuda())
175 | scaler.scale(loss).backward()
176 | scaler.step(optimizer)
177 | scaler.update()
178 | lr_scheduler.step()
179 | current_lr = optimizer.param_groups[0]["lr"]
180 |
181 | _, train_predicted = torch.max(output, 1)
182 | total_train_samples += labels.size(0)
183 | total_train_correct += (train_predicted == labels).sum().item()
184 |
185 | train_accuracy = (total_train_correct / total_train_samples) * 100
186 |
187 | MODEL.eval()
188 | valid_loss = 0.0
189 | correct = 0
190 | total = 0
191 | with torch.no_grad():
192 | for images, labels in valid_loader:
193 | images, labels = images.to(DEVICE), labels.to(DEVICE)
194 | outputs = MODEL(images)
195 | loss = criterion_val(outputs.cuda(), labels.cuda())
196 | valid_loss += loss.item()
197 | _, predicted = torch.max(outputs, 1)
198 | total += labels.size(0)
199 | correct += (predicted == labels).sum().item()
200 |
201 | print(
202 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
203 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
204 | f"Validation Accuracy: {(correct/total)*100:.2f}%"
205 | f", Training Accuracy: {train_accuracy:.2f}%, "
206 | )
207 |     # TBD: Overwrite the validation loss, then save the model as below whenever it is smaller than before
208 |
209 | if valid_loss < best_valid_loss:
210 | best_valid_loss = valid_loss
211 | print(f"Saving model at epoch {epoch+1}")
212 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
213 |
--------------------------------------------------------------------------------
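The filter_params helper and the commented-out optimizer above sketch a two-group AdamW setup in which normalization parameters are exempt from weight decay. A self-contained version of the same idea on a toy module (the 0.1 decay value is hypothetical):

```python
import re
import torch.nn as nn
import torch.optim as optim

class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 8)
        self.bn = nn.BatchNorm1d(8)

model = nn.Sequential(Block())  # parameter names: "0.fc.weight", "0.bn.weight", ...

decay, no_decay = [], []
for name, param in model.named_parameters():
    (no_decay if re.search(r"\.bn", name) else decay).append(param)

optimizer = optim.AdamW(
    [
        {"params": decay, "weight_decay": 0.1},     # hypothetical decay strength
        {"params": no_decay, "weight_decay": 0.0},  # BatchNorm parameters: no decay
    ],
    lr=4e-5,
)
```
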
/models/AffectNet8_Swin_VA/generate_csv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | from PIL import Image
9 |
10 |
11 | # Load the validation annotations from CSV (inference only needs the validation split)
12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
14 |
15 | valid_annotations_path = (
16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
17 | )
18 | valid_annotations_df = pd.read_csv(valid_annotations_path)
19 | # Set parameters
20 | BATCHSIZE = 128
21 |
22 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23 |
24 |
25 | # **** Create dataset and data loaders ****
26 | class CustomDataset(Dataset):
27 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
28 | self.dataframe = dataframe
29 | self.transform = transform
30 | self.root_dir = root_dir
31 | self.balance = balance
32 |
33 | if self.balance:
34 | self.dataframe = self.balance_dataset()
35 |
36 | def __len__(self):
37 | return len(self.dataframe)
38 |
39 | def __getitem__(self, idx):
40 | image_path = os.path.join(
41 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
42 | )
43 | image = Image.open(image_path)
44 |
45 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
46 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
47 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)
48 |
49 | if self.transform:
50 | image = self.transform(image)
51 |
52 | return image, classes, valence, arousal
53 |
54 | def balance_dataset(self):
55 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
56 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
57 | )
58 | return balanced_df
59 |
60 |
61 | transform_valid = transforms.Compose(
62 | [
63 | transforms.ToTensor(),
64 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
65 | ]
66 | )
67 |
68 | valid_dataset = CustomDataset(
69 | dataframe=valid_annotations_df,
70 | root_dir=IMAGE_FOLDER_TEST,
71 | transform=transform_valid,
72 | balance=False,
73 | )
74 |
75 | valid_loader = DataLoader(
76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
77 | )
78 |
79 | # * Define the model *
80 |
81 | # Initialize the model
82 | MODEL = models.swin_v2_t(weights="DEFAULT")
83 | MODEL.head = torch.nn.Linear(in_features=768, out_features=2, bias=True)
84 | MODEL.to(DEVICE)
85 |
86 |
87 | # **** Test the model performance for classification ****
88 |
89 | # Set the model to evaluation mode
90 | MODEL.load_state_dict(torch.load("model.pt"))
91 | MODEL.to(DEVICE)
92 | MODEL.eval()
93 |
94 | all_val_true_values = []
95 | all_val_predicted_values = []
96 | all_aro_true_values = []
97 | all_aro_predicted_values = []
98 |
99 | # Start inference on test set
100 | with torch.no_grad():
101 | for images, _, val_true, aro_true in valid_loader:
102 | images, val_true, aro_true = (
103 | images.to(DEVICE),
104 | val_true.to(DEVICE),
105 | aro_true.to(DEVICE),
106 | )
107 |
108 | outputs = MODEL(images)
109 | val_pred = outputs[:, 0]
110 | aro_pred = outputs[:, 1]
111 |
112 | # Append to the lists --> Regression
113 | true_val_values = val_true.cpu().numpy()
114 | true_aro_values = aro_true.cpu().numpy()
115 | pred_val_values = val_pred.cpu().numpy()
116 | pred_aro_values = aro_pred.cpu().numpy()
117 | all_val_true_values.extend(true_val_values)
118 | all_aro_true_values.extend(true_aro_values)
119 | all_val_predicted_values.extend(pred_val_values)
120 | all_aro_predicted_values.extend(pred_aro_values)
121 |
122 | df = pd.DataFrame(
123 | {
124 | "val_pred": all_val_predicted_values,
125 | "val_true": all_val_true_values,
126 | "aro_pred": all_aro_predicted_values,
127 | "aro_true": all_aro_true_values,
128 | }
129 | )
130 | df.to_csv("inference.csv", index=False)
131 |
--------------------------------------------------------------------------------
/models/AffectNet8_Swin_VA/train.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 | import torch
4 | import torchvision.transforms as transforms
5 | import torchvision.models as models
6 | from torch.utils.data import DataLoader, Dataset
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | from PIL import Image
10 | from torch.optim import lr_scheduler
11 | from tqdm import tqdm
12 |
13 | # Load the annotations for training and validation from separate CSV files
14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/"
15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/"
16 | train_annotations_path = (
17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv"
18 | )
19 | valid_annotations_path = (
20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv"
21 | )
22 | train_annotations_df = pd.read_csv(train_annotations_path)
23 | valid_annotations_df = pd.read_csv(valid_annotations_path)
24 |
25 | # Set parameters
26 | BATCHSIZE = 128
27 | NUM_EPOCHS = 20
28 | LR = 4e-5
29 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30 |
31 |
32 | # **** Create dataset and data loaders ****
33 | class CustomDataset(Dataset):
34 | def __init__(self, dataframe, root_dir, transform=None, balance=False):
35 | self.dataframe = dataframe
36 | self.transform = transform
37 | self.root_dir = root_dir
38 | self.balance = balance
39 |
40 | if self.balance:
41 | self.dataframe = self.balance_dataset()
42 |
43 | def __len__(self):
44 | return len(self.dataframe)
45 |
46 | def __getitem__(self, idx):
47 | image_path = os.path.join(
48 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg"
49 | )
50 | image = Image.open(image_path)
51 |
52 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8)
53 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16)
54 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16)
55 |
56 | if self.transform:
57 | image = self.transform(image)
58 |
59 | return image, classes, valence, arousal
60 |
61 | def balance_dataset(self):
62 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply(
63 | lambda x: x.sample(self.dataframe["exp"].value_counts().min())
64 | )
65 | return balanced_df
66 |
67 |
68 | transform = transforms.Compose(
69 | [
70 | transforms.RandomHorizontalFlip(0.5),
71 | transforms.RandomGrayscale(0.01),
72 | transforms.RandomRotation(10),
73 | transforms.ColorJitter(
74 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1
75 | ), # model more robust to changes in lighting conditions.
76 | transforms.RandomPerspective(
77 | distortion_scale=0.2, p=0.5
78 | ), # can be helpful if your images might have varying perspectives.
79 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255)
80 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
81 | transforms.RandomErasing(
82 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random"
83 | ), # Should help overfitting
84 | ]
85 | )
86 |
87 | transform_valid = transforms.Compose(
88 | [
89 | transforms.ToTensor(),
90 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
91 | ]
92 | )
93 |
94 | train_dataset = CustomDataset(
95 | dataframe=train_annotations_df,
96 | root_dir=IMAGE_FOLDER,
97 | transform=transform,
98 | balance=True,
99 | )
100 | valid_dataset = CustomDataset(
101 | dataframe=valid_annotations_df,
102 | root_dir=IMAGE_FOLDER_TEST,
103 | transform=transform_valid,
104 | balance=False,
105 | )
106 | train_loader = DataLoader(
107 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48
108 | )
109 | valid_loader = DataLoader(
110 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48
111 | )
112 |
113 | # * Define the model *
114 |
115 | # Initialize the model
116 | MODEL = models.swin_v2_t(weights="DEFAULT")
117 | MODEL.head = torch.nn.Linear(in_features=768, out_features=10, bias=True)
118 | MODEL.to(DEVICE)
119 | MODEL.load_state_dict(torch.load("../AffectNet8_Swin_Combined/model.pt"))  # warm start from the combined checkpoint
120 | MODEL.head = torch.nn.Linear(in_features=768, out_features=2, bias=True)  # replace the 10-way head with a 2-output VA head
121 | MODEL.to(DEVICE)
122 |
123 |
124 | def CCCLoss(x, y):
125 | # Compute means
126 | x_mean = torch.mean(x, dim=0)
127 | y_mean = torch.mean(y, dim=0)
128 | # Compute variances
129 | x_var = torch.var(x, dim=0)
130 | y_var = torch.var(y, dim=0)
131 |     # Compute covariance; the permute is a dimension-agnostic transpose of (x - x_mean)
132 | cov_matrix = torch.matmul(
133 | (x - x_mean).permute(*torch.arange(x.dim() - 1, -1, -1)), y - y_mean
134 | ) / (x.size(0) - 1)
135 | # Compute CCC
136 | numerator = 2 * cov_matrix
137 | denominator = x_var + y_var + torch.pow((x_mean - y_mean), 2)
138 | ccc = torch.mean(numerator / denominator)
139 |     return -ccc  # negate, so that minimizing the loss maximizes agreement
140 |
141 |
142 | val_loss = nn.MSELoss()
143 | aro_loss = nn.MSELoss()
144 |
145 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR)
146 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS)
147 |
148 | # ***** Train the model *****
149 | print("--- Start training ---")
150 | scaler = torch.cuda.amp.GradScaler()
151 | best_valid_loss = 100
152 | l2_lambda = 0.00001  # L2 regularization strength (currently unused)
153 | l1_lambda = 0.00001  # L1 regularization strength (currently unused)
154 |
155 | for epoch in range(NUM_EPOCHS):
156 | MODEL.train()
157 |     total_train_correct, total_train_samples = 0, 0  # unused in this VA-only script
158 |     train_loss = 0.0  # accumulated over the whole epoch for the mean training loss
159 | current_lr = optimizer.param_groups[0]["lr"]
160 | for images, _, val_true, aro_true in tqdm(
161 | train_loader, desc="Epoch train_loader progress"
162 | ):
163 | images, val_true, aro_true = (
164 | images.to(DEVICE),
165 | val_true.to(DEVICE),
166 | aro_true.to(DEVICE),
167 | )
168 | optimizer.zero_grad()
169 |         # NOTE: l2_reg / l1_reg are accumulated below but never added to the loss
170 |         l2_reg = 0
171 |         l1_reg = 0
172 | with torch.autocast(device_type="cuda", dtype=torch.float16):
173 | outputs = MODEL(images)
174 | val_pred = outputs[:, 0]
175 | aro_pred = outputs[:, 1]
176 | for param in MODEL.parameters():
177 | l2_reg += torch.norm(param, 2) # **2
178 | l1_reg += torch.norm(param, 1)
179 | loss = (
180 | 3 * val_loss(val_pred.cuda(), val_true.cuda())
181 | + 3 * aro_loss(aro_pred.cuda(), aro_true.cuda())
182 | + CCCLoss(val_pred.cuda(), val_true.cuda())
183 | + CCCLoss(aro_pred.cuda(), aro_true.cuda())
184 | )
185 | train_loss += loss.item()
186 | scaler.scale(loss).backward()
187 | scaler.step(optimizer)
188 | scaler.update()
189 |         lr_scheduler.step()  # advance the cosine schedule once per optimizer step, as in the other training scripts
190 | MODEL.eval()
191 | valid_loss = 0.0
192 | total_valid_correct = 0
193 | total_valid_samples = 0
194 | with torch.no_grad():
195 | for images, _, val_true, aro_true in valid_loader:
196 | images, val_true, aro_true = (
197 | images.to(DEVICE),
198 | val_true.to(DEVICE),
199 | aro_true.to(DEVICE),
200 | )
201 | with torch.autocast(device_type="cuda", dtype=torch.float16):
202 | outputs = MODEL(images)
203 | val_pred = outputs[:, 0]
204 | aro_pred = outputs[:, 1]
205 | loss = (
206 | 3 * val_loss(val_pred.cuda(), val_true.cuda())
207 | + 3 * aro_loss(aro_pred.cuda(), aro_true.cuda())
208 | + CCCLoss(val_pred.cuda(), val_true.cuda())
209 | + CCCLoss(aro_pred.cuda(), aro_true.cuda())
210 | )
211 | valid_loss += loss.item()
212 |
213 | print(
214 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - "
215 | f"Training Loss: {train_loss/len(train_loader):.4f}, "
216 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, "
217 | f"Learning Rate: {current_lr:.8f}, "
218 | )
219 |
220 | if valid_loss < best_valid_loss:
221 | best_valid_loss = valid_loss
222 | print(f"Saving model at epoch {epoch+1}")
223 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model
224 |
--------------------------------------------------------------------------------
/models/evaluation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from sklearn.metrics import (
4 | mean_absolute_error,
5 | mean_squared_error,
6 | root_mean_squared_error,
7 | classification_report,
8 | )
9 | import os
10 | import subprocess
11 |
12 | ONLY_INFERENCE = False
13 | root_dir = "."
14 | # command = "echo test"  # debugging stand-in, superseded by the real command below
15 | command = "python3 generate_csv.py"
16 |
17 | label_mapping = {
18 | "Neutral": 0,
19 | "Happy": 1,
20 | "Sad": 2,
21 | "Surprise": 3,
22 | "Fear": 4,
23 | "Disgust": 5,
24 | "Anger": 6,
25 | "Contempt": 7,
26 | }
27 |
28 |
29 | def get_subdirectories(directory):
30 | subdirs = []
31 | for item in os.listdir(directory):
32 | full_path = os.path.abspath(os.path.join(directory, item))
33 | if os.path.isdir(full_path):
34 | subdirs.append(full_path)
35 | return subdirs
36 |
37 |
38 | def get_files_in_directory(directory):
39 | files = []
40 | # Iterate over each item in the directory
41 | for item in os.listdir(directory):
42 | # Check if it's a file
43 | if os.path.isfile(os.path.join(directory, item)):
44 | files.append(item)
45 | return files
46 |
47 |
48 | def concordance_correlation_coefficient(true_values, pred_values):
49 | mean_true = np.mean(true_values)
50 | mean_pred = np.mean(pred_values)
51 |
52 |     num = 2 * np.cov(true_values, pred_values, bias=True)[0, 1]  # population covariance, matching np.var below
53 | den = np.var(true_values) + np.var(pred_values) + (mean_true - mean_pred) ** 2
54 | return num / den
55 |
56 |
57 | def print_discrete(true_labels, pred_labels):
58 | if max(true_labels) == 7:
59 | class_names = [
60 | "Anger",
61 | "Disgust",
62 | "Fear",
63 | "Happy",
64 | "Sad",
65 | "Surprise",
66 | "Neutral",
67 | "Contempt",
68 | ]
69 | else:
70 | class_names = [
71 | "Anger",
72 | "Disgust",
73 | "Fear",
74 | "Happy",
75 | "Sad",
76 | "Surprise",
77 | "Neutral",
78 | ]
79 |
80 | mapped_labels = [label_mapping[name] for name in class_names]
81 |     report = classification_report(
82 | true_labels,
83 | pred_labels,
84 | labels=mapped_labels,
85 | target_names=class_names,
86 | zero_division=0.0,
87 | digits=3,
88 | output_dict=True,
89 | )
90 |     precision = report["weighted avg"]["precision"]
91 |     recall = report["weighted avg"]["recall"]
92 |     f1 = report["weighted avg"]["f1-score"]
93 | print(f"Precision: {precision:.3f}")
94 | print(f"Recall: {recall:.3f}")
95 | print(f"F1: {f1:.3f}")
96 |
97 |
98 | def evaluate(path: str):
99 | df = pd.read_csv(path)
100 | discrete = "cat_pred" in df.columns
101 | va = "val_pred" in df.columns
102 | if va:
103 | true_values = list(df["val_true"]) + list(df["aro_true"])
104 | pred_values = list(df["val_pred"]) + list(df["aro_pred"])
105 |         # Regression metrics over the pooled valence + arousal values
106 | mse = mean_squared_error(true_values, pred_values)
107 | mae = mean_absolute_error(true_values, pred_values)
108 | rmse = root_mean_squared_error(true_values, pred_values)
109 | ccc = concordance_correlation_coefficient(true_values, pred_values)
110 | print(path)
111 | if discrete:
112 | print_discrete(df["cat_true"], df["cat_pred"])
113 | if va:
114 | print(f"Mean Squared Error (MSE): {mse:.4f}")
115 | print(f"Mean Absolute Error (MAE): {mae:.4f}")
116 | print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
117 | print(f"Concordance Correlation Coefficient (CCC): {ccc:.4f}")
118 |
119 |
120 | for subdir in get_subdirectories(root_dir):
121 | files = get_files_in_directory(subdir)
122 |     if not ONLY_INFERENCE:
123 | if "model.pt" in files:
124 | result = subprocess.run(
125 | command, shell=True, cwd=subdir, capture_output=True, text=True
126 | )
127 | files = get_files_in_directory(subdir)
128 | if "inference.csv" in files:
129 | evaluate(os.path.join(subdir, "inference.csv"))
130 | print("\n")
131 | print(50 * "-")
132 | print("\n")
133 |
--------------------------------------------------------------------------------
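evaluate() dispatches on the columns of each inference.csv: cat_pred/cat_true trigger the classification report, and the val_/aro_ columns trigger the regression metrics. A tiny smoke test with made-up predictions in the combined-model schema (run in the same session as, or importing from, evaluation.py):

```python
import pandas as pd

pd.DataFrame(
    {
        "cat_pred": [1, 6], "cat_true": [1, 0],
        "val_pred": [0.7, -0.2], "val_true": [0.8, -0.1],
        "aro_pred": [0.3, 0.5], "aro_true": [0.2, 0.6],
    }
).to_csv("inference.csv", index=False)

evaluate("inference.csv")  # prints precision/recall/F1 plus MSE, MAE, RMSE and CCC
```
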
/requirements.txt:
--------------------------------------------------------------------------------
1 | asttokens==2.4.1
2 | comm==0.2.2
3 | contourpy==1.2.0
4 | cycler==0.12.1
5 | debugpy==1.8.1
6 | decorator==5.1.1
7 | exceptiongroup==1.2.0
8 | executing==2.0.1
9 | filelock==3.13.1
10 | fonttools==4.50.0
11 | fsspec==2024.2.0
12 | ipykernel==6.29.3
13 | ipython==8.22.2
14 | jedi==0.19.1
15 | Jinja2==3.1.3
16 | joblib==1.3.2
17 | jupyter_client==8.6.1
18 | jupyter_core==5.7.2
19 | kiwisolver==1.4.5
20 | MarkupSafe==2.1.5
21 | matplotlib==3.8.3
22 | matplotlib-inline==0.1.6
23 | mpmath==1.3.0
24 | nest-asyncio==1.6.0
25 | networkx==3.2.1
26 | numpy==1.26.4
27 | nvidia-cublas-cu12==12.1.3.1
28 | nvidia-cuda-cupti-cu12==12.1.105
29 | nvidia-cuda-nvrtc-cu12==12.1.105
30 | nvidia-cuda-runtime-cu12==12.1.105
31 | nvidia-cudnn-cu12==8.9.2.26
32 | nvidia-cufft-cu12==11.0.2.54
33 | nvidia-curand-cu12==10.3.2.106
34 | nvidia-cusolver-cu12==11.4.5.107
35 | nvidia-cusparse-cu12==12.1.0.106
36 | nvidia-nccl-cu12==2.19.3
37 | nvidia-nvjitlink-cu12==12.4.99
38 | nvidia-nvtx-cu12==12.1.105
39 | packaging==24.0
40 | pandas==2.2.1
41 | parso==0.8.3
42 | pexpect==4.9.0
43 | pillow==10.2.0
44 | platformdirs==4.2.0
45 | prompt-toolkit==3.0.43
46 | psutil==5.9.8
47 | ptyprocess==0.7.0
48 | pure-eval==0.2.2
49 | Pygments==2.17.2
50 | pyparsing==3.1.2
51 | python-dateutil==2.9.0.post0
52 | pytz==2024.1
53 | pyzmq==25.1.2
54 | scikit-learn==1.4.1.post1
55 | scipy==1.12.0
56 | six==1.16.0
57 | stack-data==0.6.3
58 | sympy==1.12
59 | threadpoolctl==3.3.0
60 | torch==2.2.1
61 | torchvision==0.17.1
62 | tornado==6.4
63 | tqdm==4.66.2
64 | traitlets==5.14.2
65 | triton==2.2.0
66 | typing_extensions==4.10.0
67 | tzdata==2024.1
68 | wcwidth==0.2.13
69 |
--------------------------------------------------------------------------------