├── .gitignore ├── CVPR_Workshop_ABAW_2024.pdf ├── CVPR_Workshop_ABAW_2024 ├── README.md ├── arxiv_main.tex ├── cvpr.sty ├── ieeenat_fullname.bst ├── main.bib ├── main.tex ├── pictures │ ├── Shuttle ganz.jpg │ ├── affectnet │ │ ├── Bild_Russel_AffectNet.pdf │ │ ├── affectnet_cdf.pdf │ │ ├── arousal_distribution.pdf │ │ ├── av_for_each_category.pdf │ │ ├── example_image.pdf │ │ ├── frequency_of_expression.pdf │ │ ├── inference_best_va_affectnet8.pdf │ │ ├── scatterplot.pdf │ │ └── valence_distribution.pdf │ ├── affectnet8onemotic.png │ ├── confusion_7VA.png │ ├── confusion_8VA.png │ ├── emotic │ │ ├── emotic_cdf.pdf │ │ ├── example_image.pdf │ │ ├── frequency_of_expression.pdf │ │ ├── frequency_of_expressions.pdf │ │ └── inference_affectnet8_on_emotic.pdf │ ├── emoticonaffectnet8.png │ ├── inference_affectnet8_on_emotic.pdf │ ├── inference_cross_validation.pdf │ └── inference_emotic_on_affectnet8.pdf ├── preamble.tex ├── sec │ ├── 00_Abstract.tex │ ├── 01_Intro.tex │ ├── 02_Related_Work.tex │ ├── 03_Datasets.tex │ ├── 04_Model.tex │ ├── 06_Conclusion.tex │ └── X_suppl.tex └── todos.txt ├── Honnold_inference.gif ├── LICENSE ├── README.md ├── affectnet_annotations ├── train_set_annotation_without_lnd.csv └── val_set_annotation_without_lnd.csv ├── inference_on_webcam.py ├── mat2py.py ├── models ├── AffectNet7_Efficientnet_Combined │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Efficientnet_Discrete │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Efficientnet_VA │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Maxvit_Combined │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Maxvit_Discrete │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Maxvit_VA │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Swin_Combined │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Swin_Discrete │ ├── generate_csv.py │ └── train.py ├── AffectNet7_Swin_VA │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Efficientnet_Combined │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Efficientnet_Discrete │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Efficientnet_VA │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Maxvit_Combined │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Maxvit_Discrete │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Maxvit_VA │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Swin_Combined │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Swin_Discrete │ ├── generate_csv.py │ └── train.py ├── AffectNet8_Swin_VA │ ├── generate_csv.py │ └── train.py └── evaluation.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | /.DS_Store 2 | /.ipynb_checkpoints 3 | /output 4 | /__pycache__ 5 | /data.csv 6 | myvenv 7 | .venv -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/README.md: -------------------------------------------------------------------------------- 1 | # CVPR/ICCV/3DV Official LaTeX template 2 | 3 | History (in reverse chronological order) 4 | 5 | - References in `cvprblue` for CVPR 2024 by [Klaus Greff](https://github.com/Qwlouse) 6 | - added natbib for CVPR 2024 by [Christian 
Richardt](https://richardt.name/) 7 | - replaced buggy (review-mode) line numbering for 3DV 2024 by [Adin Ramirez Rivera](https://openreview.net/profile?id=~Ad%C3%ADn_Ram%C3%ADrez_Rivera1) 8 | - logic for inline supplementary for 3DV 2024 by [Andrea Tagliasacchi](https://taiya.github.io) 9 | - modernized for CVPR 2022 by [Stefan Roth](stefan.roth@NOSPAMtu-darmstadt.de) 10 | - created cvpr.sty file to unify review/rebuttal/final versions by [Ming-Ming Cheng](https://github.com/MCG-NKU/CVPR_Template) 11 | - developed CVPR 2005 template by [Paolo Ienne](Paolo.Ienne@di.epfl.ch) and [Andrew Fitzgibbon](awf@acm.org) 12 | -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/arxiv_main.tex: -------------------------------------------------------------------------------- 1 | % CVPR 2024 Paper Template; see https://github.com/cvpr-org/author-kit 2 | \newcommand{\val}{\textit{valence}} 3 | \newcommand{\aro}{\textit{arousal}} 4 | \newcommand{\dom}{\textit{dominance}} 5 | \newcommand{\Val}{\textit{Valence}} 6 | \newcommand{\Aro}{\textit{Arousal}} 7 | \newcommand{\Dom}{\textit{Dominance}} 8 | \newcommand{\va}{\val{} and \aro{}} 9 | \newcommand{\VA}{\Val{} and \Aro{}} 10 | \newcommand{\affectnet}{AffectNet} 11 | \newcommand{\emotic}{EMOTIC} 12 | 13 | \newcommand\copyrighttext{% 14 | \footnotesize \textcopyright 2024 IEEE. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses, in any current or future media, including reprinting/republishing this material for advertising or promotional purposes, creating new collective works, for resale or redistribution to servers or lists, or reuse of any copyrighted component of this work in other works.} 15 | \newcommand\copyrightnotice{% 16 | \begin{tikzpicture}[remember picture,overlay] 17 | \node[anchor=south,yshift=10pt] at (current page.south) {\fbox{\parbox{\dimexpr\textwidth-\fboxsep-\fboxrule\relax}{\copyrighttext}}}; 18 | \end{tikzpicture}% 19 | } 20 | \documentclass[10pt,twocolumn,letterpaper]{article} 21 | 22 | %%%%%%%%% PAPER TYPE - PLEASE UPDATE FOR FINAL VERSION 23 | %\usepackage{cvpr} % To produce the CAMERA-READY version 24 | %\usepackage[review]{cvpr} % To produce the REVIEW version 25 | \usepackage[pagenumbers]{cvpr} % To force page numbers, e.g. for an arXiv version 26 | 27 | % NW: For long table 28 | \usepackage{tabularx,booktabs} 29 | \usepackage[accsupp]{axessibility} % Improves PDF readability for those with disabilities. 30 | 31 | % Import additional packages in the preamble file, before hyperref 32 | \input{preamble} 33 | 34 | % It is strongly recommended to use hyperref, especially for the review version. 35 | % hyperref with option pagebackref eases the reviewers' job. 36 | % Please disable hyperref *only* if you encounter grave issues, 37 | % e.g. with the file validation for the camera-ready version. 38 | % 39 | % If you comment hyperref and then uncomment it, you should delete *.aux before re-running LaTeX. 40 | % (Or just hit 'q' on the first LaTeX run, let it finish, and you should be clear). 
41 | \definecolor{cvprblue}{rgb}{0.21,0.49,0.74} 42 | \usepackage[pagebackref,breaklinks,colorlinks,citecolor=cvprblue]{hyperref} 43 | \usepackage{tikz} 44 | %%%%%%%%% PAPER ID - PLEASE UPDATE 45 | \def\paperID{33} % *** Enter the Paper ID here 46 | \def\confName{CVPR} 47 | \def\confYear{2024} 48 | 49 | %%%%%%%%% TITLE - PLEASE UPDATE 50 | \title{CAGE: Circumplex Affect Guided Expression Inference} 51 | 52 | %%%%%%%%% AUTHORS - PLEASE UPDATE 53 | % \author{Niklas Wagner\\ 54 | % Karlsruhe Institute of Technology\\ 55 | % {\tt\small uvssk@student.kit.edu} 56 | % % For a paper whose authors are all at the same institution, 57 | % % omit the following lines up until the closing ``}''. 58 | % % Additional authors and addresses can be added with ``\and'', 59 | % % just like the second author. 60 | % % To save space, use either the email address or home page, not both 61 | % \and 62 | % Felix Mätzler\\ 63 | % Karlsruhe Institute of Technology\\ 64 | % {\tt\small uvian@student.kit.edu} 65 | % \and 66 | % Samed R. Vossberg\\ 67 | % Karlsruhe Institute of Technology\\ 68 | % {\tt\small urgfl@student.kit.edu} 69 | % } 70 | 71 | \author{Niklas Wagner$^{1}$$^,$$^*$, Felix Mätzler$^{1}$$^,$$^*$, Samed R. Vossberg$^{1}$$^,$$^*$, Helen Schneider$^{1}$$^*$, Svetlana Pavlitska$^{2}$, \\J. Marius Zöllner$^{1,2}$\\ 72 | \textit{$^{1}$ Karlsruhe Institute of Technology (KIT), Germany}\\ 73 | \textit{$^{2}$ FZI Research Center for Information Technology, Germany} \\ 74 | {\tt\small helen.schneider@kit.edu}\\ 75 | } 76 | \begin{document} 77 | \maketitle 78 | \def\thefootnote{*}\footnotetext{These authors contributed equally to this work} 79 | \copyrightnotice 80 | \thispagestyle{empty} 81 | \pagestyle{empty} 82 | \input{sec/00_Abstract} 83 | \input{sec/01_Intro} 84 | % \newpage 85 | % \clearpage 86 | \input{sec/02_Related_Work} 87 | % \newpage 88 | % \clearpage 89 | \input{sec/03_Datasets} 90 | % \newpage 91 | % \clearpage 92 | \input{sec/04_Model} 93 | 94 | \input{sec/06_Conclusion} 95 | \newpage 96 | \clearpage 97 | { 98 | \small 99 | \bibliographystyle{ieeenat_fullname} 100 | \bibliography{main} 101 | } 102 | 103 | % WARNING: do not forget to delete the supplementary pages from your submission 104 | % \input{sec/X_suppl} 105 | 106 | \end{document} 107 | -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/main.tex: -------------------------------------------------------------------------------- 1 | % CVPR 2024 Paper Template; see https://github.com/cvpr-org/author-kit 2 | \newcommand{\val}{\textit{valence}} 3 | \newcommand{\aro}{\textit{arousal}} 4 | \newcommand{\dom}{\textit{dominance}} 5 | \newcommand{\Val}{\textit{Valence}} 6 | \newcommand{\Aro}{\textit{Arousal}} 7 | \newcommand{\Dom}{\textit{Dominance}} 8 | \newcommand{\va}{\val{} and \aro{}} 9 | \newcommand{\VA}{\Val{} and \Aro{}} 10 | \newcommand{\affectnet}{AffectNet} 11 | \newcommand{\emotic}{EMOTIC} 12 | 13 | \documentclass[10pt,twocolumn,letterpaper]{article} 14 | 15 | %%%%%%%%% PAPER TYPE - PLEASE UPDATE FOR FINAL VERSION 16 | \usepackage{cvpr} % To produce the CAMERA-READY version 17 | %\usepackage[review]{cvpr} % To produce the REVIEW version 18 | % \usepackage[pagenumbers]{cvpr} % To force page numbers, e.g. for an arXiv version 19 | 20 | % NW: For long table 21 | \usepackage{tabularx,booktabs} 22 | \usepackage[accsupp]{axessibility} % Improves PDF readability for those with disabilities. 
23 | 24 | % Import additional packages in the preamble file, before hyperref 25 | \input{preamble} 26 | 27 | % It is strongly recommended to use hyperref, especially for the review version. 28 | % hyperref with option pagebackref eases the reviewers' job. 29 | % Please disable hyperref *only* if you encounter grave issues, 30 | % e.g. with the file validation for the camera-ready version. 31 | % 32 | % If you comment hyperref and then uncomment it, you should delete *.aux before re-running LaTeX. 33 | % (Or just hit 'q' on the first LaTeX run, let it finish, and you should be clear). 34 | \definecolor{cvprblue}{rgb}{0.21,0.49,0.74} 35 | \usepackage[pagebackref,breaklinks,colorlinks,citecolor=cvprblue]{hyperref} 36 | 37 | %%%%%%%%% PAPER ID - PLEASE UPDATE 38 | \def\paperID{33} % *** Enter the Paper ID here 39 | \def\confName{CVPR} 40 | \def\confYear{2024} 41 | 42 | %%%%%%%%% TITLE - PLEASE UPDATE 43 | \title{CAGE: Circumplex Affect Guided Expression Inference} 44 | 45 | %%%%%%%%% AUTHORS - PLEASE UPDATE 46 | % \author{Niklas Wagner\\ 47 | % Karlsruhe Institute of Technology\\ 48 | % {\tt\small uvssk@student.kit.edu} 49 | % % For a paper whose authors are all at the same institution, 50 | % % omit the following lines up until the closing ``}''. 51 | % % Additional authors and addresses can be added with ``\and'', 52 | % % just like the second author. 53 | % % To save space, use either the email address or home page, not both 54 | % \and 55 | % Felix Mätzler\\ 56 | % Karlsruhe Institute of Technology\\ 57 | % {\tt\small uvian@student.kit.edu} 58 | % \and 59 | % Samed R. Vossberg\\ 60 | % Karlsruhe Institute of Technology\\ 61 | % {\tt\small urgfl@student.kit.edu} 62 | % } 63 | 64 | \author{Niklas Wagner$^{1}$$^,$$^*$, Felix Mätzler$^{1}$$^,$$^*$, Samed R. Vossberg$^{1}$$^,$$^*$, Helen Schneider$^{1}$$^*$, Svetlana Pavlitska$^{2}$, \\J. 
Marius Zöllner$^{1,2}$\\ 65 | \textit{$^{1}$ Karlsruhe Institute of Technology (KIT), Germany}\\ 66 | \textit{$^{2}$ FZI Research Center for Information Technology, Germany} \\ 67 | {\tt\small helen.schneider@kit.edu}\\ 68 | } 69 | \begin{document} 70 | \maketitle 71 | \def\thefootnote{*}\footnotetext{These authors contributed equally to this work} 72 | \input{sec/00_Abstract} 73 | \input{sec/01_Intro} 74 | % \newpage 75 | % \clearpage 76 | \input{sec/02_Related_Work} 77 | % \newpage 78 | % \clearpage 79 | \input{sec/03_Datasets} 80 | % \newpage 81 | % \clearpage 82 | \input{sec/04_Model} 83 | 84 | \input{sec/06_Conclusion} 85 | \newpage 86 | \clearpage 87 | { 88 | \small 89 | \bibliographystyle{ieeenat_fullname} 90 | \bibliography{main} 91 | } 92 | 93 | % WARNING: do not forget to delete the supplementary pages from your submission 94 | % \input{sec/X_suppl} 95 | 96 | \end{document} 97 | -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/Shuttle ganz.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/Shuttle ganz.jpg -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/Bild_Russel_AffectNet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/Bild_Russel_AffectNet.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/affectnet_cdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/affectnet_cdf.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/arousal_distribution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/arousal_distribution.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/av_for_each_category.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/av_for_each_category.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/example_image.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/example_image.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/frequency_of_expression.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/frequency_of_expression.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/inference_best_va_affectnet8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/inference_best_va_affectnet8.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/scatterplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/scatterplot.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet/valence_distribution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet/valence_distribution.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/affectnet8onemotic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/affectnet8onemotic.png -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/confusion_7VA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/confusion_7VA.png -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/confusion_8VA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/confusion_8VA.png -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/emotic/emotic_cdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/emotic_cdf.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/emotic/example_image.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/example_image.pdf -------------------------------------------------------------------------------- 
/CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expression.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expressions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/frequency_of_expressions.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/emotic/inference_affectnet8_on_emotic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emotic/inference_affectnet8_on_emotic.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/emoticonaffectnet8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/emoticonaffectnet8.png -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/inference_affectnet8_on_emotic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/inference_affectnet8_on_emotic.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/inference_cross_validation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/inference_cross_validation.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/pictures/inference_emotic_on_affectnet8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/CVPR_Workshop_ABAW_2024/pictures/inference_emotic_on_affectnet8.pdf -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/preamble.tex: -------------------------------------------------------------------------------- 1 | % 2 | % --- inline annotations 3 | % 4 | \usepackage[dvipsnames]{xcolor} 5 | \newcommand{\red}[1]{{\color{red}#1}} 6 | \newcommand{\todo}[1]{{\color{red}#1}} 7 | \newcommand{\TODO}[1]{\textbf{\color{red}[TODO: #1]}} 8 | % --- disable by uncommenting 9 | % \renewcommand{\TODO}[1]{} 10 | % \renewcommand{\todo}[1]{#1} 11 | 12 | -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/sec/00_Abstract.tex: -------------------------------------------------------------------------------- 1 | 
\begin{abstract} 2 | Understanding emotions and expressions is a task of interest across multiple disciplines, especially for improving user experiences. Contrary to the common perception, it has been shown that emotions are not discrete entities but instead exist along a continuum. People understand discrete emotions differently due to a variety of factors, including cultural background, individual experiences, and cognitive biases. Therefore, most approaches to expression understanding, particularly those relying on discrete categories, are inherently biased. In this paper, we present a comparative in-depth analysis of two common datasets (\affectnet{} and \emotic{}) equipped with the components of the circumplex model of affect. Further, we propose a model for the prediction of facial expressions tailored for lightweight applications. Using a small-scale MaxViT-based model architecture, we evaluate the impact of discrete expression category labels % (\textit{Neutral, Happiness, Sadness, Surprise, Fear, Disgust, Anger, Contempt}) 3 | in training with the continuous \va{} labels. We show that considering valence and arousal in addition to discrete category labels helps to significantly improve expression inference. The proposed model outperforms the current state-of-the-art models on \affectnet{}, establishing it as the best-performing model for inferring \va{}, achieving a 7\% lower RMSE. Training scripts and trained weights to reproduce our results can be found here: \url{https://github.com/wagner-niklas/CAGE_expression_inference}. 4 | \end{abstract} -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/sec/01_Intro.tex: -------------------------------------------------------------------------------- 1 | \section{Introduction} 2 | \label{sec:intro} 3 | 4 | The inference of emotions through expressions has been a topic of interest over the past years, as it can provide insights into a person's feelings towards other individuals or topics. Mehrabian and Wiener~\cite{mehrabian1967decoding} suggest that 55\% of communication is conveyed through expressions. Lapakko~\cite{Lapakko2015CommunicationI9} argues, however, that these findings are limited to emotional states. Automating the analysis of expressions to gain insights into user experience is one step towards live feedback without direct interaction with an individual. 5 | 6 | \begin{figure}[t] 7 | \centering 8 | \includegraphics[width=0.8\columnwidth, trim={3cm 11cm 3cm 3cm}, clip]{pictures/affectnet/Bild_Russel_AffectNet.pdf} 9 | \caption{\textit{Valence/arousal} for sample images from \affectnet{}~\cite{mollahosseini2017affectnet}.} 10 | \label{fig:Russel_Affectnet} 11 | \end{figure} 12 | 13 | 14 | A common approach is \textit{expression inference}, i.e. classification of emotional expressions into discrete categories. However, a comprehensive meta-analysis of facial expression research by Barrett et al.~\cite{barretetal2019} has shown that there is no consensus, either across or within cultures, on specific facial movements reliably depicting one category of emotion. They suggest that affective states can more reliably be inferred by a third-party individual. They emphasize that these states are inferred, not recognized. According to Russell~\cite{rusellmodell}, affects can be described as a set of dimensions, with each dimension varying independently. These dimensions are called \va{}, representing the positivity/negativity and the intensity/activation of expressions, respectively.
Using \va{} of the circumplex model of affect~\cite{rusellmodell} as additional dimensions rather than only discrete emotions for expression inference thus offers a more robust framework, as they provide a continuous spectrum that captures the underlying affective states. 15 | 16 | 17 | In this work, we compare training with \va{} labels merged with the commonly used discrete emotions to train with the two approaches separately. 18 | Our approach involves pinpointing the differences and similarities between two leading datasets that catalog images according to their explicit discrete and continuous emotional states: \affectnet~\cite{mollahosseini2017affectnet} and \emotic{}~\cite{kosti_emotic_2017}. 19 | % We examine the labeling process and show that there is still a need for further datasets to create a universal model that does not guess emotions based on labels of third-party individuals but rather gets information about the true internal state of each image subject. 20 | % We refer to~\cite{barretetal2019} for a more detailed discussion. 21 | We then develop a lightweight deep neural network tailored for computer vision tasks, aiming to accurately infer these discrete emotions as well as the continuous dimensions of \va{}, surpassing the performance of existing models. In particular, our model improves accuracy by reducing the root-mean-square error (RMSE) by 7.0\% for \val{} and 6.8\% for \aro{}. It also increases the concordance correlation coefficients (CCC) by 0.8\% for \val{} and 2.0\% for \aro{} when tested on the \affectnet{} dataset. These improvements are reflected in our final results, with CCC values of 0.716 for \val{} and 0.642 for \aro{}, and RMSE values of 0.331 for \val{} and 0.305 for \aro{}. Furthermore, we exceed the top-3 accuracy set by Khan \etal~\cite{khan2024focusclip} on the \emotic{} dataset by 1.0\%. %Additionally, we conduct a cross-evaluation of the model's effectiveness using the given test datasets. %In summary, this research focuses on the following question: 22 | % Consequently, the impact of these approaches needs to be measured. To do so, we 23 | % \begin{enumerate} [label=(\roman*)] 24 | % \item identify differences and similarities of two state-of-the-art datasets containing images with their apparent discrete and continuous emotion states 25 | % \item enhance a lightweight deep neural network architecture suited for computer vision to suggest these discrete emotions and/or continuous dimensions \va{}l{}, \aro{} 26 | % \item cross-evaluate the resulting model performances on the given test datasets. Hence, the following research question motivates our research: 27 | % \end{enumerate} 28 | %\textit{What effect does the addition of \val{}/\aro{} regression to discrete emotion classification have on facial emotion guessing performance across datasets?} 29 | 30 | %In the following we examine related work in \autoref{sec:relatedwork}, looking into different datasets and related approaches in emotion guessing. Then, we describe our applied data analysis and our model training approach in \autoref{sec:method}. Subsequently, we share our insights on the data and the outcomes of our model training in \autoref{sec:results}. Lastly, we present our conclusions and future perspectives in \autoref{sec:conclusion}. 
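% For reference, the RMSE and CCC reported above follow their standard definitions (a sketch; $\hat{y}_i$ and $y_i$ denote predicted and annotated values, computed separately for \val{} and \aro{}):
% \begin{equation*}
%   \mathrm{RMSE} = \sqrt{\frac{1}{N}\sum_{i=1}^{N}\left(\hat{y}_i - y_i\right)^2}, \qquad
%   \mathrm{CCC} = \frac{2\rho\,\sigma_{\hat{y}}\,\sigma_{y}}{\sigma_{\hat{y}}^{2} + \sigma_{y}^{2} + \left(\mu_{\hat{y}} - \mu_{y}\right)^{2}},
% \end{equation*}
% where $\mu$, $\sigma$ and $\rho$ are the means, standard deviations and Pearson correlation of predictions and labels.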
31 | -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/sec/02_Related_Work.tex: -------------------------------------------------------------------------------- 1 | \section{Related Work} 2 | \label{sec:relatedwork} 3 | 4 | % In this chapter, we go into the existing research and methodologies relevant to emotion guessing via facial expressions. 5 | In the field of affective computing, in particular expression inference, the integration of \val{}/\aro{} regression with discrete emotion classification has emerged as a promising approach to enhance the performance and applicability across diverse datasets. In the following, we discuss existing works in this domain. 6 | 7 | \subsection{Datasets for Expression Inference} 8 | 9 | In the domain of expression inference, several datasets exist. However, these datasets vary significantly in both the data they offer and their popularity. 10 | Among the most widely used datasets are FER2013~\cite{goodfellow_challenges_2013} and FERPlus~\cite{barsoum_training_2016}, which provide annotated 48$\times$48 pixel black-and-white facial images classified in seven (FER) or eight (FER+) discrete emotional states. 11 | % While these datasets have contributed significantly to the advancement of emotion state research, they may have limitations in capturing the complexity and nuances of human emotions due to limited data labeling. In our approach, we have therefore chosen the \affectnet{}~\cite{mollahosseini2017affectnet} and \emotic{}~\cite{emotic_pami2019} datasets. 12 | While these datasets have been the foundation for numerous research contributions, they have been expanded in various ways over the past years. Notable examples in this context are the \emotic{}~\cite{kosti_emotic_2017} and \affectnet{}~\cite{mollahosseini2017affectnet} datasets, which both contain high-resolution RGB images. 13 | \affectnet{} is a large-scale database containing around 0.4 million facial images labeled by 12 annotators. Each image is annotated with categorical emotions, mirroring those used in the FER+ dataset, in addition to \va{} values. This approach offers a more refined representation of emotions compared to categorical labels only. 14 | 15 | The \emotic{} (\textit{Emotions in Context}) dataset provides a more nuanced perspective on affective states. Unlike earlier datasets focused solely on facial expressions, \emotic{} captures individuals in full-body shots within their surrounding context. \emotic{} features bounding boxes that encompass each individual's entire body, eliminating the need for a visible face. Furthermore, it categorizes emotions into 26 discrete categories, allowing for multiple labels per individual. In addition, the dataset expands these discrete values with continuous measures of \va{} as well as \dom{} that measures the level of control a person feels during a situation, ranging from submissive / non-control to dominant / in-control~\cite{emotic_pami2019}. 
16 | 17 | While there are at least 28 datasets such as CK+~\cite{5543262}, RAF-DB~\cite{Li_2017_CVPR} or Aff-Wild2~\cite{kollias2023abaw2, kollias2023multi, kollias2022abaw, kollias2023abaw, kollias2021analysing, kollias2021affect, kollias2021distribution, kollias2020analysing, kollias_expression_2019, kollias2019deep, kollias2019face, zafeiriou2017aff, kollias2019affwild2} focusing specifically on \textit{facial expression recognition/inference} featuring continuous and/or discrete measures, we chose to focus on the two mentioned above, since we are interested in both discrete emotion labeling on an individual basis as well as continuous measures of \va{}. 18 | \affectnet{}~\cite{mollahosseini2017affectnet} as a state-of-the-art, is arguably the most represented dataset in the current research field. 19 | On the other hand, \emotic{}, although not being the most utilized dataset, offers the most refined representation of measures while still focusing on a combination of discrete and continuous variables to define individuals emotion. 20 | % Außerdem hier noch Related work angeben was schon gemacht wurde im Sinne vergleich? Gibt es Paper die unser Thema schon genauer anschauen? Gibt es Vergleich zwischen den Datensätzen oder zwischen FER und den einzelnen?! Hier einbinden 21 | % Elicit research machen 22 | 23 | \begin{table}[t] 24 | \centering 25 | \begin{tabular}{r | c | c } 26 | \hline 27 | \textbf{Method} & \textbf{Accuracy [\%]} & \textbf{Date [mm-yy]} \\ 28 | \hline 29 | DDAMFN~\cite{electronics12173595} & 64.25 & 08-23 \\ 30 | POSTER++~\cite{mao2023poster} &63.77 & 01-23 \\ 31 | S2D~\cite{chen2023static}&63.06 & 12-22 \\ 32 | MT EffNet-B2~\cite{9815154} & 63.03 & 07-22 \\ 33 | MT-ArcRes~\cite{kollias_expression_2019} & 63.00 & 09-19 \\ \hline 34 | \end{tabular} 35 | \caption{Top five models on \affectnet{}-8 benchmark~\cite{paperswithcodeaff}.} 36 | \label{tab:relatedworkaffectnet8} 37 | \end{table} 38 | 39 | \begin{table}[t] 40 | \centering 41 | \begin{tabular}{r | c | c } 42 | \hline 43 | \textbf{Method} & \textbf{Accuracy [\%]} & \textbf{Date [mm-yy]} \\ 44 | \hline 45 | S2D~\cite{chen2023static}&67.62 & 12-22 \\ 46 | POSTER++~\cite{mao2023poster} &67.49 & 01-23 \\ 47 | DDAMFN~\cite{electronics12173595} & 67.03 & 08-23 \\ 48 | Emo\affectnet{}~\cite{RYUMINA2022435} & 66.49 & 12-22 \\ 49 | Emotion-GCN~\cite{Antoniadis_2021} & 66.46 & 07-21 \\\hline 50 | \end{tabular} 51 | \caption{Top five models on \affectnet{}-7 benchmark~\cite{paperswithcodeaff}.} 52 | \label{tab:relatedworkaffectnet7} 53 | \end{table} 54 | 55 | \subsection{Expression Inference Models} 56 | 57 | Expression inference on datasets like \affectnet{} has been addressed in numerous publications. 58 | According to Paperswithcode~\cite{paperswithcodeaff}, 207 \affectnet{}-related papers have been published since 2020. Tables~\ref{tab:relatedworkaffectnet8} and~\ref{tab:relatedworkaffectnet7} show five best models in leaderboards for the \affectnet{}-8 and \affectnet{}-7 test benchmark as of 01.01.2024. As the initial FER dataset does not contain the emotion \textit{Contempt}, there exists also an \affectnet{}-7 benchmark omitting this emotion. 59 | So far, the best-performing models for expression inference have been almost exclusively based on convolutional neural networks (CNNs), e.g. ResNet-18~\cite{he2016deep}. 
Although CNNs are still competitive as shown by Savchenko \etal~\cite{9815154}, more recent architectures like the POSTER++~\cite{mao2023poster} facilitate hybrid facial expression inference via networks that combine CNNs for feature extraction with vision transformer elements for efficient multi-scale feature integration and attention-based cross-fusion, achieving state-of-the-art performance with reduced computational cost. 60 | Because \emotic{} allows for multiple discrete labels for each individual, a general accuracy score is less applicable. Instead, Khan \etal~\cite{khan2024focusclip} suggest that the \textit{top-k accuracy} can provide more insights. Utilizing a multi-modal approach leveraging region of interest heatmaps, a vision encoder, and a text encoder, they achieve a top-3 accuracy of 13.73\%. 61 | % Far less popular is \emotic{}, cited by 63 papers since 2020 according to Paperswithcode (fig.~\ref{tab:relatedworkemotic}) 62 | % \begin{table}[htbp] 63 | % \centering 64 | % \begin{tabular}{r | c | c } 65 | % \textbf{Method} & \textbf{mAP} & \textbf{Date [mm-yy]} \\ 66 | % \hline 67 | % EmotiCon~\cite{mittal2020emoticon} &35.48 & 03-20 \\ 68 | % EmotiCon (GCN)~\cite{mittal2020emoticon} & 32.03 & 03-20 \\ 69 | % Fusion Model 1~\cite{Kosti_2019} & 29.45 & 03-20 \\ 70 | % Fusion Model 2~\cite{Kosti_2019} & 27.70 & 03-20 \\ 71 | % CAER-Net~\cite{Lee_2019_ICCV} & 20.84 & 10-19 \\ 72 | % \end{tabular} 73 | % \caption{Comparison Top-5 \emotic{} Benchmarks~\cite{paperswithcodeemo} } 74 | % \label{tab:relatedworkemotic} 75 | % \end{table} 76 | Khor Wen Hwooi \etal~\cite{hwooi_deep_2022} suggested extracting features from CNNs and then applying regression with a CultureNet~\cite{rudovic2018culturenet} for the continuous prediction of affect from facial expression images within the \va{} space. The best results were achieved with DenseNet201~\cite{huang2017densely} for feature extraction. The work demonstrates superior performance in predicting \va{} levels, particularly on the \affectnet{} dataset. 77 | %The authors highlight their model's ability to generalize across unseen datasets by testing on the Aff-Wild2~\cite{kollias_expression_2019} dataset. 78 | -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/sec/06_Conclusion.tex: -------------------------------------------------------------------------------- 1 | \section{Conclusion \& Outlook} 2 | \label{sec:conclusion} 3 | 4 | In this paper, we compared the capability of discrete classifier approaches with that of multi-task learning models when inferring emotional expressions. 5 | We used two prominent datasets annotated with discrete expressions and with values based on the circumplex model of affect to train our models. 6 | 7 | \textbf{Firstly}, we performed an in-depth analysis of the datasets. It was observed that while test datasets are often balanced concerning emotional expressions, the balance is not maintained for \va{}. Models trained solely on \va{} tend to minimize errors. Additionally, it is worthwhile to delve into the intricate distribution of the \emotic{} dataset, especially how it varies concerning the number of classes in the train and test sets. 8 | 9 | \textbf{Secondly}, we proposed using the MaxViT model architecture and described the training and evaluation protocol for both datasets. The proposed approach significantly improved model accuracy. Even in cases of misclassification, the predicted \va{} values often remained accurate.
Establishing a threshold for correct prediction of \va{} poses an interesting challenge for future work, as it involves considering factors such as human error and the inherent complexity of emotional expression perception. Furthermore, our model based on \affectnet{} demonstrated robust performance in \va{} estimation via cross-validation. This suggests the potential for it to serve as a well-generalized model. Conversely, the performance of our \emotic{}-based approach was less conclusive, possibly due to insufficient data or other factors. 10 | 11 | In conclusion, our research underscores the effectiveness of continuous value approaches within multi-task learning frameworks for emotional expression inference. Further exploration and refinement of these methodologies could yield even more accurate and robust models in the future. 12 | -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/sec/X_suppl.tex: -------------------------------------------------------------------------------- 1 | \clearpage 2 | \setcounter{page}{1} 3 | \maketitlesupplementary 4 | 5 | 6 | \section{Rationale} 7 | \label{sec:rationale} 8 | % 9 | Having the supplementary compiled together with the main paper means that: 10 | % 11 | \begin{itemize} 12 | \item The supplementary can back-reference sections of the main paper, for example, we can refer to \cref{sec:intro}; 13 | \item The main paper can forward reference sub-sections within the supplementary explicitly (e.g. referring to a particular experiment); 14 | \item When submitted to arXiv, the supplementary will already included at the end of the paper. 15 | \end{itemize} 16 | % 17 | To split the supplementary pages from the main paper, you can use \href{https://support.apple.com/en-ca/guide/preview/prvw11793/mac#:~:text=Delete%20a%20page%20from%20a,or%20choose%20Edit%20%3E%20Delete).}{Preview (on macOS)}, \href{https://www.adobe.com/acrobat/how-to/delete-pages-from-pdf.html#:~:text=Choose%20%E2%80%9CTools%E2%80%9D%20%3E%20%E2%80%9COrganize,or%20pages%20from%20the%20file.}{Adobe Acrobat} (on all OSs), as well as \href{https://superuser.com/questions/517986/is-it-possible-to-delete-some-pages-of-a-pdf-document}{command line tools}. -------------------------------------------------------------------------------- /CVPR_Workshop_ABAW_2024/todos.txt: -------------------------------------------------------------------------------- 1 | - The references for Aff-Wild2 database are not right, please use the ones found here: 2 | https://affective-behavior-analysis-in-the-wild.github.io/6th/index.html - 3 | - emotion guessing? 
facial expression inference 4 | - cite: Distribution Matching for Multi-Task Learning of Classification Tasks: 5 | a Large-Scale Study on Faces & Beyond 6 | - lines 114-119: Aff-Wild2 also contains both discrete categories (7 basic expressions), as well as continuous valence-arousal (it also contains action units) 7 | - more citations for Aff-Wild2 8 | - IEEE copyright submiten 9 | - Repo: https://github.com/wagner-niklas/CAGE-CircumplexAffectGuidedExpressionInference 10 | 11 | - results for swin transformer 12 | 13 | 14 | - Subsection 5.1.3 -------------------------------------------------------------------------------- /Honnold_inference.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wagner-niklas/CAGE_expression_inference/02732acea92326aacf3b303a833a161b97d4a3cd/Honnold_inference.gif -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Niklas Wagner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Circumplex Affect Guided Expression Inference (CAGE) 2 | 3 | ## Realtime Expression Inference Supported By The Circumplex Model 4 | 5 | ### Keywords: User experience, Expression Inference, FER, Expression Recognition, Emotion Recognition, Supervised Learning, Computer Vision, Data Set Comparison, Autonomous driving 6 | 7 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cage-circumplex-affect-guided-expression/arousal-estimation-on-affectnet)](https://paperswithcode.com/sota/arousal-estimation-on-affectnet?p=cage-circumplex-affect-guided-expression) 8 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cage-circumplex-affect-guided-expression/valence-estimation-on-affectnet)](https://paperswithcode.com/sota/valence-estimation-on-affectnet?p=cage-circumplex-affect-guided-expression) 9 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cage-circumplex-affect-guided-expression/dominance-estimation-on-emotic)](https://paperswithcode.com/sota/dominance-estimation-on-emotic?p=cage-circumplex-affect-guided-expression) 10 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cage-circumplex-affect-guided-expression/arousal-estimation-on-emotic)](https://paperswithcode.com/sota/arousal-estimation-on-emotic?p=cage-circumplex-affect-guided-expression) 11 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cage-circumplex-affect-guided-expression/valence-estimation-on-emotic)](https://paperswithcode.com/sota/valence-estimation-on-emotic?p=cage-circumplex-affect-guided-expression) 12 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cage-circumplex-affect-guided-expression/emotion-recognition-on-emotic)](https://paperswithcode.com/sota/emotion-recognition-on-emotic?p=cage-circumplex-affect-guided-expression) 13 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/cage-circumplex-affect-guided-expression/facial-expression-recognition-on-affectnet)](https://paperswithcode.com/sota/facial-expression-recognition-on-affectnet?p=cage-circumplex-affect-guided-expression) 14 | 15 | ### Citation 16 | If you use this repository or any of its contents, please consider citing our paper: 17 | [CAGE: Circumplex Affect Guided Expression Inference](https://arxiv.org/abs/2404.14975) 18 | ``` 19 | @InProceedings{Wagner_2024_CVPR, 20 | author = {Wagner, Niklas and M\"atzler, Felix and Vossberg, Samed R. and Schneider, Helen and Pavlitska, Svetlana and Z\"ollner, J. Marius}, 21 | title = {CAGE: Circumplex Affect Guided Expression Inference}, 22 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops}, 23 | month = {June}, 24 | year = {2024}, 25 | pages = {4683-4692} } 26 | ``` 27 | 28 | ### Abstract: 29 | Understanding expressions and emotions is a task of interest across multiple disciplines, especially for improving user experiences. Contrary to the common perception, it has been shown that expressions are not discrete entities but instead exist along a continuum. People understand discrete expressions differently due to a variety of factors, including cultural background, individual experiences and cognitive biases.
Therefore, most approaches to expression understanding, particularly those relying on discrete categories, are inherently biased. In this paper, we present a comparative in-depth analysis of two common datasets (AffectNet and EMOTIC) equipped with the components of the circumplex model of affect. Further, we propose a model for the prediction of facial expressions tailored for lightweight applications. Using a small-scale MaxViT-based model architecture, we evaluate the impact of discrete expression category labels in training with the continuous valence and arousal labels. We show that considering valence and arousal in addition to discrete category labels helps to significantly improve expression prediction. The proposed model outperforms the current state-of-the-art models on AffectNet, establishing it as the best-performing model for inferring valence and arousal, achieving a 7% lower RMSE. 30 | 31 | ### Model inference on a video: 32 | ![](https://github.com/wagner-niklas/KIT_FacialEmotionRecognition/blob/main/Honnold_inference.gif) 33 | 34 | 35 | ### Usage: 36 | To run the version with our best-performing model, cd into the project directory and install the requirements: 37 | 38 | ``` 39 | pip install -r requirements.txt 40 | ``` 41 | 42 | If you want to train or alter the models, you can run one of the Python scripts in the `models/` directory. 43 | To run the train scripts, make sure you have downloaded the EMOTIC[[1]](#1) and AffectNet[[2]](#2) datasets and saved them in the directory expected by the scripts (see the workflow sketch after the references below). 44 | The datasets are not publicly available; access has to be requested ([EMOTIC, 2019](https://s3.sunai.uoc.edu/emotic/download.html)) ([AffectNet, 2017](http://mohammadmahoor.com/affectnet/)). 45 | 46 | 47 | 48 | [1] 49 | R. Kosti, J.M. Álvarez, A. Recasens and A. Lapedriza, "Context based emotion recognition using emotic dataset", IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI), 2019. 50 | 51 | [2] 52 | Ali Mollahosseini, Behzad Hasani and Mohammad H. Mahoor, "AffectNet: A Database for Facial Expression, Valence, and Arousal Computing in the Wild," in IEEE Transactions on Affective Computing, vol. 10, no. 1, pp. 18-31, 1 Jan.-March 2019, doi: 10.1109/TAFFC.2017.2740923.
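Once the datasets are in place, a typical workflow looks roughly like this (a sketch based on the repository layout: the scripts use relative paths, so `train.py` and `generate_csv.py` are assumed to be run from inside their model folder, and `train.py` is expected to leave a `model.pt` there):
```
cd models/AffectNet8_Maxvit_Combined
python train.py          # train this model variant
python generate_csv.py   # write validation-set predictions (e.g. inference.csv) using the trained model.pt
cd ../..
python inference_on_webcam.py   # live webcam demo; loads models/AffectNet8_Maxvit_Combined/model.pt
```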
53 | 54 | ### Tasks of this project: 55 | 56 | [1] Implement live video expression inference discrete 57 | 58 | [2] Extend code to guess the continuous values of the circumplex model of affect 59 | 60 | [3] Test model performance on AffectNet and EMOTIC 61 | 62 | [4] Live test expression inference 63 | 64 | [5] Research methods for validating and improving results for future work 65 | -------------------------------------------------------------------------------- /inference_on_webcam.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torchvision.models as models 3 | import torch.nn as nn 4 | import torch 5 | import numpy as np 6 | from PIL import Image 7 | from torchvision import transforms 8 | from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights, EfficientNet 9 | import torchvision 10 | import re 11 | 12 | DEVICE = torch.device( 13 | "cuda" if torch.cuda.is_available() else "cpu" 14 | ) # For Macbook, use mps 15 | 16 | 17 | def draw_valence_bar(frame, valence, x, y, w, bar_height=20): 18 | if valence >= 0.25: 19 | color = (0, 255, 0) # Green for positive valence 20 | elif valence <= -0.25: 21 | color = (0, 0, 255) # Red for negative valence 22 | else: 23 | color = (255, 255, 0) # Blue for neutral valence 24 | 25 | bar_width = int(w) 26 | cv2.rectangle(frame, (x, y - bar_height), (x + bar_width, y), (100, 100, 100), -1) 27 | cursor_x = x + int(bar_width * (valence + 1) / 2) 28 | cv2.line(frame, (cursor_x, y - bar_height), (cursor_x, y), color, 2) 29 | 30 | valence_text = f"Valence: {valence:.2f}" 31 | cv2.putText( 32 | frame, 33 | valence_text, 34 | (x, y - 30), 35 | cv2.FONT_HERSHEY_SIMPLEX, 36 | 0.5, 37 | (255, 255, 255), 38 | 1, 39 | cv2.LINE_AA, 40 | ) 41 | 42 | 43 | def draw_arousal_bar(frame, arousal, x, y, h, bar_width=20): 44 | if arousal >= 0.25: 45 | color = (0, 255, 0) # Green for positive valence 46 | elif arousal <= -0.25: 47 | color = (0, 0, 255) # Red for negative valence 48 | else: 49 | color = (255, 255, 0) # Blue for neutral valence 50 | 51 | bar_height = int(h) 52 | cv2.rectangle(frame, (x, y), (x + bar_width, y + bar_height), (100, 100, 100), -1) 53 | cursor_y = y + int(bar_height * (-arousal + 1) / 2) 54 | cv2.line(frame, (x, cursor_y), (x + bar_width, cursor_y), color, 2) 55 | 56 | arousal_text = f"Arousal: {arousal:.2f}" 57 | cv2.putText( 58 | frame, 59 | arousal_text, 60 | (x + 30, y + 10), 61 | cv2.FONT_HERSHEY_SIMPLEX, 62 | 0.5, 63 | (255, 255, 255), 64 | 1, 65 | cv2.LINE_AA, 66 | ) 67 | 68 | 69 | def get_emotion(outputs_cls): 70 | emotions = [ 71 | "Neutral", 72 | "Happy", 73 | "Sad", 74 | "Suprise", 75 | "Fear", 76 | "Disgust", 77 | "Angry", 78 | "Contempt", # AffectNet8 has 8 classes, when using the AffectNet7 model, remove this class 79 | ] 80 | 81 | max_indices = outputs_cls.argmax(dim=1) 82 | emotions_batch = [emotions[idx.item()] for idx in max_indices] 83 | return emotions_batch 84 | 85 | valence_text = f"Valence: {valence:.2f}" 86 | cv2.putText( 87 | frame, 88 | valence_text, 89 | (x, y - 40), 90 | cv2.FONT_HERSHEY_SIMPLEX, 91 | 0.5, 92 | (255, 255, 255), 93 | 1, 94 | cv2.LINE_AA, 95 | ) 96 | 97 | 98 | cap = cv2.VideoCapture(0) # 0 is usually the default camera (webcam) 99 | 100 | # Load the model 101 | MODEL = models.maxvit_t(weights="DEFAULT") 102 | block_channels = MODEL.classifier[3].in_features 103 | MODEL.classifier = nn.Sequential( 104 | nn.AdaptiveAvgPool2d(1), 105 | nn.Flatten(), 106 | nn.LayerNorm(block_channels), 107 | nn.Linear(block_channels, block_channels), 108 
| nn.Tanh(), 109 | nn.Linear( 110 | block_channels, 10, bias=False 111 | ), # Change the number of output classes, e.g. for AffectNet7 combined use 9 output neurons 112 | ) 113 | MODEL.load_state_dict( 114 | torch.load( 115 | "models/AffectNet8_Maxvit_Combined/model.pt", map_location=torch.device(DEVICE) 116 | ) 117 | ) 118 | MODEL.eval() 119 | MODEL.to(DEVICE) 120 | 121 | test_transform = transforms.Compose( 122 | [ 123 | transforms.ToPILImage(), 124 | transforms.Resize((224, 224)), 125 | transforms.ToTensor(), 126 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 127 | ] 128 | ) 129 | # Inititalize the face classifier 130 | face_classifier = cv2.CascadeClassifier( 131 | cv2.data.haarcascades + "haarcascade_frontalface_default.xml" 132 | ) 133 | 134 | # ***** Access the webcam ***** 135 | 136 | if not cap.isOpened(): 137 | print("Error: Could not open webcam.") 138 | else: 139 | while True: 140 | ret, frame = cap.read() 141 | text = "Press 'q' to quit" 142 | cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 143 | 144 | faces = face_classifier.detectMultiScale( 145 | frame, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40) 146 | ) 147 | # Loop over multiple detected faces 148 | for x, y, w, h in faces: 149 | # Cut out the face from the frame 150 | face_roi = frame[y : y + h, x : x + w] 151 | 152 | img = test_transform(face_roi) 153 | img = img.unsqueeze(0) # Add a batch dimension for the model 154 | outputs = MODEL(img.to(DEVICE)) 155 | outputs_cls = outputs[:, :7] 156 | valence = outputs[:, 7:8].item() 157 | arousal = outputs[:, 8:].item() 158 | 159 | # Draw the valence bar over the face 160 | draw_valence_bar(frame, valence, x, y, w) 161 | draw_arousal_bar(frame, arousal, x + w, y, h) 162 | 163 | emotion = get_emotion(outputs_cls) 164 | emotion_text = f"Emotion: {emotion}" 165 | text_size = cv2.getTextSize(emotion_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[ 166 | 0 167 | ] 168 | cv2.putText( 169 | frame, 170 | emotion_text, 171 | (x - text_size[0] - 10, y + text_size[1] // 2), 172 | cv2.FONT_HERSHEY_SIMPLEX, 173 | 0.5, 174 | (255, 255, 255), 175 | 1, 176 | cv2.LINE_AA, 177 | ) 178 | 179 | cv2.imshow("Webcam", frame) 180 | if cv2.waitKey(1) & 0xFF == ord("q"): 181 | break 182 | 183 | cap.release() 184 | cv2.destroyAllWindows() 185 | -------------------------------------------------------------------------------- /models/AffectNet7_Efficientnet_Combined/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 18 | # Set parameters 19 | BATCHSIZE = 128 20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | 23 | # **** Create dataset and data loaders **** 24 | class CustomDataset(Dataset): 25 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 26 | self.dataframe = 
-------------------------------------------------------------------------------- /models/AffectNet7_Efficientnet_Combined/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 18 | # Set parameters 19 | BATCHSIZE = 128 20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | 23 | # **** Create dataset and data loaders **** 24 | class CustomDataset(Dataset): 25 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 26 | self.dataframe = dataframe 27 | self.transform = transform 28 | self.root_dir = root_dir 29 | self.balance = balance 30 | 31 | if self.balance: 32 | self.dataframe = self.balance_dataset() 33 | 34 | def __len__(self): 35 | return len(self.dataframe) 36 | 37 | def __getitem__(self, idx): 38 | image_path = os.path.join( 39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 40 | ) 41 | if os.path.exists(image_path): 42 | image = Image.open(image_path) 43 | else: 44 | image = Image.new( 45 | "RGB", (224, 224), color="white" 46 | ) # Handle missing image file 47 | 48 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 49 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 50 | 51 | if self.transform: 52 | image = self.transform(image) 53 | 54 | return image, classes, labels 55 | 56 | def balance_dataset(self): 57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 59 | ) 60 | return balanced_df 61 | 62 | 63 | transform_valid = transforms.Compose( 64 | [ 65 | transforms.ToTensor(), 66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 67 | ] 68 | ) 69 | 70 | valid_dataset = CustomDataset( 71 | dataframe=valid_annotations_df, 72 | root_dir=IMAGE_FOLDER_TEST, 73 | transform=transform_valid, 74 | balance=False, 75 | ) 76 | valid_loader = DataLoader( 77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 78 | ) 79 | 80 | # * Define the model * 81 | 82 | # Initialize the model 83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 84 | MODEL.classifier[1] = nn.Linear(in_features=MODEL.classifier[1].in_features, out_features=9) # 7 expression logits + valence/arousal, matching train.py in this folder 85 | MODEL.to(DEVICE) 86 | 87 | # **** Test the model performance for classification **** 88 | 89 | # Set the model to evaluation mode 90 | MODEL.load_state_dict(torch.load("model.pt")) 91 | MODEL.to(DEVICE) 92 | MODEL.eval() 93 | 94 | all_labels_cls = [] 95 | all_predicted_cls = [] 96 | 97 | all_true_val = [] 98 | all_pred_val = [] 99 | all_true_aro = [] 100 | all_pred_aro = [] 101 | 102 | # Start inference on test set 103 | with torch.no_grad(): 104 | for images, classes, labels in iter(valid_loader): 105 | images, classes, labels = ( 106 | images.to(DEVICE), 107 | classes.to(DEVICE), 108 | labels.to(DEVICE), 109 | ) 110 | 111 | outputs = MODEL(images) 112 | outputs_cls = outputs[:, :7] 113 | outputs_reg = outputs[:, 7:] 114 | val_pred = outputs_reg[:, 0] 115 | aro_pred = outputs_reg[:, 1] 116 | 117 | _, predicted_cls = torch.max(outputs_cls, 1) 118 | 119 | all_labels_cls.extend(classes.cpu().numpy()) 120 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 121 | val_true = labels[:, 0] 122 | aro_true = labels[:, 1] 123 | 124 | all_true_val.extend(val_true.cpu().numpy()) 125 | all_true_aro.extend(aro_true.cpu().numpy()) 126 | all_pred_val.extend(val_pred.cpu().numpy()) 127 | all_pred_aro.extend(aro_pred.cpu().numpy()) 128 | 129 | df = pd.DataFrame( 130 | { 131 | "cat_pred": all_predicted_cls, 132 | "cat_true": all_labels_cls, 133 | "val_pred": all_pred_val, 134 | "val_true": all_true_val, 135 | "aro_pred": all_pred_aro, 136 | "aro_true": all_true_aro, 137 | } 138 | ) 139 | df.to_csv("inference.csv", index=False) 140 | -------------------------------------------------------------------------------- /models/AffectNet7_Efficientnet_Combined/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 
| import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | from torch.optim import lr_scheduler 11 | from tqdm import tqdm 12 | 13 | # Load the annotations for training and validation from separate CSV files 14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 16 | train_annotations_path = ( 17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 18 | ) 19 | valid_annotations_path = ( 20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 21 | ) 22 | train_annotations_df = pd.read_csv(train_annotations_path) 23 | valid_annotations_df = pd.read_csv(valid_annotations_path) 24 | 25 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7] 26 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 27 | 28 | # Set parameters 29 | BATCHSIZE = 128 30 | NUM_EPOCHS = 20 31 | LR = 4e-5 32 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 33 | 34 | 35 | # **** Create dataset and data loaders **** 36 | class CustomDataset(Dataset): 37 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 38 | self.dataframe = dataframe 39 | self.transform = transform 40 | self.root_dir = root_dir 41 | self.balance = balance 42 | 43 | if self.balance: 44 | self.dataframe = self.balance_dataset() 45 | 46 | def __len__(self): 47 | return len(self.dataframe) 48 | 49 | def __getitem__(self, idx): 50 | image_path = os.path.join( 51 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 52 | ) 53 | if os.path.exists(image_path): 54 | image = Image.open(image_path) 55 | else: 56 | image = Image.new( 57 | "RGB", (224, 224), color="white" 58 | ) # Handle missing image file 59 | 60 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 61 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 62 | 63 | if self.transform: 64 | image = self.transform(image) 65 | 66 | return image, classes, labels 67 | 68 | def balance_dataset(self): 69 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 70 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 71 | ) 72 | return balanced_df 73 | 74 | 75 | transform = transforms.Compose( 76 | [ 77 | transforms.RandomHorizontalFlip(0.5), 78 | transforms.RandomGrayscale(0.01), 79 | transforms.RandomRotation(10), 80 | transforms.ColorJitter( 81 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1 82 | ), # model more robust to changes in lighting conditions. 83 | transforms.RandomPerspective( 84 | distortion_scale=0.2, p=0.5 85 | ), # can be helpful if your images might have varying perspectives. 
86 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255) 87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 88 | transforms.RandomErasing( 89 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random" 90 | ), # TEST: Should help overfitting 91 | ] 92 | ) 93 | 94 | transform_valid = transforms.Compose( 95 | [ 96 | transforms.ToTensor(), 97 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 98 | ] 99 | ) 100 | 101 | train_dataset = CustomDataset( 102 | dataframe=train_annotations_df, 103 | root_dir=IMAGE_FOLDER, 104 | transform=transform, 105 | balance=False, 106 | ) 107 | valid_dataset = CustomDataset( 108 | dataframe=valid_annotations_df, 109 | root_dir=IMAGE_FOLDER_TEST, 110 | transform=transform_valid, 111 | balance=False, 112 | ) 113 | train_loader = DataLoader( 114 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 115 | ) 116 | valid_loader = DataLoader( 117 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 118 | ) 119 | 120 | # ***** Define the model ***** 121 | 122 | # Initialize the model 123 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 124 | num_features = MODEL.classifier[1].in_features 125 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=9) 126 | MODEL.to(DEVICE) 127 | # Define (weighted) loss function 128 | weights7 = torch.tensor( 129 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006] 130 | ) 131 | criterion_cls = nn.CrossEntropyLoss(weights7.to(DEVICE)) 132 | criterion_cls_val = ( 133 | nn.CrossEntropyLoss() 134 | ) # Use two loss functions, as the validation dataset is balanced 135 | criterion_reg = nn.MSELoss() 136 | 137 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 138 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 139 | 140 | # ***** Train the model ***** 141 | print("--- Start training ---") 142 | scaler = torch.cuda.amp.GradScaler() 143 | best_valid_loss = 100 144 | 145 | for epoch in range(NUM_EPOCHS): 146 | MODEL.train() 147 | total_train_correct = 0 148 | total_train_samples = 0 149 | for images, classes, labels in tqdm( 150 | train_loader, desc="Epoch train_loader progress" 151 | ): 152 | images, classes, labels = ( 153 | images.to(DEVICE), 154 | classes.to(DEVICE), 155 | labels.to(DEVICE), 156 | ) 157 | optimizer.zero_grad() 158 | with torch.autocast(device_type="cuda", dtype=torch.float16): 159 | outputs = MODEL(images) 160 | outputs_cls = outputs[:, :7] 161 | outputs_reg = outputs[:, 7:] 162 | loss = criterion_cls( 163 | outputs_cls.cuda(), classes.cuda() 164 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 165 | scaler.scale(loss).backward() 166 | scaler.step(optimizer) 167 | scaler.update() 168 | lr_scheduler.step() 169 | current_lr = optimizer.param_groups[0]["lr"] 170 | 171 | _, train_predicted = torch.max(outputs_cls, 1) 172 | total_train_samples += classes.size(0) 173 | total_train_correct += (train_predicted == classes).sum().item() 174 | 175 | train_accuracy = (total_train_correct / total_train_samples) * 100 176 | 177 | MODEL.eval() 178 | valid_loss = 0.0 179 | correct = 0 180 | total = 0 181 | with torch.no_grad(): 182 | for images, classes, labels in valid_loader: 183 | images, classes, labels = ( 184 | images.to(DEVICE), 185 | classes.to(DEVICE), 186 | labels.to(DEVICE), 187 | ) 188 | outputs = MODEL(images) 189 | outputs_cls = outputs[:, :7] 190 | outputs_reg = outputs[:, 7:] 191 | loss = criterion_cls_val( 192 | outputs_cls.cuda(), 
classes.cuda() 193 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 194 | valid_loss += loss.item() 195 | _, predicted = torch.max(outputs_cls, 1) 196 | total += classes.size(0) 197 | correct += (predicted == classes).sum().item() 198 | 199 | print( 200 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 201 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 202 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 203 | f", Training Accuracy: {train_accuracy:.2f}%, " 204 | ) 205 | 206 | if valid_loss < best_valid_loss: 207 | best_valid_loss = valid_loss 208 | print(f"Saving model at epoch {epoch+1}") 209 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 210 | -------------------------------------------------------------------------------- /models/AffectNet7_Efficientnet_Discrete/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 18 | # Set parameters 19 | BATCHSIZE = 128 20 | 21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | 24 | # **** Create dataset and data loaders **** 25 | class CustomDataset(Dataset): 26 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 27 | self.dataframe = dataframe 28 | self.transform = transform 29 | self.root_dir = root_dir 30 | self.balance = balance 31 | 32 | if self.balance: 33 | self.dataframe = self.balance_dataset() 34 | 35 | def __len__(self): 36 | return len(self.dataframe) 37 | 38 | def __getitem__(self, idx): 39 | image_path = os.path.join( 40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 41 | ) 42 | if os.path.exists(image_path): 43 | image = Image.open(image_path) 44 | else: 45 | image = Image.new( 46 | "RGB", (224, 224), color="white" 47 | ) # Handle missing image file 48 | 49 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 50 | 51 | if self.transform: 52 | image = self.transform(image) 53 | 54 | return image, label 55 | 56 | def balance_dataset(self): 57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 59 | ) 60 | return balanced_df 61 | 62 | 63 | transform_valid = transforms.Compose( 64 | [ 65 | transforms.ToTensor(), 66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 67 | ] 68 | ) 69 | valid_dataset = CustomDataset( 70 | dataframe=valid_annotations_df, 71 | root_dir=IMAGE_FOLDER_TEST, 72 | transform=transform_valid, 73 | balance=False, 74 | ) 75 | valid_loader = DataLoader( 76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 77 | ) 78 | # ***** Define the model ***** 79 | 80 | # Initialize the model 81 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 82 | num_features = MODEL.classifier[1].in_features 83 | MODEL.classifier[1] = 
nn.Linear(in_features=num_features, out_features=7) 84 | MODEL.to(DEVICE) 85 | 86 | # Set the model to evaluation mode 87 | MODEL.load_state_dict(torch.load("model.pt")) 88 | MODEL.to(DEVICE) 89 | MODEL.eval() 90 | 91 | all_labels_cls = [] 92 | all_predicted_cls = [] 93 | 94 | # Start inference on test set 95 | with torch.no_grad(): 96 | for images, labels_cls in iter(valid_loader): 97 | images = images.to(DEVICE) 98 | labels_cls = labels_cls.to(DEVICE) 99 | 100 | outputs = MODEL(images) 101 | 102 | _, predicted_cls = torch.max(outputs, 1) 103 | 104 | all_labels_cls.extend(labels_cls.cpu().numpy()) 105 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 106 | 107 | 108 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls}) 109 | df.to_csv("inference.csv", index=False) 110 | -------------------------------------------------------------------------------- /models/AffectNet7_Efficientnet_Discrete/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | import torchvision 11 | from torch.optim import lr_scheduler 12 | import re 13 | from tqdm import tqdm 14 | 15 | # Load the annotations for training and validation from separate CSV files 16 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 17 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 18 | train_annotations_path = ( 19 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 20 | ) 21 | valid_annotations_path = ( 22 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 23 | ) 24 | train_annotations_df = pd.read_csv(train_annotations_path) 25 | valid_annotations_df = pd.read_csv(valid_annotations_path) 26 | 27 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7] 28 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 29 | 30 | # Set parameters 31 | BATCHSIZE = 128 32 | NUM_EPOCHS = 20 33 | LR = 4e-5 34 | 35 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 36 | 37 | 38 | # **** Create dataset and data loaders **** 39 | class CustomDataset(Dataset): 40 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 41 | self.dataframe = dataframe 42 | self.transform = transform 43 | self.root_dir = root_dir 44 | self.balance = balance 45 | 46 | if self.balance: 47 | self.dataframe = self.balance_dataset() 48 | 49 | def __len__(self): 50 | return len(self.dataframe) 51 | 52 | def __getitem__(self, idx): 53 | image_path = os.path.join( 54 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 55 | ) 56 | if os.path.exists(image_path): 57 | image = Image.open(image_path) 58 | else: 59 | image = Image.new( 60 | "RGB", (224, 224), color="white" 61 | ) # Handle missing image file 62 | 63 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 64 | 65 | if self.transform: 66 | image = self.transform(image) 67 | 68 | return image, label 69 | 70 | def balance_dataset(self): 71 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 72 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 73 | ) 74 | return balanced_df 75 | 76 | 77 | transform = transforms.Compose( 78 | [ 79 | transforms.ElasticTransform(alpha=5.0, sigma=5.0), 80 | 
transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), 81 | transforms.RandomGrayscale(p=0.1), 82 | transforms.RandomRotation(degrees=15), 83 | transforms.RandomVerticalFlip(), 84 | transforms.ColorJitter(0.15, 0.15, 0.15), 85 | torchvision.transforms.RandomAutocontrast(p=0.4), 86 | transforms.ToTensor(), 87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 88 | ] 89 | ) 90 | 91 | transform_valid = transforms.Compose( 92 | [ 93 | transforms.ToTensor(), 94 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 95 | ] 96 | ) 97 | 98 | train_dataset = CustomDataset( 99 | dataframe=train_annotations_df, 100 | root_dir=IMAGE_FOLDER, 101 | transform=transform, 102 | balance=False, 103 | ) 104 | valid_dataset = CustomDataset( 105 | dataframe=valid_annotations_df, 106 | root_dir=IMAGE_FOLDER_TEST, 107 | transform=transform_valid, 108 | balance=False, 109 | ) 110 | train_loader = DataLoader( 111 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 112 | ) 113 | valid_loader = DataLoader( 114 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 115 | ) 116 | 117 | # ***** Define the model ***** 118 | 119 | # Initialize the model 120 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 121 | num_features = MODEL.classifier[1].in_features 122 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=7) 123 | MODEL.to(DEVICE) 124 | 125 | # Define (weighted) loss function 126 | # weights = torch.tensor([0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]) 127 | weights7 = torch.tensor( 128 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006] 129 | ) 130 | criterion = nn.CrossEntropyLoss(weights7.to(DEVICE)) 131 | criterion_val = ( 132 | nn.CrossEntropyLoss() 133 | ) # Use two loss functions, as the validation dataset is balanced 134 | 135 | 136 | # Filter parameters for weight decay and no weight decay and create optimizer/scheduler 137 | def filter_params(params, include_patterns, exclude_patterns): 138 | included_params = [] 139 | excluded_params = [] 140 | for name, param in params: 141 | if any(re.search(pattern, name) for pattern in include_patterns): 142 | included_params.append(param) 143 | elif not any(re.search(pattern, name) for pattern in exclude_patterns): 144 | excluded_params.append(param) 145 | return included_params, excluded_params 146 | 147 | 148 | include_patterns = [ 149 | r"^(?!.*\.bn)" 150 | ] # Match any layer name that doesn't contain '.bn' = BatchNorm parameters 151 | exclude_patterns = [r".*\.bn.*"] # Vice versa 152 | params_to_decay, params_not_to_decay = filter_params( 153 | MODEL.named_parameters(), include_patterns, exclude_patterns 154 | ) 155 | 156 | # optimizer = optim.AdamW([ 157 | # {'params': params_to_decay, 'weight_decay': ADAMW_WEIGHT_DECAY}, # Apply weight decay to these parameters 158 | # {'params': params_not_to_decay, 'weight_decay': 0.0} # Exclude weight decay for these parameters = 0.0 159 | # ], lr=LR) 160 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 161 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 162 | 163 | # ***** Train the model ***** 164 | print("--- Start training ---") 165 | scaler = torch.cuda.amp.GradScaler() 166 | best_valid_loss = 100 167 | 168 | for epoch in range(NUM_EPOCHS): 169 | MODEL.train() 170 | total_train_correct = 0 171 | total_train_samples = 0 172 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"): 173 | images, labels = 
images.to(DEVICE), labels.to(DEVICE) 174 | optimizer.zero_grad() 175 | with torch.autocast(device_type="cuda", dtype=torch.float16): 176 | output = MODEL(images) 177 | loss = criterion(output.cuda(), labels.cuda()) 178 | scaler.scale(loss).backward() 179 | scaler.step(optimizer) 180 | scaler.update() 181 | lr_scheduler.step() 182 | current_lr = optimizer.param_groups[0]["lr"] 183 | 184 | _, train_predicted = torch.max(output, 1) 185 | total_train_samples += labels.size(0) 186 | total_train_correct += (train_predicted == labels).sum().item() 187 | 188 | train_accuracy = (total_train_correct / total_train_samples) * 100 189 | 190 | MODEL.eval() 191 | valid_loss = 0.0 192 | correct = 0 193 | total = 0 194 | with torch.no_grad(): 195 | for images, labels in valid_loader: 196 | images, labels = images.to(DEVICE), labels.to(DEVICE) 197 | outputs = MODEL(images) 198 | loss = criterion_val(outputs.cuda(), labels.cuda()) 199 | valid_loss += loss.item() 200 | _, predicted = torch.max(outputs, 1) 201 | total += labels.size(0) 202 | correct += (predicted == labels).sum().item() 203 | 204 | print( 205 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 206 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 207 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 208 | f", Training Accuracy: {train_accuracy:.2f}%, " 209 | ) 210 | # TBD: Overwrite the valid loss, then save the model as below whenever it is smaller than before 211 | 212 | if valid_loss < best_valid_loss: 213 | best_valid_loss = valid_loss 214 | print(f"Saving model at epoch {epoch+1}") 215 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 216 | -------------------------------------------------------------------------------- /models/AffectNet7_Efficientnet_VA/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | 11 | # Load the annotations for training and validation from separate CSV files 12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 14 | 15 | valid_annotations_path = ( 16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 17 | ) 18 | valid_annotations_df = pd.read_csv(valid_annotations_path) 19 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 20 | # Set parameters 21 | BATCHSIZE = 128 22 | 23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | # **** Create dataset and data loaders **** 27 | class CustomDataset(Dataset): 28 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 29 | self.dataframe = dataframe 30 | self.transform = transform 31 | self.root_dir = root_dir 32 | self.balance = balance 33 | 34 | if self.balance: 35 | self.dataframe = self.balance_dataset() 36 | 37 | def __len__(self): 38 | return len(self.dataframe) 39 | 40 | def __getitem__(self, idx): 41 | image_path = os.path.join( 42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 43 | ) 44 | image = Image.open(image_path) 45 | 46 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8) 47 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16) 48 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16) 49 | 50 | if self.transform: 51 | image 
= self.transform(image) 52 | 53 | return image, classes, valence, arousal 54 | 55 | def balance_dataset(self): 56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 58 | ) 59 | return balanced_df 60 | 61 | 62 | transform_valid = transforms.Compose( 63 | [ 64 | transforms.ToTensor(), 65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 66 | ] 67 | ) 68 | 69 | valid_dataset = CustomDataset( 70 | dataframe=valid_annotations_df, 71 | root_dir=IMAGE_FOLDER_TEST, 72 | transform=transform_valid, 73 | balance=False, 74 | ) 75 | 76 | valid_loader = DataLoader( 77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 78 | ) 79 | 80 | # ***** Define the model ***** 81 | 82 | # Initialize the model 83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 84 | num_features = MODEL.classifier[1].in_features 85 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=2) 86 | MODEL.to(DEVICE) 87 | 88 | 89 | # **** Test the model performance for classification **** 90 | 91 | # Set the model to evaluation mode 92 | MODEL.load_state_dict(torch.load("model.pt")) 93 | MODEL.to(DEVICE) 94 | MODEL.eval() 95 | 96 | all_val_true_values = [] 97 | all_val_predicted_values = [] 98 | all_aro_true_values = [] 99 | all_aro_predicted_values = [] 100 | 101 | # Start inference on test set 102 | with torch.no_grad(): 103 | for images, _, val_true, aro_true in valid_loader: 104 | images, val_true, aro_true = ( 105 | images.to(DEVICE), 106 | val_true.to(DEVICE), 107 | aro_true.to(DEVICE), 108 | ) 109 | 110 | outputs = MODEL(images) 111 | val_pred = outputs[:, 0] 112 | aro_pred = outputs[:, 1] 113 | 114 | # Append to the lists --> Regression 115 | true_val_values = val_true.cpu().numpy() 116 | true_aro_values = aro_true.cpu().numpy() 117 | pred_val_values = val_pred.cpu().numpy() 118 | pred_aro_values = aro_pred.cpu().numpy() 119 | all_val_true_values.extend(true_val_values) 120 | all_aro_true_values.extend(true_aro_values) 121 | all_val_predicted_values.extend(pred_val_values) 122 | all_aro_predicted_values.extend(pred_aro_values) 123 | 124 | df = pd.DataFrame( 125 | { 126 | "val_pred": all_val_predicted_values, 127 | "val_true": all_val_true_values, 128 | "aro_pred": all_aro_predicted_values, 129 | "aro_true": all_aro_true_values, 130 | } 131 | ) 132 | df.to_csv("inference.csv", index=False) 133 | -------------------------------------------------------------------------------- /models/AffectNet7_Maxvit_Combined/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | 11 | # Load the annotations for training and validation from separate CSV files 12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 14 | 15 | valid_annotations_path = ( 16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 17 | ) 18 | valid_annotations_df = pd.read_csv(valid_annotations_path) 19 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 20 | 21 | exp_counts_valid = valid_annotations_df["exp"].value_counts().sort_index() 22 | 23 | # Set parameters 24 | BATCHSIZE = 128 25 | MODEL = models.maxvit_t(weights="DEFAULT") 26 | 
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 27 | 28 | 29 | # **** Create dataset and data loaders **** 30 | class CustomDataset(Dataset): 31 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 32 | self.dataframe = dataframe 33 | self.transform = transform 34 | self.root_dir = root_dir 35 | self.balance = balance 36 | 37 | if self.balance: 38 | self.dataframe = self.balance_dataset() 39 | 40 | def __len__(self): 41 | return len(self.dataframe) 42 | 43 | def __getitem__(self, idx): 44 | image_path = os.path.join( 45 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 46 | ) 47 | if os.path.exists(image_path): 48 | image = Image.open(image_path) 49 | else: 50 | image = Image.new( 51 | "RGB", (224, 224), color="white" 52 | ) # Handle missing image file 53 | 54 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 55 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 56 | 57 | if self.transform: 58 | image = self.transform(image) 59 | 60 | return image, classes, labels 61 | 62 | def balance_dataset(self): 63 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 64 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 65 | ) 66 | return balanced_df 67 | 68 | 69 | transform_valid = transforms.Compose( 70 | [ 71 | transforms.ToTensor(), 72 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 73 | ] 74 | ) 75 | valid_dataset = CustomDataset( 76 | dataframe=valid_annotations_df, 77 | root_dir=IMAGE_FOLDER_TEST, 78 | transform=transform_valid, 79 | balance=False, 80 | ) 81 | valid_loader = DataLoader( 82 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 83 | ) 84 | 85 | # ***** Define the model ***** 86 | 87 | # Initialize the model 88 | block_channels = MODEL.classifier[3].in_features 89 | MODEL.classifier = nn.Sequential( 90 | nn.AdaptiveAvgPool2d(1), 91 | nn.Flatten(), 92 | nn.LayerNorm(block_channels), 93 | nn.Linear(block_channels, block_channels), 94 | nn.Tanh(), 95 | nn.Linear(block_channels, 9, bias=False), 96 | ) 97 | MODEL.to(DEVICE) # Put the model to the GPU 98 | 99 | # **** Test the model performance for classification **** 100 | 101 | # Set the model to evaluation mode 102 | MODEL.load_state_dict(torch.load("model.pt")) 103 | MODEL.to(DEVICE) 104 | MODEL.eval() 105 | 106 | all_labels_cls = [] 107 | all_predicted_cls = [] 108 | 109 | all_true_val = [] 110 | all_pred_val = [] 111 | all_true_aro = [] 112 | all_pred_aro = [] 113 | 114 | # Start inference on test set 115 | with torch.no_grad(): 116 | for images, classes, labels in iter(valid_loader): 117 | images, classes, labels = ( 118 | images.to(DEVICE), 119 | classes.to(DEVICE), 120 | labels.to(DEVICE), 121 | ) 122 | 123 | outputs = MODEL(images) 124 | outputs_cls = outputs[:, :7] 125 | outputs_reg = outputs[:, 7:] 126 | val_pred = outputs_reg[:, 0] 127 | aro_pred = outputs_reg[:, 1] 128 | 129 | _, predicted_cls = torch.max(outputs_cls, 1) 130 | 131 | all_labels_cls.extend(classes.cpu().numpy()) 132 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 133 | val_true = labels[:, 0] 134 | aro_true = labels[:, 1] 135 | 136 | all_true_val.extend(val_true.cpu().numpy()) 137 | all_true_aro.extend(aro_true.cpu().numpy()) 138 | all_pred_val.extend(val_pred.cpu().numpy()) 139 | all_pred_aro.extend(aro_pred.cpu().numpy()) 140 | 141 | df = pd.DataFrame( 142 | { 143 | "cat_pred": all_predicted_cls, 144 | "cat_true": all_labels_cls, 145 | "val_pred": all_pred_val, 146 | 
"val_true": all_true_val, 147 | "aro_pred": all_pred_aro, 148 | "aro_true": all_true_aro, 149 | } 150 | ) 151 | df.to_csv("inference.csv", index=False) 152 | -------------------------------------------------------------------------------- /models/AffectNet7_Maxvit_Combined/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | from torch.optim import lr_scheduler 11 | from tqdm import tqdm 12 | 13 | # Load the annotations for training and validation from separate CSV files 14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 16 | train_annotations_path = ( 17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 18 | ) 19 | valid_annotations_path = ( 20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 21 | ) 22 | train_annotations_df = pd.read_csv(train_annotations_path) 23 | valid_annotations_df = pd.read_csv(valid_annotations_path) 24 | 25 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7] 26 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 27 | 28 | # Set parameters 29 | BATCHSIZE = 128 30 | NUM_EPOCHS = 25 31 | LR = 4e-5 32 | MODEL = models.maxvit_t(weights="DEFAULT") 33 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | 35 | 36 | # **** Create dataset and data loaders **** 37 | class CustomDataset(Dataset): 38 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 39 | self.dataframe = dataframe 40 | self.transform = transform 41 | self.root_dir = root_dir 42 | self.balance = balance 43 | 44 | if self.balance: 45 | self.dataframe = self.balance_dataset() 46 | 47 | def __len__(self): 48 | return len(self.dataframe) 49 | 50 | def __getitem__(self, idx): 51 | image_path = os.path.join( 52 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 53 | ) 54 | if os.path.exists(image_path): 55 | image = Image.open(image_path) 56 | else: 57 | image = Image.new( 58 | "RGB", (224, 224), color="white" 59 | ) # Handle missing image file 60 | 61 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 62 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 63 | 64 | if self.transform: 65 | image = self.transform(image) 66 | 67 | return image, classes, labels 68 | 69 | def balance_dataset(self): 70 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 71 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 72 | ) 73 | return balanced_df 74 | 75 | 76 | transform = transforms.Compose( 77 | [ 78 | transforms.RandomHorizontalFlip(0.5), 79 | transforms.RandomGrayscale(0.01), 80 | transforms.RandomRotation(10), 81 | transforms.ColorJitter( 82 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1 83 | ), # model more robust to changes in lighting conditions. 84 | transforms.RandomPerspective( 85 | distortion_scale=0.2, p=0.5 86 | ), # can be helpful if your images might have varying perspectives. 
87 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255) 88 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 89 | transforms.RandomErasing( 90 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random" 91 | ), # TEST: Should help overfitting 92 | ] 93 | ) 94 | 95 | transform_valid = transforms.Compose( 96 | [ 97 | transforms.ToTensor(), 98 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 99 | ] 100 | ) 101 | 102 | train_dataset = CustomDataset( 103 | dataframe=train_annotations_df, 104 | root_dir=IMAGE_FOLDER, 105 | transform=transform, 106 | balance=False, 107 | ) 108 | valid_dataset = CustomDataset( 109 | dataframe=valid_annotations_df, 110 | root_dir=IMAGE_FOLDER_TEST, 111 | transform=transform_valid, 112 | balance=False, 113 | ) 114 | train_loader = DataLoader( 115 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 116 | ) 117 | valid_loader = DataLoader( 118 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 119 | ) 120 | 121 | # ***** Define the model ***** 122 | 123 | # Initialize the model 124 | block_channels = MODEL.classifier[3].in_features 125 | MODEL.classifier = nn.Sequential( 126 | nn.AdaptiveAvgPool2d(1), 127 | nn.Flatten(), 128 | nn.LayerNorm(block_channels), 129 | nn.Linear(block_channels, block_channels), 130 | nn.Tanh(), 131 | nn.Linear(block_channels, 9, bias=False), 132 | ) 133 | MODEL.to(DEVICE) # Put the model to the GPU 134 | 135 | # Define (weighted) loss function 136 | # weights = torch.tensor([0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]) 137 | weights7 = torch.tensor( 138 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006] 139 | ) 140 | criterion_cls = nn.CrossEntropyLoss(weights7.to(DEVICE)) 141 | criterion_cls_val = ( 142 | nn.CrossEntropyLoss() 143 | ) # Use two loss functions, as the validation dataset is balanced 144 | criterion_reg = nn.MSELoss() 145 | 146 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 147 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 148 | 149 | # ***** Train the model ***** 150 | print("--- Start training ---") 151 | scaler = torch.cuda.amp.GradScaler() 152 | best_valid_loss = 100 153 | 154 | for epoch in range(NUM_EPOCHS): 155 | MODEL.train() 156 | total_train_correct = 0 157 | total_train_samples = 0 158 | for images, classes, labels in tqdm( 159 | train_loader, desc="Epoch train_loader progress" 160 | ): 161 | images, classes, labels = ( 162 | images.to(DEVICE), 163 | classes.to(DEVICE), 164 | labels.to(DEVICE), 165 | ) 166 | optimizer.zero_grad() 167 | with torch.autocast(device_type="cuda", dtype=torch.float16): 168 | outputs = MODEL(images) 169 | outputs_cls = outputs[:, :7] 170 | outputs_reg = outputs[:, 7:] 171 | loss = criterion_cls( 172 | outputs_cls.cuda(), classes.cuda() 173 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 174 | scaler.scale(loss).backward() 175 | scaler.step(optimizer) 176 | scaler.update() 177 | lr_scheduler.step() 178 | current_lr = optimizer.param_groups[0]["lr"] 179 | 180 | _, train_predicted = torch.max(outputs_cls, 1) 181 | total_train_samples += classes.size(0) 182 | total_train_correct += (train_predicted == classes).sum().item() 183 | 184 | train_accuracy = (total_train_correct / total_train_samples) * 100 185 | 186 | MODEL.eval() 187 | valid_loss = 0.0 188 | correct = 0 189 | total = 0 190 | with torch.no_grad(): 191 | for images, classes, labels in valid_loader: 192 | images, 
classes, labels = ( 193 | images.to(DEVICE), 194 | classes.to(DEVICE), 195 | labels.to(DEVICE), 196 | ) 197 | outputs = MODEL(images) 198 | outputs_cls = outputs[:, :7] 199 | outputs_reg = outputs[:, 7:] 200 | loss = criterion_cls_val( 201 | outputs_cls.cuda(), classes.cuda() 202 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 203 | valid_loss += loss.item() 204 | _, predicted = torch.max(outputs_cls, 1) 205 | total += classes.size(0) 206 | correct += (predicted == classes).sum().item() 207 | 208 | print( 209 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 210 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 211 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 212 | f", Training Accuracy: {train_accuracy:.2f}%, " 213 | ) 214 | 215 | if valid_loss < best_valid_loss: 216 | best_valid_loss = valid_loss 217 | print(f"Saving model at epoch {epoch+1}") 218 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model under the filename generate_csv.py loads 219 | -------------------------------------------------------------------------------- /models/AffectNet7_Maxvit_Discrete/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 18 | # Set parameters 19 | BATCHSIZE = 128 20 | MODEL = models.maxvit_t(weights="DEFAULT") 21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | 24 | # **** Create dataset and data loaders **** 25 | class CustomDataset(Dataset): 26 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 27 | self.dataframe = dataframe 28 | self.transform = transform 29 | self.root_dir = root_dir 30 | self.balance = balance 31 | 32 | if self.balance: 33 | self.dataframe = self.balance_dataset() 34 | 35 | def __len__(self): 36 | return len(self.dataframe) 37 | 38 | def __getitem__(self, idx): 39 | image_path = os.path.join( 40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 41 | ) 42 | if os.path.exists(image_path): 43 | image = Image.open(image_path) 44 | else: 45 | image = Image.new( 46 | "RGB", (224, 224), color="white" 47 | ) # Handle missing image file 48 | 49 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 50 | 51 | if self.transform: 52 | image = self.transform(image) 53 | 54 | return image, label 55 | 56 | def balance_dataset(self): 57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 59 | ) 60 | return balanced_df 61 | 62 | 63 | transform_valid = transforms.Compose( 64 | [ 65 | transforms.ToTensor(), 66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 67 | ] 68 | ) 69 | valid_dataset = CustomDataset( 70 | dataframe=valid_annotations_df, 71 | root_dir=IMAGE_FOLDER_TEST, 72 | transform=transform_valid, 73 | balance=False, 74 | ) 75 | valid_loader = DataLoader( 76 | 
valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 77 | ) 78 | # ***** Define the model ***** 79 | 80 | # Initialize the model 81 | block_channels = MODEL.classifier[3].in_features 82 | MODEL.classifier = nn.Sequential( 83 | nn.AdaptiveAvgPool2d(1), 84 | nn.Flatten(), 85 | nn.LayerNorm(block_channels), 86 | nn.Linear(block_channels, block_channels), 87 | nn.Tanh(), 88 | nn.Linear(block_channels, 7, bias=False), 89 | ) 90 | MODEL.to(DEVICE) # Put the model to the GPU 91 | 92 | # Set the model to evaluation mode 93 | MODEL.load_state_dict(torch.load("model.pt")) 94 | MODEL.to(DEVICE) 95 | MODEL.eval() 96 | 97 | all_labels_cls = [] 98 | all_predicted_cls = [] 99 | 100 | # Start inference on test set 101 | with torch.no_grad(): 102 | for images, labels_cls in iter(valid_loader): 103 | images = images.to(DEVICE) 104 | labels_cls = labels_cls.to(DEVICE) 105 | 106 | outputs = MODEL(images) 107 | 108 | _, predicted_cls = torch.max(outputs, 1) 109 | 110 | all_labels_cls.extend(labels_cls.cpu().numpy()) 111 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 112 | 113 | 114 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls}) 115 | df.to_csv("inference.csv", index=False) 116 | -------------------------------------------------------------------------------- /models/AffectNet7_Maxvit_VA/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 18 | # Set parameters 19 | BATCHSIZE = 128 20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | 23 | # **** Create dataset and data loaders **** 24 | class CustomDataset(Dataset): 25 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 26 | self.dataframe = dataframe 27 | self.transform = transform 28 | self.root_dir = root_dir 29 | self.balance = balance 30 | 31 | if self.balance: 32 | self.dataframe = self.balance_dataset() 33 | 34 | def __len__(self): 35 | return len(self.dataframe) 36 | 37 | def __getitem__(self, idx): 38 | image_path = os.path.join( 39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 40 | ) 41 | image = Image.open(image_path) 42 | 43 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8) 44 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16) 45 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16) 46 | 47 | if self.transform: 48 | image = self.transform(image) 49 | 50 | return image, classes, valence, arousal 51 | 52 | def balance_dataset(self): 53 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 54 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 55 | ) 56 | return balanced_df 57 | 58 | 59 | transform_valid = transforms.Compose( 60 | [ 61 | transforms.ToTensor(), 62 | transforms.Normalize([0.485, 0.456, 
0.406], [0.229, 0.224, 0.225]), 63 | ] 64 | ) 65 | 66 | valid_dataset = CustomDataset( 67 | dataframe=valid_annotations_df, 68 | root_dir=IMAGE_FOLDER_TEST, 69 | transform=transform_valid, 70 | balance=False, 71 | ) 72 | valid_loader = DataLoader( 73 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 74 | ) 75 | 76 | # ***** Define the model ***** 77 | 78 | # Initialize the model 79 | MODEL = models.maxvit_t(weights="DEFAULT") 80 | block_channels = MODEL.classifier[3].in_features 81 | MODEL.classifier = nn.Sequential( 82 | nn.AdaptiveAvgPool2d(1), 83 | nn.Flatten(), 84 | nn.LayerNorm(block_channels), 85 | nn.Linear(block_channels, block_channels), 86 | nn.Tanh(), 87 | nn.Dropout(0.3), 88 | nn.Linear(block_channels, 2, bias=False), 89 | ) 90 | MODEL.to(DEVICE) 91 | 92 | # **** Test the model performance for classification **** 93 | 94 | # Set the model to evaluation mode 95 | MODEL.load_state_dict(torch.load("model.pt")) 96 | MODEL.to(DEVICE) 97 | MODEL.eval() 98 | 99 | all_val_true_values = [] 100 | all_val_predicted_values = [] 101 | all_aro_true_values = [] 102 | all_aro_predicted_values = [] 103 | 104 | # Start inference on test set 105 | with torch.no_grad(): 106 | for images, _, val_true, aro_true in valid_loader: 107 | images, val_true, aro_true = ( 108 | images.to(DEVICE), 109 | val_true.to(DEVICE), 110 | aro_true.to(DEVICE), 111 | ) 112 | 113 | outputs = MODEL(images) 114 | val_pred = outputs[:, 0] 115 | aro_pred = outputs[:, 1] 116 | 117 | # Append to the lists --> Regression 118 | true_val_values = val_true.cpu().numpy() 119 | true_aro_values = aro_true.cpu().numpy() 120 | pred_val_values = val_pred.cpu().numpy() 121 | pred_aro_values = aro_pred.cpu().numpy() 122 | all_val_true_values.extend(true_val_values) 123 | all_aro_true_values.extend(true_aro_values) 124 | all_val_predicted_values.extend(pred_val_values) 125 | all_aro_predicted_values.extend(pred_aro_values) 126 | df = pd.DataFrame( 127 | { 128 | "val_pred": all_val_predicted_values, 129 | "val_true": all_val_true_values, 130 | "aro_pred": all_aro_predicted_values, 131 | "aro_true": all_aro_true_values, 132 | } 133 | ) 134 | df.to_csv("inference.csv", index=False) 135 | -------------------------------------------------------------------------------- /models/AffectNet7_Swin_Combined/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 18 | # Set parameters 19 | BATCHSIZE = 128 20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | 23 | # **** Create dataset and data loaders **** 24 | class CustomDataset(Dataset): 25 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 26 | self.dataframe = dataframe 27 | self.transform = transform 28 | self.root_dir = root_dir 29 | self.balance = balance 30 | 31 | if self.balance: 32 | 
self.dataframe = self.balance_dataset() 33 | 34 | def __len__(self): 35 | return len(self.dataframe) 36 | 37 | def __getitem__(self, idx): 38 | image_path = os.path.join( 39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 40 | ) 41 | if os.path.exists(image_path): 42 | image = Image.open(image_path) 43 | else: 44 | image = Image.new( 45 | "RGB", (224, 224), color="white" 46 | ) # Handle missing image file 47 | 48 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 49 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 50 | 51 | if self.transform: 52 | image = self.transform(image) 53 | 54 | return image, classes, labels 55 | 56 | def balance_dataset(self): 57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 59 | ) 60 | return balanced_df 61 | 62 | 63 | transform_valid = transforms.Compose( 64 | [ 65 | transforms.ToTensor(), 66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 67 | ] 68 | ) 69 | 70 | valid_dataset = CustomDataset( 71 | dataframe=valid_annotations_df, 72 | root_dir=IMAGE_FOLDER_TEST, 73 | transform=transform_valid, 74 | balance=False, 75 | ) 76 | valid_loader = DataLoader( 77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 78 | ) 79 | 80 | # * Define the model * 81 | 82 | # Initialize the model 83 | MODEL = models.swin_v2_t(weights="DEFAULT") 84 | MODEL.head = torch.nn.Linear(in_features=768, out_features=9, bias=True) 85 | MODEL.to(DEVICE) 86 | 87 | # **** Test the model performance for classification **** 88 | 89 | # Set the model to evaluation mode 90 | MODEL.load_state_dict(torch.load("model.pt")) 91 | MODEL.to(DEVICE) 92 | MODEL.eval() 93 | 94 | all_labels_cls = [] 95 | all_predicted_cls = [] 96 | 97 | all_true_val = [] 98 | all_pred_val = [] 99 | all_true_aro = [] 100 | all_pred_aro = [] 101 | 102 | # Start inference on test set 103 | with torch.no_grad(): 104 | for images, classes, labels in iter(valid_loader): 105 | images, classes, labels = ( 106 | images.to(DEVICE), 107 | classes.to(DEVICE), 108 | labels.to(DEVICE), 109 | ) 110 | 111 | outputs = MODEL(images) 112 | outputs_cls = outputs[:, :7] 113 | outputs_reg = outputs[:, 7:] 114 | val_pred = outputs_reg[:, 0] 115 | aro_pred = outputs_reg[:, 1] 116 | 117 | _, predicted_cls = torch.max(outputs_cls, 1) 118 | 119 | all_labels_cls.extend(classes.cpu().numpy()) 120 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 121 | val_true = labels[:, 0] 122 | aro_true = labels[:, 1] 123 | 124 | all_true_val.extend(val_true.cpu().numpy()) 125 | all_true_aro.extend(aro_true.cpu().numpy()) 126 | all_pred_val.extend(val_pred.cpu().numpy()) 127 | all_pred_aro.extend(aro_pred.cpu().numpy()) 128 | 129 | df = pd.DataFrame( 130 | { 131 | "cat_pred": all_predicted_cls, 132 | "cat_true": all_labels_cls, 133 | "val_pred": all_pred_val, 134 | "val_true": all_true_val, 135 | "aro_pred": all_pred_aro, 136 | "aro_true": all_true_aro, 137 | } 138 | ) 139 | df.to_csv("inference.csv", index=False) 140 | -------------------------------------------------------------------------------- /models/AffectNet7_Swin_Combined/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim 
as optim 9 | from PIL import Image 10 | from torch.optim import lr_scheduler 11 | from tqdm import tqdm 12 | 13 | # Load the annotations for training and validation from separate CSV files 14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 16 | train_annotations_path = ( 17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 18 | ) 19 | valid_annotations_path = ( 20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 21 | ) 22 | train_annotations_df = pd.read_csv(train_annotations_path) 23 | valid_annotations_df = pd.read_csv(valid_annotations_path) 24 | 25 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7] 26 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 27 | 28 | # Set parameters 29 | BATCHSIZE = 128 30 | NUM_EPOCHS = 20 31 | LR = 4e-5 32 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 33 | 34 | 35 | # **** Create dataset and data loaders **** 36 | class CustomDataset(Dataset): 37 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 38 | self.dataframe = dataframe 39 | self.transform = transform 40 | self.root_dir = root_dir 41 | self.balance = balance 42 | 43 | if self.balance: 44 | self.dataframe = self.balance_dataset() 45 | 46 | def __len__(self): 47 | return len(self.dataframe) 48 | 49 | def __getitem__(self, idx): 50 | image_path = os.path.join( 51 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 52 | ) 53 | if os.path.exists(image_path): 54 | image = Image.open(image_path) 55 | else: 56 | image = Image.new( 57 | "RGB", (224, 224), color="white" 58 | ) # Handle missing image file 59 | 60 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 61 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 62 | 63 | if self.transform: 64 | image = self.transform(image) 65 | 66 | return image, classes, labels 67 | 68 | def balance_dataset(self): 69 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 70 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 71 | ) 72 | return balanced_df 73 | 74 | 75 | transform = transforms.Compose( 76 | [ 77 | transforms.RandomHorizontalFlip(0.5), 78 | transforms.RandomGrayscale(0.01), 79 | transforms.RandomRotation(10), 80 | transforms.ColorJitter( 81 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1 82 | ), # model more robust to changes in lighting conditions. 83 | transforms.RandomPerspective( 84 | distortion_scale=0.2, p=0.5 85 | ), # can be helpful if your images might have varying perspectives. 
86 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255) 87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 88 | transforms.RandomErasing( 89 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random" 90 | ), # TEST: Should help overfitting 91 | ] 92 | ) 93 | 94 | transform_valid = transforms.Compose( 95 | [ 96 | transforms.ToTensor(), 97 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 98 | ] 99 | ) 100 | 101 | train_dataset = CustomDataset( 102 | dataframe=train_annotations_df, 103 | root_dir=IMAGE_FOLDER, 104 | transform=transform, 105 | balance=False, 106 | ) 107 | valid_dataset = CustomDataset( 108 | dataframe=valid_annotations_df, 109 | root_dir=IMAGE_FOLDER_TEST, 110 | transform=transform_valid, 111 | balance=False, 112 | ) 113 | train_loader = DataLoader( 114 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 115 | ) 116 | valid_loader = DataLoader( 117 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 118 | ) 119 | 120 | # * Define the model * 121 | 122 | # Initialize the model 123 | MODEL = models.swin_v2_t(weights="DEFAULT") 124 | MODEL.head = torch.nn.Linear(in_features=768, out_features=9, bias=True) 125 | MODEL.to(DEVICE) 126 | # Define (weighted) loss function 127 | weights7 = torch.tensor( 128 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006] 129 | ) 130 | criterion_cls = nn.CrossEntropyLoss(weights7.to(DEVICE)) 131 | criterion_cls_val = ( 132 | nn.CrossEntropyLoss() 133 | ) # Use two loss functions, as the validation dataset is balanced 134 | criterion_reg = nn.MSELoss() 135 | 136 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 137 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 138 | 139 | # ***** Train the model ***** 140 | print("--- Start training ---") 141 | scaler = torch.cuda.amp.GradScaler() 142 | best_valid_loss = 100 143 | 144 | for epoch in range(NUM_EPOCHS): 145 | MODEL.train() 146 | total_train_correct = 0 147 | total_train_samples = 0 148 | for images, classes, labels in tqdm( 149 | train_loader, desc="Epoch train_loader progress" 150 | ): 151 | images, classes, labels = ( 152 | images.to(DEVICE), 153 | classes.to(DEVICE), 154 | labels.to(DEVICE), 155 | ) 156 | optimizer.zero_grad() 157 | with torch.autocast(device_type="cuda", dtype=torch.float16): 158 | outputs = MODEL(images) 159 | outputs_cls = outputs[:, :7] 160 | outputs_reg = outputs[:, 7:] 161 | loss = criterion_cls( 162 | outputs_cls.cuda(), classes.cuda() 163 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 164 | scaler.scale(loss).backward() 165 | scaler.step(optimizer) 166 | scaler.update() 167 | lr_scheduler.step() 168 | current_lr = optimizer.param_groups[0]["lr"] 169 | 170 | _, train_predicted = torch.max(outputs_cls, 1) 171 | total_train_samples += classes.size(0) 172 | total_train_correct += (train_predicted == classes).sum().item() 173 | 174 | train_accuracy = (total_train_correct / total_train_samples) * 100 175 | 176 | MODEL.eval() 177 | valid_loss = 0.0 178 | correct = 0 179 | total = 0 180 | with torch.no_grad(): 181 | for images, classes, labels in valid_loader: 182 | images, classes, labels = ( 183 | images.to(DEVICE), 184 | classes.to(DEVICE), 185 | labels.to(DEVICE), 186 | ) 187 | outputs = MODEL(images) 188 | outputs_cls = outputs[:, :7] 189 | outputs_reg = outputs[:, 7:] 190 | loss = criterion_cls_val( 191 | outputs_cls.cuda(), classes.cuda() 192 | ) + 5 * criterion_reg(outputs_reg.cuda(), 
labels.cuda()) 193 | valid_loss += loss.item() 194 | _, predicted = torch.max(outputs_cls, 1) 195 | total += classes.size(0) 196 | correct += (predicted == classes).sum().item() 197 | 198 | print( 199 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 200 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 201 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 202 | f", Training Accuracy: {train_accuracy:.2f}%, " 203 | ) 204 | 205 | if valid_loss < best_valid_loss: 206 | best_valid_loss = valid_loss 207 | print(f"Saving model at epoch {epoch+1}") 208 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 209 | -------------------------------------------------------------------------------- /models/AffectNet7_Swin_Discrete/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 18 | # Set parameters 19 | BATCHSIZE = 128 20 | 21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | 24 | # **** Create dataset and data loaders **** 25 | class CustomDataset(Dataset): 26 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 27 | self.dataframe = dataframe 28 | self.transform = transform 29 | self.root_dir = root_dir 30 | self.balance = balance 31 | 32 | if self.balance: 33 | self.dataframe = self.balance_dataset() 34 | 35 | def __len__(self): 36 | return len(self.dataframe) 37 | 38 | def __getitem__(self, idx): 39 | image_path = os.path.join( 40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 41 | ) 42 | if os.path.exists(image_path): 43 | image = Image.open(image_path) 44 | else: 45 | image = Image.new( 46 | "RGB", (224, 224), color="white" 47 | ) # Handle missing image file 48 | 49 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 50 | 51 | if self.transform: 52 | image = self.transform(image) 53 | 54 | return image, label 55 | 56 | def balance_dataset(self): 57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 59 | ) 60 | return balanced_df 61 | 62 | 63 | transform_valid = transforms.Compose( 64 | [ 65 | transforms.ToTensor(), 66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 67 | ] 68 | ) 69 | valid_dataset = CustomDataset( 70 | dataframe=valid_annotations_df, 71 | root_dir=IMAGE_FOLDER_TEST, 72 | transform=transform_valid, 73 | balance=False, 74 | ) 75 | valid_loader = DataLoader( 76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 77 | ) 78 | # * Define the model * 79 | 80 | # Initialize the model 81 | MODEL = models.swin_v2_t(weights="DEFAULT") 82 | MODEL.head = torch.nn.Linear(in_features=768, out_features=7, bias=True) 83 | MODEL.to(DEVICE) 84 | 85 | # Set the model to evaluation mode 86 | MODEL.load_state_dict(torch.load("model.pt")) 
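# A minimal sketch, assuming "model.pt" was written by train.py on a CUDA device:
# on a CPU-only host the plain torch.load() call above would raise a deserialization
# error, and an explicit map_location remaps the tensors first, e.g.
#   MODEL.load_state_dict(torch.load("model.pt", map_location=DEVICE))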
87 | MODEL.to(DEVICE) 88 | MODEL.eval() 89 | 90 | all_labels_cls = [] 91 | all_predicted_cls = [] 92 | 93 | # Start inference on test set 94 | with torch.no_grad(): 95 | for images, labels_cls in iter(valid_loader): 96 | images = images.to(DEVICE) 97 | labels_cls = labels_cls.to(DEVICE) 98 | 99 | outputs = MODEL(images) 100 | 101 | _, predicted_cls = torch.max(outputs, 1) 102 | 103 | all_labels_cls.extend(labels_cls.cpu().numpy()) 104 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 105 | 106 | 107 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls}) 108 | df.to_csv("inference.csv", index=False) 109 | -------------------------------------------------------------------------------- /models/AffectNet7_Swin_Discrete/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | import torchvision 11 | from torch.optim import lr_scheduler 12 | import re 13 | from tqdm import tqdm 14 | 15 | # Load the annotations for training and validation from separate CSV files 16 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 17 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 18 | train_annotations_path = ( 19 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 20 | ) 21 | valid_annotations_path = ( 22 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 23 | ) 24 | train_annotations_df = pd.read_csv(train_annotations_path) 25 | valid_annotations_df = pd.read_csv(valid_annotations_path) 26 | 27 | train_annotations_df = train_annotations_df[train_annotations_df["exp"] != 7] 28 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 29 | 30 | # Set parameters 31 | BATCHSIZE = 128 32 | NUM_EPOCHS = 20 33 | LR = 4e-5 34 | 35 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 36 | 37 | 38 | # **** Create dataset and data loaders **** 39 | class CustomDataset(Dataset): 40 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 41 | self.dataframe = dataframe 42 | self.transform = transform 43 | self.root_dir = root_dir 44 | self.balance = balance 45 | 46 | if self.balance: 47 | self.dataframe = self.balance_dataset() 48 | 49 | def __len__(self): 50 | return len(self.dataframe) 51 | 52 | def __getitem__(self, idx): 53 | image_path = os.path.join( 54 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 55 | ) 56 | if os.path.exists(image_path): 57 | image = Image.open(image_path) 58 | else: 59 | image = Image.new( 60 | "RGB", (224, 224), color="white" 61 | ) # Handle missing image file 62 | 63 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 64 | 65 | if self.transform: 66 | image = self.transform(image) 67 | 68 | return image, label 69 | 70 | def balance_dataset(self): 71 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 72 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 73 | ) 74 | return balanced_df 75 | 76 | 77 | transform = transforms.Compose( 78 | [ 79 | transforms.ElasticTransform(alpha=5.0, sigma=5.0), 80 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), 81 | transforms.RandomGrayscale(p=0.1), 82 | transforms.RandomRotation(degrees=15), 83 | transforms.RandomVerticalFlip(), 84 
| transforms.ColorJitter(0.15, 0.15, 0.15), 85 | torchvision.transforms.RandomAutocontrast(p=0.4), 86 | transforms.ToTensor(), 87 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 88 | ] 89 | ) 90 | 91 | transform_valid = transforms.Compose( 92 | [ 93 | transforms.ToTensor(), 94 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 95 | ] 96 | ) 97 | 98 | train_dataset = CustomDataset( 99 | dataframe=train_annotations_df, 100 | root_dir=IMAGE_FOLDER, 101 | transform=transform, 102 | balance=False, 103 | ) 104 | valid_dataset = CustomDataset( 105 | dataframe=valid_annotations_df, 106 | root_dir=IMAGE_FOLDER_TEST, 107 | transform=transform_valid, 108 | balance=False, 109 | ) 110 | train_loader = DataLoader( 111 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 112 | ) 113 | valid_loader = DataLoader( 114 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 115 | ) 116 | 117 | # * Define the model * 118 | 119 | # Initialize the model 120 | MODEL = models.swin_v2_t(weights="DEFAULT") 121 | MODEL.head = torch.nn.Linear(in_features=768, out_features=7, bias=True) 122 | MODEL.to(DEVICE) 123 | 124 | # Define (weighted) loss function 125 | # weights = torch.tensor([0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821]) 126 | weights7 = torch.tensor( 127 | [0.022600, 0.012589, 0.066464, 0.120094, 0.265305, 0.444943, 0.068006] 128 | ) 129 | criterion = nn.CrossEntropyLoss(weights7.to(DEVICE)) 130 | criterion_val = ( 131 | nn.CrossEntropyLoss() 132 | ) # Use two loss functions, as the validation dataset is balanced 133 | 134 | 135 | # Filter parameters for weight decay and no weight decay and create optimizer/scheduler 136 | def filter_params(params, include_patterns, exclude_patterns): 137 | included_params = [] 138 | excluded_params = [] 139 | for name, param in params: 140 | if any(re.search(pattern, name) for pattern in include_patterns): 141 | included_params.append(param) 142 | elif not any(re.search(pattern, name) for pattern in exclude_patterns): 143 | excluded_params.append(param) 144 | return included_params, excluded_params 145 | 146 | 147 | include_patterns = [ 148 | r"^(?!.*\.bn)" 149 | ] # Match any layer name that doesn't contain '.bn' = BatchNorm parameters 150 | exclude_patterns = [r".*\.bn.*"] # Vice versa 151 | params_to_decay, params_not_to_decay = filter_params( 152 | MODEL.named_parameters(), include_patterns, exclude_patterns 153 | ) 154 | 155 | # optimizer = optim.AdamW([ 156 | # {'params': params_to_decay, 'weight_decay': ADAMW_WEIGHT_DECAY}, # Apply weight decay to these parameters 157 | # {'params': params_not_to_decay, 'weight_decay': 0.0} # Exclude weight decay for these parameters = 0.0 158 | # ], lr=LR) 159 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 160 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 161 | 162 | # ***** Train the model ***** 163 | print("--- Start training ---") 164 | scaler = torch.cuda.amp.GradScaler() 165 | best_valid_loss = 100 166 | 167 | for epoch in range(NUM_EPOCHS): 168 | MODEL.train() 169 | total_train_correct = 0 170 | total_train_samples = 0 171 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"): 172 | images, labels = images.to(DEVICE), labels.to(DEVICE) 173 | optimizer.zero_grad() 174 | with torch.autocast(device_type="cuda", dtype=torch.float16): 175 | output = MODEL(images) 176 | loss = criterion(output.cuda(), labels.cuda()) 177 | scaler.scale(loss).backward() 178 | 
scaler.step(optimizer) 179 | scaler.update() 180 | lr_scheduler.step() 181 | current_lr = optimizer.param_groups[0]["lr"] 182 | 183 | _, train_predicted = torch.max(output, 1) 184 | total_train_samples += labels.size(0) 185 | total_train_correct += (train_predicted == labels).sum().item() 186 | 187 | train_accuracy = (total_train_correct / total_train_samples) * 100 188 | 189 | MODEL.eval() 190 | valid_loss = 0.0 191 | correct = 0 192 | total = 0 193 | with torch.no_grad(): 194 | for images, labels in valid_loader: 195 | images, labels = images.to(DEVICE), labels.to(DEVICE) 196 | outputs = MODEL(images) 197 | loss = criterion_val(outputs.cuda(), labels.cuda()) 198 | valid_loss += loss.item() 199 | _, predicted = torch.max(outputs, 1) 200 | total += labels.size(0) 201 | correct += (predicted == labels).sum().item() 202 | 203 | print( 204 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 205 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 206 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 207 | f", Training Accuracy: {train_accuracy:.2f}%, " 208 | ) 209 | # TBD: Overwrite valid loss, then save the model as below if it is smaller than before 210 | 211 | if valid_loss < best_valid_loss: 212 | best_valid_loss = valid_loss 213 | print(f"Saving model at epoch {epoch+1}") 214 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 215 | -------------------------------------------------------------------------------- /models/AffectNet7_Swin_VA/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | 11 | # Load the annotations for training and validation from separate CSV files 12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 14 | 15 | valid_annotations_path = ( 16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 17 | ) 18 | valid_annotations_df = pd.read_csv(valid_annotations_path) 19 | valid_annotations_df = valid_annotations_df[valid_annotations_df["exp"] != 7] 20 | # Set parameters 21 | BATCHSIZE = 128 22 | 23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | # **** Create dataset and data loaders **** 27 | class CustomDataset(Dataset): 28 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 29 | self.dataframe = dataframe 30 | self.transform = transform 31 | self.root_dir = root_dir 32 | self.balance = balance 33 | 34 | if self.balance: 35 | self.dataframe = self.balance_dataset() 36 | 37 | def __len__(self): 38 | return len(self.dataframe) 39 | 40 | def __getitem__(self, idx): 41 | image_path = os.path.join( 42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 43 | ) 44 | image = Image.open(image_path) 45 | 46 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8) 47 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16) 48 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16) 49 | 50 | if self.transform: 51 | image = self.transform(image) 52 | 53 | return image, classes, valence, arousal 54 | 55 | def balance_dataset(self): 56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 58 | ) 59 |
return balanced_df 60 | 61 | 62 | transform_valid = transforms.Compose( 63 | [ 64 | transforms.ToTensor(), 65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 66 | ] 67 | ) 68 | 69 | valid_dataset = CustomDataset( 70 | dataframe=valid_annotations_df, 71 | root_dir=IMAGE_FOLDER_TEST, 72 | transform=transform_valid, 73 | balance=False, 74 | ) 75 | 76 | valid_loader = DataLoader( 77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 78 | ) 79 | 80 | # * Define the model * 81 | 82 | # Initialize the model 83 | MODEL = models.swin_v2_t(weights="DEFAULT") 84 | MODEL.head = torch.nn.Linear(in_features=768, out_features=2, bias=True) 85 | MODEL.to(DEVICE) 86 | 87 | 88 | # **** Test the model performance for classification **** 89 | 90 | # Set the model to evaluation mode 91 | MODEL.load_state_dict(torch.load("model.pt")) 92 | MODEL.to(DEVICE) 93 | MODEL.eval() 94 | 95 | all_val_true_values = [] 96 | all_val_predicted_values = [] 97 | all_aro_true_values = [] 98 | all_aro_predicted_values = [] 99 | 100 | # Start inference on test set 101 | with torch.no_grad(): 102 | for images, _, val_true, aro_true in valid_loader: 103 | images, val_true, aro_true = ( 104 | images.to(DEVICE), 105 | val_true.to(DEVICE), 106 | aro_true.to(DEVICE), 107 | ) 108 | 109 | outputs = MODEL(images) 110 | val_pred = outputs[:, 0] 111 | aro_pred = outputs[:, 1] 112 | 113 | # Append to the lists --> Regression 114 | true_val_values = val_true.cpu().numpy() 115 | true_aro_values = aro_true.cpu().numpy() 116 | pred_val_values = val_pred.cpu().numpy() 117 | pred_aro_values = aro_pred.cpu().numpy() 118 | all_val_true_values.extend(true_val_values) 119 | all_aro_true_values.extend(true_aro_values) 120 | all_val_predicted_values.extend(pred_val_values) 121 | all_aro_predicted_values.extend(pred_aro_values) 122 | 123 | df = pd.DataFrame( 124 | { 125 | "val_pred": all_val_predicted_values, 126 | "val_true": all_val_true_values, 127 | "aro_pred": all_aro_predicted_values, 128 | "aro_true": all_aro_true_values, 129 | } 130 | ) 131 | df.to_csv("inference.csv", index=False) 132 | -------------------------------------------------------------------------------- /models/AffectNet8_Efficientnet_Combined/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | 18 | # Set parameters 19 | BATCHSIZE = 128 20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | 23 | # **** Create dataset and data loaders **** 24 | class CustomDataset(Dataset): 25 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 26 | self.dataframe = dataframe 27 | self.transform = transform 28 | self.root_dir = root_dir 29 | self.balance = balance 30 | 31 | if self.balance: 32 | self.dataframe = self.balance_dataset() 33 | 34 | def __len__(self): 35 | return len(self.dataframe) 36 | 37 | def __getitem__(self, idx): 38 | image_path = 
os.path.join( 39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 40 | ) 41 | if os.path.exists(image_path): 42 | image = Image.open(image_path) 43 | else: 44 | image = Image.new( 45 | "RGB", (224, 224), color="white" 46 | ) # Handle missing image file 47 | 48 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 49 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 50 | 51 | if self.transform: 52 | image = self.transform(image) 53 | 54 | return image, classes, labels 55 | 56 | def balance_dataset(self): 57 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 58 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 59 | ) 60 | return balanced_df 61 | 62 | 63 | transform_valid = transforms.Compose( 64 | [ 65 | transforms.ToTensor(), 66 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 67 | ] 68 | ) 69 | 70 | valid_dataset = CustomDataset( 71 | dataframe=valid_annotations_df, 72 | root_dir=IMAGE_FOLDER_TEST, 73 | transform=transform_valid, 74 | balance=False, 75 | ) 76 | valid_loader = DataLoader( 77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 78 | ) 79 | 80 | # ***** Define the model ***** 81 | 82 | # Initialize the model 83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 84 | num_features = MODEL.classifier[1].in_features 85 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=10) 86 | MODEL.to(DEVICE) 87 | 88 | # **** Test the model performance for classification **** 89 | 90 | # Set the model to evaluation mode 91 | MODEL.load_state_dict(torch.load("model.pt")) 92 | MODEL.to(DEVICE) 93 | MODEL.eval() 94 | 95 | all_labels_cls = [] 96 | all_predicted_cls = [] 97 | 98 | all_true_val = [] 99 | all_pred_val = [] 100 | all_true_aro = [] 101 | all_pred_aro = [] 102 | 103 | # Start inference on test set 104 | with torch.no_grad(): 105 | for images, classes, labels in iter(valid_loader): 106 | images, classes, labels = ( 107 | images.to(DEVICE), 108 | classes.to(DEVICE), 109 | labels.to(DEVICE), 110 | ) 111 | 112 | outputs = MODEL(images) 113 | outputs_cls = outputs[:, :8] 114 | outputs_reg = outputs[:, 8:] 115 | val_pred = outputs_reg[:, 0] 116 | aro_pred = outputs_reg[:, 1] 117 | 118 | _, predicted_cls = torch.max(outputs_cls, 1) 119 | 120 | all_labels_cls.extend(classes.cpu().numpy()) 121 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 122 | val_true = labels[:, 0] 123 | aro_true = labels[:, 1] 124 | 125 | all_true_val.extend(val_true.cpu().numpy()) 126 | all_true_aro.extend(aro_true.cpu().numpy()) 127 | all_pred_val.extend(val_pred.cpu().numpy()) 128 | all_pred_aro.extend(aro_pred.cpu().numpy()) 129 | 130 | df = pd.DataFrame( 131 | { 132 | "cat_pred": all_predicted_cls, 133 | "cat_true": all_labels_cls, 134 | "val_pred": all_pred_val, 135 | "val_true": all_true_val, 136 | "aro_pred": all_pred_aro, 137 | "aro_true": all_true_aro, 138 | } 139 | ) 140 | df.to_csv("inference.csv", index=False) 141 | -------------------------------------------------------------------------------- /models/AffectNet8_Efficientnet_Combined/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | from torch.optim import lr_scheduler 
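# A minimal sketch, assuming one wants to track the valence/arousal head of the combined
# model defined below with the concordance correlation coefficient (CCC); the helper name
# `concordance_cc` is hypothetical and is not called anywhere else in this script.
def concordance_cc(pred: torch.Tensor, true: torch.Tensor) -> torch.Tensor:
    # Lin's CCC = 2*cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))**2),
    # using biased (population) variance so the covariance and variance terms match.
    pred, true = pred.float(), true.float()
    pred_mean, true_mean = pred.mean(), true.mean()
    cov = ((pred - pred_mean) * (true - true_mean)).mean()
    return 2 * cov / (
        pred.var(unbiased=False) + true.var(unbiased=False) + (pred_mean - true_mean) ** 2
    )
# Possible use inside the validation loop further down, e.g.
#   concordance_cc(outputs_reg[:, 0], labels[:, 0])  # per-batch valence CCC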
11 | from tqdm import tqdm 12 | 13 | # Load the annotations for training and validation from separate CSV files 14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 16 | train_annotations_path = ( 17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 18 | ) 19 | valid_annotations_path = ( 20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 21 | ) 22 | train_annotations_df = pd.read_csv(train_annotations_path) 23 | valid_annotations_df = pd.read_csv(valid_annotations_path) 24 | 25 | 26 | # Set parameters 27 | BATCHSIZE = 128 28 | NUM_EPOCHS = 20 29 | LR = 4e-5 30 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 31 | 32 | 33 | # **** Create dataset and data loaders **** 34 | class CustomDataset(Dataset): 35 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 36 | self.dataframe = dataframe 37 | self.transform = transform 38 | self.root_dir = root_dir 39 | self.balance = balance 40 | 41 | if self.balance: 42 | self.dataframe = self.balance_dataset() 43 | 44 | def __len__(self): 45 | return len(self.dataframe) 46 | 47 | def __getitem__(self, idx): 48 | image_path = os.path.join( 49 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 50 | ) 51 | if os.path.exists(image_path): 52 | image = Image.open(image_path) 53 | else: 54 | image = Image.new( 55 | "RGB", (224, 224), color="white" 56 | ) # Handle missing image file 57 | 58 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 59 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 60 | 61 | if self.transform: 62 | image = self.transform(image) 63 | 64 | return image, classes, labels 65 | 66 | def balance_dataset(self): 67 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 68 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 69 | ) 70 | return balanced_df 71 | 72 | 73 | transform = transforms.Compose( 74 | [ 75 | transforms.RandomHorizontalFlip(0.5), 76 | transforms.RandomGrayscale(0.01), 77 | transforms.RandomRotation(10), 78 | transforms.ColorJitter( 79 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1 80 | ), # model more robust to changes in lighting conditions. 81 | transforms.RandomPerspective( 82 | distortion_scale=0.2, p=0.5 83 | ), # can be helpful if your images might have varying perspectives. 
84 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255) 85 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 86 | transforms.RandomErasing( 87 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random" 88 | ), # TEST: Should help overfitting 89 | ] 90 | ) 91 | 92 | transform_valid = transforms.Compose( 93 | [ 94 | transforms.ToTensor(), 95 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 96 | ] 97 | ) 98 | 99 | train_dataset = CustomDataset( 100 | dataframe=train_annotations_df, 101 | root_dir=IMAGE_FOLDER, 102 | transform=transform, 103 | balance=False, 104 | ) 105 | valid_dataset = CustomDataset( 106 | dataframe=valid_annotations_df, 107 | root_dir=IMAGE_FOLDER_TEST, 108 | transform=transform_valid, 109 | balance=False, 110 | ) 111 | train_loader = DataLoader( 112 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 113 | ) 114 | valid_loader = DataLoader( 115 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 116 | ) 117 | 118 | # ***** Define the model ***** 119 | 120 | # Initialize the model 121 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 122 | num_features = MODEL.classifier[1].in_features 123 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=10) 124 | MODEL.to(DEVICE) 125 | # Define (weighted) loss function 126 | weights = torch.tensor( 127 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821] 128 | ) 129 | criterion_cls = nn.CrossEntropyLoss(weights.to(DEVICE)) 130 | criterion_cls_val = ( 131 | nn.CrossEntropyLoss() 132 | ) # Use two loss functions, as the validation dataset is balanced 133 | criterion_reg = nn.MSELoss() 134 | 135 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 136 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 137 | 138 | # ***** Train the model ***** 139 | print("--- Start training ---") 140 | scaler = torch.cuda.amp.GradScaler() 141 | best_valid_loss = 100 142 | 143 | for epoch in range(NUM_EPOCHS): 144 | MODEL.train() 145 | total_train_correct = 0 146 | total_train_samples = 0 147 | for images, classes, labels in tqdm( 148 | train_loader, desc="Epoch train_loader progress" 149 | ): 150 | images, classes, labels = ( 151 | images.to(DEVICE), 152 | classes.to(DEVICE), 153 | labels.to(DEVICE), 154 | ) 155 | optimizer.zero_grad() 156 | with torch.autocast(device_type="cuda", dtype=torch.float16): 157 | outputs = MODEL(images) 158 | outputs_cls = outputs[:, :8] 159 | outputs_reg = outputs[:, 8:] 160 | loss = criterion_cls( 161 | outputs_cls.cuda(), classes.cuda() 162 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 163 | scaler.scale(loss).backward() 164 | scaler.step(optimizer) 165 | scaler.update() 166 | lr_scheduler.step() 167 | current_lr = optimizer.param_groups[0]["lr"] 168 | 169 | _, train_predicted = torch.max(outputs_cls, 1) 170 | total_train_samples += classes.size(0) 171 | total_train_correct += (train_predicted == classes).sum().item() 172 | 173 | train_accuracy = (total_train_correct / total_train_samples) * 100 174 | 175 | MODEL.eval() 176 | valid_loss = 0.0 177 | correct = 0 178 | total = 0 179 | with torch.no_grad(): 180 | for images, classes, labels in valid_loader: 181 | images, classes, labels = ( 182 | images.to(DEVICE), 183 | classes.to(DEVICE), 184 | labels.to(DEVICE), 185 | ) 186 | outputs = MODEL(images) 187 | outputs_cls = outputs[:, :8] 188 | outputs_reg = outputs[:, 8:] 189 | loss = criterion_cls_val( 190 | 
outputs_cls.cuda(), classes.cuda() 191 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 192 | valid_loss += loss.item() 193 | _, predicted = torch.max(outputs_cls, 1) 194 | total += classes.size(0) 195 | correct += (predicted == classes).sum().item() 196 | 197 | print( 198 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 199 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 200 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 201 | f", Training Accuracy: {train_accuracy:.2f}%, " 202 | ) 203 | 204 | if valid_loss < best_valid_loss: 205 | best_valid_loss = valid_loss 206 | print(f"Saving model at epoch {epoch+1}") 207 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 208 | -------------------------------------------------------------------------------- /models/AffectNet8_Efficientnet_Discrete/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | 17 | valid_annotations_df = pd.read_csv(valid_annotations_path) 18 | 19 | 20 | # Set parameters 21 | BATCHSIZE = 128 22 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only 23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | # **** Create dataset and data loaders **** 27 | class CustomDataset(Dataset): 28 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 29 | self.dataframe = dataframe 30 | self.transform = transform 31 | self.root_dir = root_dir 32 | self.balance = balance 33 | 34 | if self.balance: 35 | self.dataframe = self.balance_dataset() 36 | 37 | def __len__(self): 38 | return len(self.dataframe) 39 | 40 | def __getitem__(self, idx): 41 | image_path = os.path.join( 42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 43 | ) 44 | if os.path.exists(image_path): 45 | image = Image.open(image_path) 46 | else: 47 | image = Image.new( 48 | "RGB", (224, 224), color="white" 49 | ) # Handle missing image file 50 | 51 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 52 | 53 | if self.transform: 54 | image = self.transform(image) 55 | 56 | return image, label 57 | 58 | def balance_dataset(self): 59 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 60 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 61 | ) 62 | return balanced_df 63 | 64 | 65 | transform_valid = transforms.Compose( 66 | [ 67 | transforms.ToTensor(), 68 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 69 | ] 70 | ) 71 | 72 | 73 | valid_dataset = CustomDataset( 74 | dataframe=valid_annotations_df, 75 | root_dir=IMAGE_FOLDER_TEST, 76 | transform=transform_valid, 77 | balance=False, 78 | ) 79 | 80 | valid_loader = DataLoader( 81 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 82 | ) 83 | 84 | # ***** Define the model ***** 85 | 86 | # Initialize the model 87 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 88 | num_features = MODEL.classifier[1].in_features 89 | MODEL.classifier[1] = 
nn.Linear(in_features=num_features, out_features=8) 90 | MODEL.to(DEVICE) 91 | 92 | # **** Test the model performance for classification **** 93 | 94 | # Set the model to evaluation mode 95 | MODEL.load_state_dict(torch.load("model.pt")) 96 | MODEL.to(DEVICE) 97 | MODEL.eval() 98 | 99 | all_labels_cls = [] 100 | all_predicted_cls = [] 101 | 102 | # Start inference on test set 103 | with torch.no_grad(): 104 | for images, labels_cls in iter(valid_loader): 105 | images = images.to(DEVICE) 106 | labels_cls = labels_cls.to(DEVICE) 107 | 108 | outputs = MODEL(images) 109 | 110 | _, predicted_cls = torch.max(outputs, 1) 111 | 112 | all_labels_cls.extend(labels_cls.cpu().numpy()) 113 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 114 | 115 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls}) 116 | df.to_csv("inference.csv", index=False) 117 | -------------------------------------------------------------------------------- /models/AffectNet8_Efficientnet_Discrete/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | import torchvision 11 | from torch.optim import lr_scheduler 12 | from tqdm import tqdm 13 | 14 | # Load the annotations for training and validation from separate CSV files 15 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 16 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 17 | train_annotations_path = ( 18 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 19 | ) 20 | valid_annotations_path = ( 21 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 22 | ) 23 | train_annotations_df = pd.read_csv(train_annotations_path) 24 | valid_annotations_df = pd.read_csv(valid_annotations_path) 25 | 26 | # Set parameters 27 | BATCHSIZE = 128 28 | NUM_EPOCHS = 20 29 | LR = 4e-5 30 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only 31 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 32 | 33 | 34 | # **** Create dataset and data loaders **** 35 | class CustomDataset(Dataset): 36 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 37 | self.dataframe = dataframe 38 | self.transform = transform 39 | self.root_dir = root_dir 40 | self.balance = balance 41 | 42 | if self.balance: 43 | self.dataframe = self.balance_dataset() 44 | 45 | def __len__(self): 46 | return len(self.dataframe) 47 | 48 | def __getitem__(self, idx): 49 | image_path = os.path.join( 50 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 51 | ) 52 | if os.path.exists(image_path): 53 | image = Image.open(image_path) 54 | else: 55 | image = Image.new( 56 | "RGB", (224, 224), color="white" 57 | ) # Handle missing image file 58 | 59 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 60 | 61 | if self.transform: 62 | image = self.transform(image) 63 | 64 | return image, label 65 | 66 | def balance_dataset(self): 67 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 68 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 69 | ) 70 | return balanced_df 71 | 72 | 73 | transform = transforms.Compose( 74 | [ 75 | transforms.ElasticTransform(alpha=5.0, sigma=5.0), 76 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), 77 | 
transforms.RandomGrayscale(p=0.1), 78 | transforms.RandomRotation(degrees=15), 79 | transforms.RandomVerticalFlip(), 80 | transforms.ColorJitter(0.15, 0.15, 0.15), 81 | torchvision.transforms.RandomAutocontrast(p=0.4), 82 | transforms.ToTensor(), 83 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 84 | ] 85 | ) 86 | 87 | 88 | transform_valid = transforms.Compose( 89 | [ 90 | transforms.ToTensor(), 91 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 92 | ] 93 | ) 94 | 95 | train_dataset = CustomDataset( 96 | dataframe=train_annotations_df, 97 | root_dir=IMAGE_FOLDER, 98 | transform=transform, 99 | balance=False, 100 | ) 101 | valid_dataset = CustomDataset( 102 | dataframe=valid_annotations_df, 103 | root_dir=IMAGE_FOLDER_TEST, 104 | transform=transform_valid, 105 | balance=False, 106 | ) 107 | train_loader = DataLoader( 108 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 109 | ) 110 | valid_loader = DataLoader( 111 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 112 | ) 113 | 114 | # ***** Define the model ***** 115 | 116 | # Initialize the model 117 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 118 | num_features = MODEL.classifier[1].in_features 119 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=8) 120 | MODEL.to(DEVICE) 121 | # Define (weighted) loss function 122 | weights = torch.tensor( 123 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821] 124 | ) 125 | criterion = nn.CrossEntropyLoss(weights.to(DEVICE)) 126 | criterion_val = ( 127 | nn.CrossEntropyLoss() 128 | ) # Use two loss functions, as the validation dataset is balanced 129 | 130 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 131 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 132 | 133 | # ***** Train the model ***** 134 | print("--- Start training ---") 135 | scaler = torch.cuda.amp.GradScaler() 136 | best_valid_loss = 100 137 | 138 | for epoch in range(NUM_EPOCHS): 139 | MODEL.train() 140 | total_train_correct = 0 141 | total_train_samples = 0 142 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"): 143 | images, labels = images.to(DEVICE), labels.to(DEVICE) 144 | optimizer.zero_grad() 145 | with torch.autocast(device_type="cuda", dtype=torch.float16): 146 | output = MODEL(images) 147 | loss = criterion(output.cuda(), labels.cuda()) 148 | scaler.scale(loss).backward() 149 | scaler.step(optimizer) 150 | scaler.update() 151 | lr_scheduler.step() 152 | current_lr = optimizer.param_groups[0]["lr"] 153 | 154 | _, train_predicted = torch.max(output, 1) 155 | total_train_samples += labels.size(0) 156 | total_train_correct += (train_predicted == labels).sum().item() 157 | 158 | train_accuracy = (total_train_correct / total_train_samples) * 100 159 | 160 | MODEL.eval() 161 | valid_loss = 0.0 162 | correct = 0 163 | total = 0 164 | with torch.no_grad(): 165 | for images, labels in valid_loader: 166 | images, labels = images.to(DEVICE), labels.to(DEVICE) 167 | outputs = MODEL(images) 168 | loss = criterion_val(outputs.cuda(), labels.cuda()) 169 | valid_loss += loss.item() 170 | _, predicted = torch.max(outputs, 1) 171 | total += labels.size(0) 172 | correct += (predicted == labels).sum().item() 173 | 174 | print( 175 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 176 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 177 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 178 | f", Training Accuracy: 
{train_accuracy:.2f}%, " 179 | ) 180 | 181 | if valid_loss < best_valid_loss: 182 | best_valid_loss = valid_loss 183 | print(f"Saving model at epoch {epoch+1}") 184 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 185 | -------------------------------------------------------------------------------- /models/AffectNet8_Efficientnet_VA/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | 11 | # Load the annotations for training and validation from separate CSV files 12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 14 | 15 | valid_annotations_path = ( 16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 17 | ) 18 | valid_annotations_df = pd.read_csv(valid_annotations_path) 19 | 20 | # Set parameters 21 | BATCHSIZE = 128 22 | 23 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | # **** Create dataset and data loaders **** 27 | class CustomDataset(Dataset): 28 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 29 | self.dataframe = dataframe 30 | self.transform = transform 31 | self.root_dir = root_dir 32 | self.balance = balance 33 | 34 | if self.balance: 35 | self.dataframe = self.balance_dataset() 36 | 37 | def __len__(self): 38 | return len(self.dataframe) 39 | 40 | def __getitem__(self, idx): 41 | image_path = os.path.join( 42 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 43 | ) 44 | image = Image.open(image_path) 45 | 46 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8) 47 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16) 48 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16) 49 | 50 | if self.transform: 51 | image = self.transform(image) 52 | 53 | return image, classes, valence, arousal 54 | 55 | def balance_dataset(self): 56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 58 | ) 59 | return balanced_df 60 | 61 | 62 | transform_valid = transforms.Compose( 63 | [ 64 | transforms.ToTensor(), 65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 66 | ] 67 | ) 68 | 69 | valid_dataset = CustomDataset( 70 | dataframe=valid_annotations_df, 71 | root_dir=IMAGE_FOLDER_TEST, 72 | transform=transform_valid, 73 | balance=False, 74 | ) 75 | 76 | valid_loader = DataLoader( 77 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 78 | ) 79 | 80 | # ***** Define the model ***** 81 | 82 | # Initialize the model 83 | MODEL = models.efficientnet_v2_s(weights="DEFAULT") 84 | num_features = MODEL.classifier[1].in_features 85 | MODEL.classifier[1] = nn.Linear(in_features=num_features, out_features=2) 86 | MODEL.to(DEVICE) 87 | 88 | 89 | # **** Test the model performance for classification **** 90 | 91 | # Set the model to evaluation mode 92 | MODEL.load_state_dict(torch.load("model.pt")) 93 | MODEL.to(DEVICE) 94 | MODEL.eval() 95 | 96 | all_val_true_values = [] 97 | all_val_predicted_values = [] 98 | all_aro_true_values = [] 99 | all_aro_predicted_values = [] 100 | 101 | # Start inference on test set 102 | with torch.no_grad(): 103 | for images, _, 
val_true, aro_true in valid_loader: 104 | images, val_true, aro_true = ( 105 | images.to(DEVICE), 106 | val_true.to(DEVICE), 107 | aro_true.to(DEVICE), 108 | ) 109 | 110 | outputs = MODEL(images) 111 | val_pred = outputs[:, 0] 112 | aro_pred = outputs[:, 1] 113 | 114 | # Append to the lists --> Regression 115 | true_val_values = val_true.cpu().numpy() 116 | true_aro_values = aro_true.cpu().numpy() 117 | pred_val_values = val_pred.cpu().numpy() 118 | pred_aro_values = aro_pred.cpu().numpy() 119 | all_val_true_values.extend(true_val_values) 120 | all_aro_true_values.extend(true_aro_values) 121 | all_val_predicted_values.extend(pred_val_values) 122 | all_aro_predicted_values.extend(pred_aro_values) 123 | 124 | df = pd.DataFrame( 125 | { 126 | "val_pred": all_val_predicted_values, 127 | "val_true": all_val_true_values, 128 | "aro_pred": all_aro_predicted_values, 129 | "aro_true": all_aro_true_values, 130 | } 131 | ) 132 | df.to_csv("inference.csv", index=False) 133 | -------------------------------------------------------------------------------- /models/AffectNet8_Maxvit_Combined/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | 18 | # Set parameters 19 | BATCHSIZE = 128 20 | MODEL = models.maxvit_t(weights="DEFAULT") 21 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | 24 | # **** Create dataset and data loaders **** 25 | class CustomDataset(Dataset): 26 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 27 | self.dataframe = dataframe 28 | self.transform = transform 29 | self.root_dir = root_dir 30 | self.balance = balance 31 | 32 | if self.balance: 33 | self.dataframe = self.balance_dataset() 34 | 35 | def __len__(self): 36 | return len(self.dataframe) 37 | 38 | def __getitem__(self, idx): 39 | image_path = os.path.join( 40 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 41 | ) 42 | if os.path.exists(image_path): 43 | image = Image.open(image_path) 44 | else: 45 | image = Image.new( 46 | "RGB", (224, 224), color="white" 47 | ) # Handle missing image file 48 | 49 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 50 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 51 | 52 | if self.transform: 53 | image = self.transform(image) 54 | 55 | return image, classes, labels 56 | 57 | def balance_dataset(self): 58 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 59 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 60 | ) 61 | return balanced_df 62 | 63 | 64 | transform_valid = transforms.Compose( 65 | [ 66 | transforms.ToTensor(), 67 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 68 | ] 69 | ) 70 | 71 | valid_dataset = CustomDataset( 72 | dataframe=valid_annotations_df, 73 | root_dir=IMAGE_FOLDER_TEST, 74 | transform=transform_valid, 75 | balance=False, 76 
| ) 77 | valid_loader = DataLoader( 78 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 79 | ) 80 | 81 | # ***** Define the model ***** 82 | 83 | # Initialize the model 84 | block_channels = MODEL.classifier[3].in_features 85 | MODEL.classifier = nn.Sequential( 86 | nn.AdaptiveAvgPool2d(1), 87 | nn.Flatten(), 88 | nn.LayerNorm(block_channels), 89 | nn.Linear(block_channels, block_channels), 90 | nn.Tanh(), 91 | nn.Linear(block_channels, 10, bias=False), 92 | ) 93 | MODEL.to(DEVICE) # Put the model to the GPU 94 | 95 | # Set the model to evaluation mode 96 | MODEL.load_state_dict(torch.load("model.pt")) 97 | MODEL.to(DEVICE) 98 | MODEL.eval() 99 | 100 | all_labels_cls = [] 101 | all_predicted_cls = [] 102 | 103 | all_true_val = [] 104 | all_pred_val = [] 105 | all_true_aro = [] 106 | all_pred_aro = [] 107 | 108 | # Start inference on test set 109 | with torch.no_grad(): 110 | for images, classes, labels in iter(valid_loader): 111 | images, classes, labels = ( 112 | images.to(DEVICE), 113 | classes.to(DEVICE), 114 | labels.to(DEVICE), 115 | ) 116 | 117 | outputs = MODEL(images) 118 | outputs_cls = outputs[:, :8] 119 | outputs_reg = outputs[:, 8:] 120 | val_pred = outputs_reg[:, 0] 121 | aro_pred = outputs_reg[:, 1] 122 | 123 | _, predicted_cls = torch.max(outputs_cls, 1) 124 | 125 | all_labels_cls.extend(classes.cpu().numpy()) 126 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 127 | val_true = labels[:, 0] 128 | aro_true = labels[:, 1] 129 | 130 | all_true_val.extend(val_true.cpu().numpy()) 131 | all_true_aro.extend(aro_true.cpu().numpy()) 132 | all_pred_val.extend(val_pred.cpu().numpy()) 133 | all_pred_aro.extend(aro_pred.cpu().numpy()) 134 | 135 | df = pd.DataFrame( 136 | { 137 | "cat_pred": all_predicted_cls, 138 | "cat_true": all_labels_cls, 139 | "val_pred": all_pred_val, 140 | "val_true": all_true_val, 141 | "aro_pred": all_pred_aro, 142 | "aro_true": all_true_aro, 143 | } 144 | ) 145 | df.to_csv("inference.csv", index=False) 146 | -------------------------------------------------------------------------------- /models/AffectNet8_Maxvit_Combined/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | from torch.optim import lr_scheduler 11 | from tqdm import tqdm 12 | 13 | # Load the annotations for training and validation from separate CSV files 14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 16 | train_annotations_path = ( 17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 18 | ) 19 | valid_annotations_path = ( 20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 21 | ) 22 | train_annotations_df = pd.read_csv(train_annotations_path) 23 | valid_annotations_df = pd.read_csv(valid_annotations_path) 24 | 25 | 26 | # Set parameters 27 | BATCHSIZE = 128 28 | NUM_EPOCHS = 20 29 | LR = 4e-5 30 | MODEL = models.maxvit_t(weights="DEFAULT") 31 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 32 | 33 | 34 | # **** Create dataset and data loaders **** 35 | class CustomDataset(Dataset): 36 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 37 | self.dataframe = dataframe 38 | self.transform = transform 39 | 
self.root_dir = root_dir 40 | self.balance = balance 41 | 42 | if self.balance: 43 | self.dataframe = self.balance_dataset() 44 | 45 | def __len__(self): 46 | return len(self.dataframe) 47 | 48 | def __getitem__(self, idx): 49 | image_path = os.path.join( 50 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 51 | ) 52 | if os.path.exists(image_path): 53 | image = Image.open(image_path) 54 | else: 55 | image = Image.new( 56 | "RGB", (224, 224), color="white" 57 | ) # Handle missing image file 58 | 59 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 60 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 61 | 62 | if self.transform: 63 | image = self.transform(image) 64 | 65 | return image, classes, labels 66 | 67 | def balance_dataset(self): 68 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 69 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 70 | ) 71 | return balanced_df 72 | 73 | 74 | transform = transforms.Compose( 75 | [ 76 | transforms.RandomHorizontalFlip(0.5), 77 | transforms.RandomGrayscale(0.01), 78 | transforms.RandomRotation(10), 79 | transforms.ColorJitter( 80 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1 81 | ), # model more robust to changes in lighting conditions. 82 | transforms.RandomPerspective( 83 | distortion_scale=0.2, p=0.5 84 | ), # can be helpful if your images might have varying perspectives. 85 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255) 86 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 87 | transforms.RandomErasing( 88 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random" 89 | ), # TEST: Should help overfitting 90 | ] 91 | ) 92 | 93 | transform_valid = transforms.Compose( 94 | [ 95 | transforms.ToTensor(), 96 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 97 | ] 98 | ) 99 | 100 | train_dataset = CustomDataset( 101 | dataframe=train_annotations_df, 102 | root_dir=IMAGE_FOLDER, 103 | transform=transform, 104 | balance=False, 105 | ) 106 | valid_dataset = CustomDataset( 107 | dataframe=valid_annotations_df, 108 | root_dir=IMAGE_FOLDER_TEST, 109 | transform=transform_valid, 110 | balance=False, 111 | ) 112 | train_loader = DataLoader( 113 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 114 | ) 115 | valid_loader = DataLoader( 116 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 117 | ) 118 | 119 | # ***** Define the model ***** 120 | 121 | # Initialize the model 122 | block_channels = MODEL.classifier[3].in_features 123 | MODEL.classifier = nn.Sequential( 124 | nn.AdaptiveAvgPool2d(1), 125 | nn.Flatten(), 126 | nn.LayerNorm(block_channels), 127 | nn.Linear(block_channels, block_channels), 128 | nn.Tanh(), 129 | nn.Linear(block_channels, 10, bias=False), 130 | ) 131 | MODEL.to(DEVICE) # Put the model to the GPU 132 | 133 | # Define (weighted) loss function 134 | weights = torch.tensor( 135 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821] 136 | ) 137 | criterion_cls = nn.CrossEntropyLoss(weights.to(DEVICE)) 138 | criterion_cls_val = ( 139 | nn.CrossEntropyLoss() 140 | ) # Use two loss functions, as the validation dataset is balanced 141 | criterion_reg = nn.MSELoss() 142 | 143 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 144 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 145 | 146 | # ***** Train the model ***** 147 | print("--- Start training 
---") 148 | scaler = torch.cuda.amp.GradScaler() 149 | best_valid_loss = 100 150 | 151 | for epoch in range(NUM_EPOCHS): 152 | MODEL.train() 153 | total_train_correct = 0 154 | total_train_samples = 0 155 | for images, classes, labels in tqdm( 156 | train_loader, desc="Epoch train_loader progress" 157 | ): 158 | images, classes, labels = ( 159 | images.to(DEVICE), 160 | classes.to(DEVICE), 161 | labels.to(DEVICE), 162 | ) 163 | optimizer.zero_grad() 164 | with torch.autocast(device_type="cuda", dtype=torch.float16): 165 | outputs = MODEL(images) 166 | outputs_cls = outputs[:, :8] 167 | outputs_reg = outputs[:, 8:] 168 | loss = criterion_cls( 169 | outputs_cls.cuda(), classes.cuda() 170 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 171 | scaler.scale(loss).backward() 172 | scaler.step(optimizer) 173 | scaler.update() 174 | lr_scheduler.step() 175 | current_lr = optimizer.param_groups[0]["lr"] 176 | 177 | _, train_predicted = torch.max(outputs_cls, 1) 178 | total_train_samples += classes.size(0) 179 | total_train_correct += (train_predicted == classes).sum().item() 180 | 181 | train_accuracy = (total_train_correct / total_train_samples) * 100 182 | 183 | MODEL.eval() 184 | valid_loss = 0.0 185 | correct = 0 186 | total = 0 187 | with torch.no_grad(): 188 | for images, classes, labels in valid_loader: 189 | images, classes, labels = ( 190 | images.to(DEVICE), 191 | classes.to(DEVICE), 192 | labels.to(DEVICE), 193 | ) 194 | outputs = MODEL(images) 195 | outputs_cls = outputs[:, :8] 196 | outputs_reg = outputs[:, 8:] 197 | loss = criterion_cls_val( 198 | outputs_cls.cuda(), classes.cuda() 199 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 200 | valid_loss += loss.item() 201 | _, predicted = torch.max(outputs_cls, 1) 202 | total += classes.size(0) 203 | correct += (predicted == classes).sum().item() 204 | 205 | print( 206 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 207 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 208 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 209 | f", Training Accuracy: {train_accuracy:.2f}%, " 210 | ) 211 | 212 | if valid_loss < best_valid_loss: 213 | best_valid_loss = valid_loss 214 | print(f"Saving model at epoch {epoch+1}") 215 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 216 | -------------------------------------------------------------------------------- /models/AffectNet8_Maxvit_Discrete/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | 17 | valid_annotations_df = pd.read_csv(valid_annotations_path) 18 | 19 | 20 | # Set parameters 21 | BATCHSIZE = 128 22 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only 23 | MODEL = models.maxvit_t(weights="DEFAULT") 24 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 25 | 26 | 27 | # **** Create dataset and data loaders **** 28 | class CustomDataset(Dataset): 29 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 30 | 
self.dataframe = dataframe 31 | self.transform = transform 32 | self.root_dir = root_dir 33 | self.balance = balance 34 | 35 | if self.balance: 36 | self.dataframe = self.balance_dataset() 37 | 38 | def __len__(self): 39 | return len(self.dataframe) 40 | 41 | def __getitem__(self, idx): 42 | image_path = os.path.join( 43 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 44 | ) 45 | if os.path.exists(image_path): 46 | image = Image.open(image_path) 47 | else: 48 | image = Image.new( 49 | "RGB", (224, 224), color="white" 50 | ) # Handle missing image file 51 | 52 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 53 | 54 | if self.transform: 55 | image = self.transform(image) 56 | 57 | return image, label 58 | 59 | def balance_dataset(self): 60 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 61 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 62 | ) 63 | return balanced_df 64 | 65 | 66 | transform_valid = transforms.Compose( 67 | [ 68 | transforms.ToTensor(), 69 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 70 | ] 71 | ) 72 | 73 | 74 | valid_dataset = CustomDataset( 75 | dataframe=valid_annotations_df, 76 | root_dir=IMAGE_FOLDER_TEST, 77 | transform=transform_valid, 78 | balance=False, 79 | ) 80 | 81 | valid_loader = DataLoader( 82 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 83 | ) 84 | 85 | # ***** Define the model ***** 86 | 87 | # Initialize the model 88 | block_channels = MODEL.classifier[3].in_features 89 | MODEL.classifier = nn.Sequential( 90 | nn.AdaptiveAvgPool2d(1), 91 | nn.Flatten(), 92 | nn.LayerNorm(block_channels), 93 | nn.Linear(block_channels, block_channels), 94 | nn.Tanh(), 95 | nn.Linear(block_channels, 8, bias=False), 96 | ) 97 | MODEL.to(DEVICE) # Put the model to the GPU 98 | 99 | # **** Test the model performance for classification **** 100 | 101 | # Set the model to evaluation mode 102 | MODEL.load_state_dict(torch.load("model.pt")) 103 | MODEL.to(DEVICE) 104 | MODEL.eval() 105 | 106 | all_labels_cls = [] 107 | all_predicted_cls = [] 108 | 109 | # Start inference on test set 110 | with torch.no_grad(): 111 | for images, labels_cls in iter(valid_loader): 112 | images = images.to(DEVICE) 113 | labels_cls = labels_cls.to(DEVICE) 114 | 115 | outputs = MODEL(images) 116 | 117 | _, predicted_cls = torch.max(outputs, 1) 118 | 119 | all_labels_cls.extend(labels_cls.cpu().numpy()) 120 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 121 | 122 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls}) 123 | df.to_csv("inference.csv", index=False) 124 | -------------------------------------------------------------------------------- /models/AffectNet8_Maxvit_Discrete/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | import torchvision 11 | from torch.optim import lr_scheduler 12 | from tqdm import tqdm 13 | 14 | # Load the annotations for training and validation from separate CSV files 15 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 16 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 17 | train_annotations_path = ( 18 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 19 | ) 20 | 
valid_annotations_path = ( 21 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 22 | ) 23 | train_annotations_df = pd.read_csv(train_annotations_path) 24 | valid_annotations_df = pd.read_csv(valid_annotations_path) 25 | 26 | # Set parameters 27 | BATCHSIZE = 128 28 | NUM_EPOCHS = 20 29 | LR = 4e-5 30 | # ADAMW_WEIGHT_DECAY = 0.1 # For efficientnet only 31 | MODEL = models.maxvit_t(weights="DEFAULT") 32 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 33 | 34 | 35 | # **** Create dataset and data loaders **** 36 | class CustomDataset(Dataset): 37 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 38 | self.dataframe = dataframe 39 | self.transform = transform 40 | self.root_dir = root_dir 41 | self.balance = balance 42 | 43 | if self.balance: 44 | self.dataframe = self.balance_dataset() 45 | 46 | def __len__(self): 47 | return len(self.dataframe) 48 | 49 | def __getitem__(self, idx): 50 | image_path = os.path.join( 51 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 52 | ) 53 | if os.path.exists(image_path): 54 | image = Image.open(image_path) 55 | else: 56 | image = Image.new( 57 | "RGB", (224, 224), color="white" 58 | ) # Handle missing image file 59 | 60 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 61 | 62 | if self.transform: 63 | image = self.transform(image) 64 | 65 | return image, label 66 | 67 | def balance_dataset(self): 68 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 69 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 70 | ) 71 | return balanced_df 72 | 73 | 74 | transform = transforms.Compose( 75 | [ 76 | transforms.ElasticTransform(alpha=5.0, sigma=5.0), 77 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), 78 | transforms.RandomGrayscale(p=0.1), 79 | transforms.RandomRotation(degrees=15), 80 | transforms.RandomVerticalFlip(), 81 | transforms.ColorJitter(0.15, 0.15, 0.15), 82 | torchvision.transforms.RandomAutocontrast(p=0.4), 83 | transforms.ToTensor(), 84 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 85 | ] 86 | ) 87 | 88 | 89 | transform_valid = transforms.Compose( 90 | [ 91 | transforms.ToTensor(), 92 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 93 | ] 94 | ) 95 | 96 | train_dataset = CustomDataset( 97 | dataframe=train_annotations_df, 98 | root_dir=IMAGE_FOLDER, 99 | transform=transform, 100 | balance=False, 101 | ) 102 | valid_dataset = CustomDataset( 103 | dataframe=valid_annotations_df, 104 | root_dir=IMAGE_FOLDER_TEST, 105 | transform=transform_valid, 106 | balance=False, 107 | ) 108 | train_loader = DataLoader( 109 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 110 | ) 111 | valid_loader = DataLoader( 112 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 113 | ) 114 | 115 | # ***** Define the model ***** 116 | 117 | # Initialize the model 118 | block_channels = MODEL.classifier[3].in_features 119 | MODEL.classifier = nn.Sequential( 120 | nn.AdaptiveAvgPool2d(1), 121 | nn.Flatten(), 122 | nn.LayerNorm(block_channels), 123 | nn.Linear(block_channels, block_channels), 124 | nn.Tanh(), 125 | nn.Linear(block_channels, 8, bias=False), 126 | ) 127 | MODEL.to(DEVICE) # Put the model to the GPU 128 | 129 | # Define (weighted) loss function 130 | weights = torch.tensor( 131 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821] 132 | ) 133 | criterion = nn.CrossEntropyLoss(weights.to(DEVICE)) 134 | 
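# Editor's sketch (assumption, not part of the original training recipe): the hard-coded
# `weights` above are consistent with normalized inverse class frequencies of the training
# annotations; if the split ever changes, they could be recomputed roughly like this
# (the result is intentionally not used below):
_class_counts = train_annotations_df["exp"].value_counts().sort_index()
_inv_freq_weights = (1.0 / _class_counts) / (1.0 / _class_counts).sum()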
criterion_val = ( 135 | nn.CrossEntropyLoss() 136 | ) # Use two loss functions, as the validation dataset is balanced 137 | 138 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 139 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 140 | 141 | # ***** Train the model ***** 142 | print("--- Start training ---") 143 | scaler = torch.cuda.amp.GradScaler() 144 | best_valid_loss = 100 145 | 146 | for epoch in range(NUM_EPOCHS): 147 | MODEL.train() 148 | total_train_correct = 0 149 | total_train_samples = 0 150 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"): 151 | images, labels = images.to(DEVICE), labels.to(DEVICE) 152 | optimizer.zero_grad() 153 | with torch.autocast(device_type="cuda", dtype=torch.float16): 154 | output = MODEL(images) 155 | loss = criterion(output.cuda(), labels.cuda()) 156 | scaler.scale(loss).backward() 157 | scaler.step(optimizer) 158 | scaler.update() 159 | lr_scheduler.step() 160 | current_lr = optimizer.param_groups[0]["lr"] 161 | 162 | _, train_predicted = torch.max(output, 1) 163 | total_train_samples += labels.size(0) 164 | total_train_correct += (train_predicted == labels).sum().item() 165 | 166 | train_accuracy = (total_train_correct / total_train_samples) * 100 167 | 168 | MODEL.eval() 169 | valid_loss = 0.0 170 | correct = 0 171 | total = 0 172 | with torch.no_grad(): 173 | for images, labels in valid_loader: 174 | images, labels = images.to(DEVICE), labels.to(DEVICE) 175 | outputs = MODEL(images) 176 | loss = criterion_val(outputs.cuda(), labels.cuda()) 177 | valid_loss += loss.item() 178 | _, predicted = torch.max(outputs, 1) 179 | total += labels.size(0) 180 | correct += (predicted == labels).sum().item() 181 | 182 | print( 183 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 184 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 185 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 186 | f", Training Accuracy: {train_accuracy:.2f}%, " 187 | ) 188 | 189 | if valid_loss < best_valid_loss: 190 | best_valid_loss = valid_loss 191 | print(f"Saving model at epoch {epoch+1}") 192 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 193 | -------------------------------------------------------------------------------- /models/AffectNet8_Maxvit_VA/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | 18 | # Set parameters 19 | BATCHSIZE = 128 20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | 23 | # **** Create dataset and data loaders **** 24 | class CustomDataset(Dataset): 25 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 26 | self.dataframe = dataframe 27 | self.transform = transform 28 | self.root_dir = root_dir 29 | self.balance = balance 30 | 31 | if self.balance: 32 | self.dataframe = self.balance_dataset() 33 | 34 | def __len__(self): 35 | return 
len(self.dataframe) 36 | 37 | def __getitem__(self, idx): 38 | image_path = os.path.join( 39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 40 | ) 41 | image = Image.open(image_path) 42 | 43 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8) 44 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16) 45 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16) 46 | 47 | if self.transform: 48 | image = self.transform(image) 49 | 50 | return image, classes, valence, arousal 51 | 52 | def balance_dataset(self): 53 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 54 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 55 | ) 56 | return balanced_df 57 | 58 | 59 | transform_valid = transforms.Compose( 60 | [ 61 | transforms.ToTensor(), 62 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 63 | ] 64 | ) 65 | 66 | valid_dataset = CustomDataset( 67 | dataframe=valid_annotations_df, 68 | root_dir=IMAGE_FOLDER_TEST, 69 | transform=transform_valid, 70 | balance=False, 71 | ) 72 | valid_loader = DataLoader( 73 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 74 | ) 75 | 76 | # ***** Define the model ***** 77 | 78 | # Initialize the model 79 | MODEL = models.maxvit_t(weights="DEFAULT") 80 | block_channels = MODEL.classifier[3].in_features 81 | MODEL.classifier = nn.Sequential( 82 | nn.AdaptiveAvgPool2d(1), 83 | nn.Flatten(), 84 | nn.LayerNorm(block_channels), 85 | nn.Linear(block_channels, block_channels), 86 | nn.Tanh(), 87 | nn.Dropout(0.3), 88 | nn.Linear(block_channels, 2, bias=False), 89 | ) 90 | MODEL.to(DEVICE) 91 | 92 | # **** Test the model performance for classification **** 93 | 94 | # Set the model to evaluation mode 95 | MODEL.load_state_dict(torch.load("model.pt")) 96 | MODEL.to(DEVICE) 97 | MODEL.eval() 98 | 99 | all_val_true_values = [] 100 | all_val_predicted_values = [] 101 | all_aro_true_values = [] 102 | all_aro_predicted_values = [] 103 | 104 | # Start inference on test set 105 | with torch.no_grad(): 106 | for images, _, val_true, aro_true in valid_loader: 107 | images, val_true, aro_true = ( 108 | images.to(DEVICE), 109 | val_true.to(DEVICE), 110 | aro_true.to(DEVICE), 111 | ) 112 | 113 | outputs = MODEL(images) 114 | val_pred = outputs[:, 0] 115 | aro_pred = outputs[:, 1] 116 | 117 | # Append to the lists --> Regression 118 | true_val_values = val_true.cpu().numpy() 119 | true_aro_values = aro_true.cpu().numpy() 120 | pred_val_values = val_pred.cpu().numpy() 121 | pred_aro_values = aro_pred.cpu().numpy() 122 | all_val_true_values.extend(true_val_values) 123 | all_aro_true_values.extend(true_aro_values) 124 | all_val_predicted_values.extend(pred_val_values) 125 | all_aro_predicted_values.extend(pred_aro_values) 126 | df = pd.DataFrame( 127 | { 128 | "val_pred": all_val_predicted_values, 129 | "val_true": all_val_true_values, 130 | "aro_pred": all_aro_predicted_values, 131 | "aro_true": all_aro_true_values, 132 | } 133 | ) 134 | df.to_csv("inference.csv", index=False) 135 | -------------------------------------------------------------------------------- /models/AffectNet8_Swin_Combined/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load 
the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | # Set parameters 18 | BATCHSIZE = 128 19 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 20 | 21 | 22 | # **** Create dataset and data loaders **** 23 | class CustomDataset(Dataset): 24 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 25 | self.dataframe = dataframe 26 | self.transform = transform 27 | self.root_dir = root_dir 28 | self.balance = balance 29 | 30 | if self.balance: 31 | self.dataframe = self.balance_dataset() 32 | 33 | def __len__(self): 34 | return len(self.dataframe) 35 | 36 | def __getitem__(self, idx): 37 | image_path = os.path.join( 38 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 39 | ) 40 | if os.path.exists(image_path): 41 | image = Image.open(image_path) 42 | else: 43 | image = Image.new( 44 | "RGB", (224, 224), color="white" 45 | ) # Handle missing image file 46 | 47 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 48 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 49 | 50 | if self.transform: 51 | image = self.transform(image) 52 | 53 | return image, classes, labels 54 | 55 | def balance_dataset(self): 56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 58 | ) 59 | return balanced_df 60 | 61 | 62 | transform_valid = transforms.Compose( 63 | [ 64 | transforms.ToTensor(), 65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 66 | ] 67 | ) 68 | 69 | valid_dataset = CustomDataset( 70 | dataframe=valid_annotations_df, 71 | root_dir=IMAGE_FOLDER_TEST, 72 | transform=transform_valid, 73 | balance=False, 74 | ) 75 | valid_loader = DataLoader( 76 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 77 | ) 78 | 79 | # * Define the model * 80 | 81 | # Initialize the model 82 | MODEL = models.swin_v2_t(weights="DEFAULT") 83 | MODEL.head = torch.nn.Linear(in_features=768, out_features=10, bias=True) 84 | MODEL.to(DEVICE) 85 | 86 | # **** Test the model performance for classification **** 87 | 88 | # Set the model to evaluation mode 89 | MODEL.load_state_dict(torch.load("model.pt")) 90 | MODEL.to(DEVICE) 91 | MODEL.eval() 92 | 93 | all_labels_cls = [] 94 | all_predicted_cls = [] 95 | 96 | all_true_val = [] 97 | all_pred_val = [] 98 | all_true_aro = [] 99 | all_pred_aro = [] 100 | 101 | # Start inference on test set 102 | with torch.no_grad(): 103 | for images, classes, labels in iter(valid_loader): 104 | images, classes, labels = ( 105 | images.to(DEVICE), 106 | classes.to(DEVICE), 107 | labels.to(DEVICE), 108 | ) 109 | 110 | outputs = MODEL(images) 111 | outputs_cls = outputs[:, :8] 112 | outputs_reg = outputs[:, 8:] 113 | val_pred = outputs_reg[:, 0] 114 | aro_pred = outputs_reg[:, 1] 115 | 116 | _, predicted_cls = torch.max(outputs_cls, 1) 117 | 118 | all_labels_cls.extend(classes.cpu().numpy()) 119 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 120 | val_true = labels[:, 0] 121 | aro_true = labels[:, 1] 122 | 123 | all_true_val.extend(val_true.cpu().numpy()) 124 | all_true_aro.extend(aro_true.cpu().numpy()) 125 | all_pred_val.extend(val_pred.cpu().numpy()) 126 |
all_pred_aro.extend(aro_pred.cpu().numpy()) 127 | 128 | df = pd.DataFrame( 129 | { 130 | "cat_pred": all_predicted_cls, 131 | "cat_true": all_labels_cls, 132 | "val_pred": all_pred_val, 133 | "val_true": all_true_val, 134 | "aro_pred": all_pred_aro, 135 | "aro_true": all_true_aro, 136 | } 137 | ) 138 | df.to_csv("inference.csv", index=False) 139 | -------------------------------------------------------------------------------- /models/AffectNet8_Swin_Combined/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | from torch.optim import lr_scheduler 11 | from tqdm import tqdm 12 | 13 | # Load the annotations for training and validation from separate CSV files 14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 16 | train_annotations_path = ( 17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 18 | ) 19 | valid_annotations_path = ( 20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 21 | ) 22 | train_annotations_df = pd.read_csv(train_annotations_path) 23 | valid_annotations_df = pd.read_csv(valid_annotations_path) 24 | 25 | 26 | # Set parameters 27 | BATCHSIZE = 128 28 | NUM_EPOCHS = 20 29 | LR = 4e-5 30 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 31 | 32 | 33 | # **** Create dataset and data loaders **** 34 | class CustomDataset(Dataset): 35 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 36 | self.dataframe = dataframe 37 | self.transform = transform 38 | self.root_dir = root_dir 39 | self.balance = balance 40 | 41 | if self.balance: 42 | self.dataframe = self.balance_dataset() 43 | 44 | def __len__(self): 45 | return len(self.dataframe) 46 | 47 | def __getitem__(self, idx): 48 | image_path = os.path.join( 49 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 50 | ) 51 | if os.path.exists(image_path): 52 | image = Image.open(image_path) 53 | else: 54 | image = Image.new( 55 | "RGB", (224, 224), color="white" 56 | ) # Handle missing image file 57 | 58 | classes = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 59 | labels = torch.tensor(self.dataframe.iloc[idx, 2:4].values, dtype=torch.float32) 60 | 61 | if self.transform: 62 | image = self.transform(image) 63 | 64 | return image, classes, labels 65 | 66 | def balance_dataset(self): 67 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 68 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 69 | ) 70 | return balanced_df 71 | 72 | 73 | transform = transforms.Compose( 74 | [ 75 | transforms.RandomHorizontalFlip(0.5), 76 | transforms.RandomGrayscale(0.01), 77 | transforms.RandomRotation(10), 78 | transforms.ColorJitter( 79 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1 80 | ), # model more robust to changes in lighting conditions. 81 | transforms.RandomPerspective( 82 | distortion_scale=0.2, p=0.5 83 | ), # can be helpful if your images might have varying perspectives. 
84 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255) 85 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 86 | transforms.RandomErasing( 87 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random" 88 | ), # TEST: Should help overfitting 89 | ] 90 | ) 91 | 92 | transform_valid = transforms.Compose( 93 | [ 94 | transforms.ToTensor(), 95 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 96 | ] 97 | ) 98 | 99 | train_dataset = CustomDataset( 100 | dataframe=train_annotations_df, 101 | root_dir=IMAGE_FOLDER, 102 | transform=transform, 103 | balance=False, 104 | ) 105 | valid_dataset = CustomDataset( 106 | dataframe=valid_annotations_df, 107 | root_dir=IMAGE_FOLDER_TEST, 108 | transform=transform_valid, 109 | balance=False, 110 | ) 111 | train_loader = DataLoader( 112 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 113 | ) 114 | valid_loader = DataLoader( 115 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 116 | ) 117 | 118 | # * Define the model * 119 | 120 | # Initialize the model 121 | MODEL = models.swin_v2_t(weights="DEFAULT") 122 | MODEL.head = torch.nn.Linear(in_features=768, out_features=10, bias=True) 123 | MODEL.to(DEVICE) 124 | # Define (weighted) loss function 125 | weights = torch.tensor( 126 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821] 127 | ) 128 | criterion_cls = nn.CrossEntropyLoss(weights.to(DEVICE)) 129 | criterion_cls_val = ( 130 | nn.CrossEntropyLoss() 131 | ) # Use two loss functions, as the validation dataset is balanced 132 | criterion_reg = nn.MSELoss() 133 | 134 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 135 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 136 | 137 | # ***** Train the model ***** 138 | print("--- Start training ---") 139 | scaler = torch.cuda.amp.GradScaler() 140 | best_valid_loss = 100 141 | 142 | for epoch in range(NUM_EPOCHS): 143 | MODEL.train() 144 | total_train_correct = 0 145 | total_train_samples = 0 146 | for images, classes, labels in tqdm( 147 | train_loader, desc="Epoch train_loader progress" 148 | ): 149 | images, classes, labels = ( 150 | images.to(DEVICE), 151 | classes.to(DEVICE), 152 | labels.to(DEVICE), 153 | ) 154 | optimizer.zero_grad() 155 | with torch.autocast(device_type="cuda", dtype=torch.float16): 156 | outputs = MODEL(images) 157 | outputs_cls = outputs[:, :8] 158 | outputs_reg = outputs[:, 8:] 159 | loss = criterion_cls( 160 | outputs_cls.cuda(), classes.cuda() 161 | ) + 5 * criterion_reg(outputs_reg.cuda(), labels.cuda()) 162 | scaler.scale(loss).backward() 163 | scaler.step(optimizer) 164 | scaler.update() 165 | lr_scheduler.step() 166 | current_lr = optimizer.param_groups[0]["lr"] 167 | 168 | _, train_predicted = torch.max(outputs_cls, 1) 169 | total_train_samples += classes.size(0) 170 | total_train_correct += (train_predicted == classes).sum().item() 171 | 172 | train_accuracy = (total_train_correct / total_train_samples) * 100 173 | 174 | MODEL.eval() 175 | valid_loss = 0.0 176 | correct = 0 177 | total = 0 178 | with torch.no_grad(): 179 | for images, classes, labels in valid_loader: 180 | images, classes, labels = ( 181 | images.to(DEVICE), 182 | classes.to(DEVICE), 183 | labels.to(DEVICE), 184 | ) 185 | outputs = MODEL(images) 186 | outputs_cls = outputs[:, :8] 187 | outputs_reg = outputs[:, 8:] 188 | loss = criterion_cls_val( 189 | outputs_cls.cuda(), classes.cuda() 190 | ) + 5 * criterion_reg(outputs_reg.cuda(), 
labels.cuda()) 191 | valid_loss += loss.item() 192 | _, predicted = torch.max(outputs_cls, 1) 193 | total += classes.size(0) 194 | correct += (predicted == classes).sum().item() 195 | 196 | print( 197 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 198 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 199 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 200 | f", Training Accuracy: {train_accuracy:.2f}%, " 201 | ) 202 | 203 | if valid_loss < best_valid_loss: 204 | best_valid_loss = valid_loss 205 | print(f"Saving model at epoch {epoch+1}") 206 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 207 | -------------------------------------------------------------------------------- /models/AffectNet8_Swin_Discrete/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | # Load the annotations for training and validation from separate CSV files 11 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 12 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 13 | valid_annotations_path = ( 14 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 15 | ) 16 | valid_annotations_df = pd.read_csv(valid_annotations_path) 17 | # Set parameters 18 | BATCHSIZE = 128 19 | 20 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 21 | 22 | 23 | # **** Create dataset and data loaders **** 24 | class CustomDataset(Dataset): 25 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 26 | self.dataframe = dataframe 27 | self.transform = transform 28 | self.root_dir = root_dir 29 | self.balance = balance 30 | 31 | if self.balance: 32 | self.dataframe = self.balance_dataset() 33 | 34 | def __len__(self): 35 | return len(self.dataframe) 36 | 37 | def __getitem__(self, idx): 38 | image_path = os.path.join( 39 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 40 | ) 41 | if os.path.exists(image_path): 42 | image = Image.open(image_path) 43 | else: 44 | image = Image.new( 45 | "RGB", (224, 224), color="white" 46 | ) # Handle missing image file 47 | 48 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 49 | 50 | if self.transform: 51 | image = self.transform(image) 52 | 53 | return image, label 54 | 55 | def balance_dataset(self): 56 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 57 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 58 | ) 59 | return balanced_df 60 | 61 | 62 | transform_valid = transforms.Compose( 63 | [ 64 | transforms.ToTensor(), 65 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 66 | ] 67 | ) 68 | valid_dataset = CustomDataset( 69 | dataframe=valid_annotations_df, 70 | root_dir=IMAGE_FOLDER_TEST, 71 | transform=transform_valid, 72 | balance=False, 73 | ) 74 | valid_loader = DataLoader( 75 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 76 | ) 77 | # * Define the model * 78 | 79 | # Initialize the model 80 | MODEL = models.swin_v2_t(weights="DEFAULT") 81 | MODEL.head = torch.nn.Linear(in_features=768, out_features=8, bias=True) 82 | MODEL.to(DEVICE) 83 | 84 | # Set the model to evaluation mode 85 | MODEL.load_state_dict(torch.load("model.pt")) 86 | MODEL.to(DEVICE) 87 | MODEL.eval() 88 | 89 | all_labels_cls = [] 90 | 
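# Editor's note: these accumulators end up in inference.csv as the cat_true / cat_pred
# columns, which models/evaluation.py looks for when it reports weighted precision,
# recall and F1 for the discrete models.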
all_predicted_cls = [] 91 | 92 | # Start inference on test set 93 | with torch.no_grad(): 94 | for images, labels_cls in iter(valid_loader): 95 | images = images.to(DEVICE) 96 | labels_cls = labels_cls.to(DEVICE) 97 | 98 | outputs = MODEL(images) 99 | 100 | _, predicted_cls = torch.max(outputs, 1) 101 | 102 | all_labels_cls.extend(labels_cls.cpu().numpy()) 103 | all_predicted_cls.extend(predicted_cls.cpu().numpy()) 104 | 105 | 106 | df = pd.DataFrame({"cat_pred": all_predicted_cls, "cat_true": all_labels_cls}) 107 | df.to_csv("inference.csv", index=False) 108 | -------------------------------------------------------------------------------- /models/AffectNet8_Swin_Discrete/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | import torchvision 11 | from torch.optim import lr_scheduler 12 | import re 13 | from tqdm import tqdm 14 | 15 | # Load the annotations for training and validation from separate CSV files 16 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 17 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 18 | train_annotations_path = ( 19 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 20 | ) 21 | valid_annotations_path = ( 22 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 23 | ) 24 | train_annotations_df = pd.read_csv(train_annotations_path) 25 | valid_annotations_df = pd.read_csv(valid_annotations_path) 26 | 27 | 28 | # Set parameters 29 | BATCHSIZE = 128 30 | NUM_EPOCHS = 20 31 | LR = 4e-5 32 | 33 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | 35 | 36 | # **** Create dataset and data loaders **** 37 | class CustomDataset(Dataset): 38 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 39 | self.dataframe = dataframe 40 | self.transform = transform 41 | self.root_dir = root_dir 42 | self.balance = balance 43 | 44 | if self.balance: 45 | self.dataframe = self.balance_dataset() 46 | 47 | def __len__(self): 48 | return len(self.dataframe) 49 | 50 | def __getitem__(self, idx): 51 | image_path = os.path.join( 52 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 53 | ) 54 | if os.path.exists(image_path): 55 | image = Image.open(image_path) 56 | else: 57 | image = Image.new( 58 | "RGB", (224, 224), color="white" 59 | ) # Handle missing image file 60 | 61 | label = torch.tensor(self.dataframe["exp"].iloc[idx], dtype=torch.long) 62 | 63 | if self.transform: 64 | image = self.transform(image) 65 | 66 | return image, label 67 | 68 | def balance_dataset(self): 69 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 70 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 71 | ) 72 | return balanced_df 73 | 74 | 75 | transform = transforms.Compose( 76 | [ 77 | transforms.ElasticTransform(alpha=5.0, sigma=5.0), 78 | transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), 79 | transforms.RandomGrayscale(p=0.1), 80 | transforms.RandomRotation(degrees=15), 81 | transforms.RandomVerticalFlip(), 82 | transforms.ColorJitter(0.15, 0.15, 0.15), 83 | torchvision.transforms.RandomAutocontrast(p=0.4), 84 | transforms.ToTensor(), 85 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 86 | ] 87 | ) 88 | 89 | transform_valid = 
transforms.Compose( 90 | [ 91 | transforms.ToTensor(), 92 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 93 | ] 94 | ) 95 | 96 | train_dataset = CustomDataset( 97 | dataframe=train_annotations_df, 98 | root_dir=IMAGE_FOLDER, 99 | transform=transform, 100 | balance=False, 101 | ) 102 | valid_dataset = CustomDataset( 103 | dataframe=valid_annotations_df, 104 | root_dir=IMAGE_FOLDER_TEST, 105 | transform=transform_valid, 106 | balance=False, 107 | ) 108 | train_loader = DataLoader( 109 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 110 | ) 111 | valid_loader = DataLoader( 112 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 113 | ) 114 | 115 | # * Define the model * 116 | 117 | # Initialize the model 118 | MODEL = models.swin_v2_t(weights="DEFAULT") 119 | MODEL.head = torch.nn.Linear(in_features=768, out_features=8, bias=True) 120 | MODEL.to(DEVICE) 121 | 122 | # Define (weighted) loss function 123 | weights = torch.tensor( 124 | [0.015605, 0.008709, 0.046078, 0.083078, 0.185434, 0.305953, 0.046934, 0.30821] 125 | ) 126 | 127 | criterion = nn.CrossEntropyLoss(weights.to(DEVICE)) 128 | criterion_val = ( 129 | nn.CrossEntropyLoss() 130 | ) # Use two loss functions, as the validation dataset is balanced 131 | 132 | 133 | # Filter parameters for weight decay and no weight decay and create optimizer/scheduler 134 | def filter_params(params, include_patterns, exclude_patterns): 135 | included_params = [] 136 | excluded_params = [] 137 | for name, param in params: 138 | if any(re.search(pattern, name) for pattern in include_patterns): 139 | included_params.append(param) 140 | elif not any(re.search(pattern, name) for pattern in exclude_patterns): 141 | excluded_params.append(param) 142 | return included_params, excluded_params 143 | 144 | 145 | include_patterns = [ 146 | r"^(?!.*\.bn)" 147 | ] # Match any layer name that doesn't contain '.bn' = BatchNorm parameters 148 | exclude_patterns = [r".*\.bn.*"] # Vice versa 149 | params_to_decay, params_not_to_decay = filter_params( 150 | MODEL.named_parameters(), include_patterns, exclude_patterns 151 | ) 152 | 153 | # optimizer = optim.AdamW([ 154 | # {'params': params_to_decay, 'weight_decay': ADAMW_WEIGHT_DECAY}, # Apply weight decay to these parameters 155 | # {'params': params_not_to_decay, 'weight_decay': 0.0} # Exclude weight decay for these parameters = 0.0 156 | # ], lr=LR) 157 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 158 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 159 | 160 | # ***** Train the model ***** 161 | print("--- Start training ---") 162 | scaler = torch.cuda.amp.GradScaler() 163 | best_valid_loss = 100 164 | 165 | for epoch in range(NUM_EPOCHS): 166 | MODEL.train() 167 | total_train_correct = 0 168 | total_train_samples = 0 169 | for images, labels in tqdm(train_loader, desc="Epoch train_loader progress"): 170 | images, labels = images.to(DEVICE), labels.to(DEVICE) 171 | optimizer.zero_grad() 172 | with torch.autocast(device_type="cuda", dtype=torch.float16): 173 | output = MODEL(images) 174 | loss = criterion(output.cuda(), labels.cuda()) 175 | scaler.scale(loss).backward() 176 | scaler.step(optimizer) 177 | scaler.update() 178 | lr_scheduler.step() 179 | current_lr = optimizer.param_groups[0]["lr"] 180 | 181 | _, train_predicted = torch.max(output, 1) 182 | total_train_samples += labels.size(0) 183 | total_train_correct += (train_predicted == labels).sum().item() 184 | 185 | train_accuracy = (total_train_correct / 
total_train_samples) * 100 186 | 187 | MODEL.eval() 188 | valid_loss = 0.0 189 | correct = 0 190 | total = 0 191 | with torch.no_grad(): 192 | for images, labels in valid_loader: 193 | images, labels = images.to(DEVICE), labels.to(DEVICE) 194 | outputs = MODEL(images) 195 | loss = criterion_val(outputs.cuda(), labels.cuda()) 196 | valid_loss += loss.item() 197 | _, predicted = torch.max(outputs, 1) 198 | total += labels.size(0) 199 | correct += (predicted == labels).sum().item() 200 | 201 | print( 202 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 203 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 204 | f"Validation Accuracy: {(correct/total)*100:.2f}%" 205 | f", Training Accuracy: {train_accuracy:.2f}%, " 206 | ) 207 | # TBD: Overwrite the valid loss, then save the model as below if it is smaller than before 208 | 209 | if valid_loss < best_valid_loss: 210 | best_valid_loss = valid_loss 211 | print(f"Saving model at epoch {epoch+1}") 212 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 213 | -------------------------------------------------------------------------------- /models/AffectNet8_Swin_VA/generate_csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | from PIL import Image 9 | 10 | 11 | # Load the annotations for training and validation from separate CSV files 12 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 13 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 14 | 15 | valid_annotations_path = ( 16 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 17 | ) 18 | valid_annotations_df = pd.read_csv(valid_annotations_path) 19 | # Set parameters 20 | BATCHSIZE = 128 21 | 22 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 23 | 24 | 25 | # **** Create dataset and data loaders **** 26 | class CustomDataset(Dataset): 27 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 28 | self.dataframe = dataframe 29 | self.transform = transform 30 | self.root_dir = root_dir 31 | self.balance = balance 32 | 33 | if self.balance: 34 | self.dataframe = self.balance_dataset() 35 | 36 | def __len__(self): 37 | return len(self.dataframe) 38 | 39 | def __getitem__(self, idx): 40 | image_path = os.path.join( 41 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 42 | ) 43 | image = Image.open(image_path) 44 | 45 | classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8) 46 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16) 47 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16) 48 | 49 | if self.transform: 50 | image = self.transform(image) 51 | 52 | return image, classes, valence, arousal 53 | 54 | def balance_dataset(self): 55 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 56 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 57 | ) 58 | return balanced_df 59 | 60 | 61 | transform_valid = transforms.Compose( 62 | [ 63 | transforms.ToTensor(), 64 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 65 | ] 66 | ) 67 | 68 | valid_dataset = CustomDataset( 69 | dataframe=valid_annotations_df, 70 | root_dir=IMAGE_FOLDER_TEST, 71 | transform=transform_valid, 72 | balance=False, 73 | ) 74 | 75 | valid_loader = DataLoader( 76 | valid_dataset,
batch_size=BATCHSIZE, shuffle=False, num_workers=48 77 | ) 78 | 79 | # * Define the model * 80 | 81 | # Initialize the model 82 | MODEL = models.swin_v2_t(weights="DEFAULT") 83 | MODEL.head = torch.nn.Linear(in_features=768, out_features=2, bias=True) 84 | MODEL.to(DEVICE) 85 | 86 | 87 | # **** Test the model performance for classification **** 88 | 89 | # Set the model to evaluation mode 90 | MODEL.load_state_dict(torch.load("model.pt")) 91 | MODEL.to(DEVICE) 92 | MODEL.eval() 93 | 94 | all_val_true_values = [] 95 | all_val_predicted_values = [] 96 | all_aro_true_values = [] 97 | all_aro_predicted_values = [] 98 | 99 | # Start inference on test set 100 | with torch.no_grad(): 101 | for images, _, val_true, aro_true in valid_loader: 102 | images, val_true, aro_true = ( 103 | images.to(DEVICE), 104 | val_true.to(DEVICE), 105 | aro_true.to(DEVICE), 106 | ) 107 | 108 | outputs = MODEL(images) 109 | val_pred = outputs[:, 0] 110 | aro_pred = outputs[:, 1] 111 | 112 | # Append to the lists --> Regression 113 | true_val_values = val_true.cpu().numpy() 114 | true_aro_values = aro_true.cpu().numpy() 115 | pred_val_values = val_pred.cpu().numpy() 116 | pred_aro_values = aro_pred.cpu().numpy() 117 | all_val_true_values.extend(true_val_values) 118 | all_aro_true_values.extend(true_aro_values) 119 | all_val_predicted_values.extend(pred_val_values) 120 | all_aro_predicted_values.extend(pred_aro_values) 121 | 122 | df = pd.DataFrame( 123 | { 124 | "val_pred": all_val_predicted_values, 125 | "val_true": all_val_true_values, 126 | "aro_pred": all_aro_predicted_values, 127 | "aro_true": all_aro_true_values, 128 | } 129 | ) 130 | df.to_csv("inference.csv", index=False) 131 | -------------------------------------------------------------------------------- /models/AffectNet8_Swin_VA/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import torch 4 | import torchvision.transforms as transforms 5 | import torchvision.models as models 6 | from torch.utils.data import DataLoader, Dataset 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | from PIL import Image 10 | from torch.optim import lr_scheduler 11 | from tqdm import tqdm 12 | 13 | # Load the annotations for training and validation from separate CSV files 14 | IMAGE_FOLDER = "/data/AffectNet/train_set/images/" 15 | IMAGE_FOLDER_TEST = "/data/AffectNet/val_set/images/" 16 | train_annotations_path = ( 17 | "../../affectnet_annotations/train_set_annotation_without_lnd.csv" 18 | ) 19 | valid_annotations_path = ( 20 | "../../affectnet_annotations/val_set_annotation_without_lnd.csv" 21 | ) 22 | train_annotations_df = pd.read_csv(train_annotations_path) 23 | valid_annotations_df = pd.read_csv(valid_annotations_path) 24 | 25 | # Set parameters 26 | BATCHSIZE = 128 27 | NUM_EPOCHS = 20 28 | LR = 4e-5 29 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 30 | 31 | 32 | # **** Create dataset and data loaders **** 33 | class CustomDataset(Dataset): 34 | def __init__(self, dataframe, root_dir, transform=None, balance=False): 35 | self.dataframe = dataframe 36 | self.transform = transform 37 | self.root_dir = root_dir 38 | self.balance = balance 39 | 40 | if self.balance: 41 | self.dataframe = self.balance_dataset() 42 | 43 | def __len__(self): 44 | return len(self.dataframe) 45 | 46 | def __getitem__(self, idx): 47 | image_path = os.path.join( 48 | self.root_dir, f"{self.dataframe['number'].iloc[idx]}.jpg" 49 | ) 50 | image = Image.open(image_path) 51 | 52 | 
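        # Editor's note: unlike the Discrete/Combined loaders, this __getitem__ has no
        # white-image fallback, so a missing {number}.jpg raises FileNotFoundError; the
        # positional iloc lookups below assume the column order [number, exp, valence, arousal].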
classes = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.int8) 53 | valence = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.float16) 54 | arousal = torch.tensor(self.dataframe.iloc[idx, 3], dtype=torch.float16) 55 | 56 | if self.transform: 57 | image = self.transform(image) 58 | 59 | return image, classes, valence, arousal 60 | 61 | def balance_dataset(self): 62 | balanced_df = self.dataframe.groupby("exp", group_keys=False).apply( 63 | lambda x: x.sample(self.dataframe["exp"].value_counts().min()) 64 | ) 65 | return balanced_df 66 | 67 | 68 | transform = transforms.Compose( 69 | [ 70 | transforms.RandomHorizontalFlip(0.5), 71 | transforms.RandomGrayscale(0.01), 72 | transforms.RandomRotation(10), 73 | transforms.ColorJitter( 74 | brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1 75 | ), # model more robust to changes in lighting conditions. 76 | transforms.RandomPerspective( 77 | distortion_scale=0.2, p=0.5 78 | ), # can be helpful if your images might have varying perspectives. 79 | transforms.ToTensor(), # saves image as tensor (automatically divides by 255) 80 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 81 | transforms.RandomErasing( 82 | p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value="random" 83 | ), # Should help overfitting 84 | ] 85 | ) 86 | 87 | transform_valid = transforms.Compose( 88 | [ 89 | transforms.ToTensor(), 90 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 91 | ] 92 | ) 93 | 94 | train_dataset = CustomDataset( 95 | dataframe=train_annotations_df, 96 | root_dir=IMAGE_FOLDER, 97 | transform=transform, 98 | balance=True, 99 | ) 100 | valid_dataset = CustomDataset( 101 | dataframe=valid_annotations_df, 102 | root_dir=IMAGE_FOLDER_TEST, 103 | transform=transform_valid, 104 | balance=False, 105 | ) 106 | train_loader = DataLoader( 107 | train_dataset, batch_size=BATCHSIZE, shuffle=True, num_workers=48 108 | ) 109 | valid_loader = DataLoader( 110 | valid_dataset, batch_size=BATCHSIZE, shuffle=False, num_workers=48 111 | ) 112 | 113 | # * Define the model * 114 | 115 | # Initialize the model 116 | MODEL = models.swin_v2_t(weights="DEFAULT") 117 | MODEL.head = torch.nn.Linear(in_features=768, out_features=10, bias=True) 118 | MODEL.to(DEVICE) 119 | MODEL.load_state_dict(torch.load("../AffectNet8_Swin_Combined/model.pt")) 120 | MODEL.head = torch.nn.Linear(in_features=768, out_features=2, bias=True) 121 | MODEL.to(DEVICE) 122 | 123 | 124 | def CCCLoss(x, y): 125 | # Compute means 126 | x_mean = torch.mean(x, dim=0) 127 | y_mean = torch.mean(y, dim=0) 128 | # Compute variances 129 | x_var = torch.var(x, dim=0) 130 | y_var = torch.var(y, dim=0) 131 | # Compute covariance matrix 132 | cov_matrix = torch.matmul( 133 | (x - x_mean).permute(*torch.arange(x.dim() - 1, -1, -1)), y - y_mean 134 | ) / (x.size(0) - 1) 135 | # Compute CCC 136 | numerator = 2 * cov_matrix 137 | denominator = x_var + y_var + torch.pow((x_mean - y_mean), 2) 138 | ccc = torch.mean(numerator / denominator) 139 | return -ccc 140 | 141 | 142 | val_loss = nn.MSELoss() 143 | aro_loss = nn.MSELoss() 144 | 145 | optimizer = optim.AdamW(MODEL.parameters(), lr=LR) 146 | lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=BATCHSIZE * NUM_EPOCHS) 147 | 148 | # ***** Train the model ***** 149 | print("--- Start training ---") 150 | scaler = torch.cuda.amp.GradScaler() 151 | best_valid_loss = 100 152 | l2_lambda = 0.00001 # L1 Regularization 153 | l1_lambda = 0.00001 # L2 Regularization 154 | 155 | for epoch in range(NUM_EPOCHS): 156 | 
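    # Editor's note: per output dimension, the loss below is 3 * MSE(pred, true) - CCC(pred, true),
    # with CCC = 2*cov(pred, true) / (var(pred) + var(true) + (mean(pred) - mean(true))^2) as
    # implemented by CCCLoss above (which returns the negative CCC). The l1_reg / l2_reg sums
    # are accumulated but, as written, never added to the loss.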
MODEL.train() 157 | total_train_correct = 0 158 | total_train_samples = 0 159 | current_lr = optimizer.param_groups[0]["lr"] 160 | for images, _, val_true, aro_true in tqdm( 161 | train_loader, desc="Epoch train_loader progress" 162 | ): 163 | images, val_true, aro_true = ( 164 | images.to(DEVICE), 165 | val_true.to(DEVICE), 166 | aro_true.to(DEVICE), 167 | ) 168 | optimizer.zero_grad() 169 | train_loss = 0 170 | l2_reg = 0 171 | l1_reg = 0 172 | with torch.autocast(device_type="cuda", dtype=torch.float16): 173 | outputs = MODEL(images) 174 | val_pred = outputs[:, 0] 175 | aro_pred = outputs[:, 1] 176 | for param in MODEL.parameters(): 177 | l2_reg += torch.norm(param, 2) # **2 178 | l1_reg += torch.norm(param, 1) 179 | loss = ( 180 | 3 * val_loss(val_pred.cuda(), val_true.cuda()) 181 | + 3 * aro_loss(aro_pred.cuda(), aro_true.cuda()) 182 | + CCCLoss(val_pred.cuda(), val_true.cuda()) 183 | + CCCLoss(aro_pred.cuda(), aro_true.cuda()) 184 | ) 185 | train_loss += loss.item() 186 | scaler.scale(loss).backward() 187 | scaler.step(optimizer) 188 | scaler.update() 189 | 190 | MODEL.eval() 191 | valid_loss = 0.0 192 | total_valid_correct = 0 193 | total_valid_samples = 0 194 | with torch.no_grad(): 195 | for images, _, val_true, aro_true in valid_loader: 196 | images, val_true, aro_true = ( 197 | images.to(DEVICE), 198 | val_true.to(DEVICE), 199 | aro_true.to(DEVICE), 200 | ) 201 | with torch.autocast(device_type="cuda", dtype=torch.float16): 202 | outputs = MODEL(images) 203 | val_pred = outputs[:, 0] 204 | aro_pred = outputs[:, 1] 205 | loss = ( 206 | 3 * val_loss(val_pred.cuda(), val_true.cuda()) 207 | + 3 * aro_loss(aro_pred.cuda(), aro_true.cuda()) 208 | + CCCLoss(val_pred.cuda(), val_true.cuda()) 209 | + CCCLoss(aro_pred.cuda(), aro_true.cuda()) 210 | ) 211 | valid_loss += loss.item() 212 | 213 | print( 214 | f"Epoch [{epoch+1}/{NUM_EPOCHS}] - " 215 | f"Training Loss: {train_loss/len(train_loader):.4f}, " 216 | f"Validation Loss: {valid_loss/len(valid_loader):.4f}, " 217 | f"Learning Rate: {current_lr:.8f}, " 218 | ) 219 | 220 | if valid_loss < best_valid_loss: 221 | best_valid_loss = valid_loss 222 | print(f"Saving model at epoch {epoch+1}") 223 | torch.save(MODEL.state_dict(), "model.pt") # Save the best model 224 | -------------------------------------------------------------------------------- /models/evaluation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.metrics import ( 4 | mean_absolute_error, 5 | mean_squared_error, 6 | root_mean_squared_error, 7 | classification_report, 8 | ) 9 | import os 10 | import subprocess 11 | 12 | ONLY_INFERENCE = False 13 | root_dir = "." 
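# Editor's note: this driver walks every model directory under root_dir, re-runs its
# generate_csv.py whenever a trained model.pt is present (set ONLY_INFERENCE = True to skip
# that step and only score existing files), and then evaluates the resulting inference.csv
# with the classification and valence/arousal metrics defined below.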
14 | command = "echo test" 15 | command = "python3 generate_csv.py" 16 | 17 | label_mapping = { 18 | "Neutral": 0, 19 | "Happy": 1, 20 | "Sad": 2, 21 | "Surprise": 3, 22 | "Fear": 4, 23 | "Disgust": 5, 24 | "Anger": 6, 25 | "Contempt": 7, 26 | } 27 | 28 | 29 | def get_subdirectories(directory): 30 | subdirs = [] 31 | for item in os.listdir(directory): 32 | full_path = os.path.abspath(os.path.join(directory, item)) 33 | if os.path.isdir(full_path): 34 | subdirs.append(full_path) 35 | return subdirs 36 | 37 | 38 | def get_files_in_directory(directory): 39 | files = [] 40 | # Iterate over each item in the directory 41 | for item in os.listdir(directory): 42 | # Check if it's a file 43 | if os.path.isfile(os.path.join(directory, item)): 44 | files.append(item) 45 | return files 46 | 47 | 48 | def concordance_correlation_coefficient(true_values, pred_values): 49 | mean_true = np.mean(true_values) 50 | mean_pred = np.mean(pred_values) 51 | 52 | num = 2 * np.cov(true_values, pred_values)[0, 1] 53 | den = np.var(true_values) + np.var(pred_values) + (mean_true - mean_pred) ** 2 54 | return num / den 55 | 56 | 57 | def print_discrete(true_labels, pred_labels): 58 | if max(true_labels) == 7: 59 | class_names = [ 60 | "Anger", 61 | "Disgust", 62 | "Fear", 63 | "Happy", 64 | "Sad", 65 | "Surprise", 66 | "Neutral", 67 | "Contempt", 68 | ] 69 | else: 70 | class_names = [ 71 | "Anger", 72 | "Disgust", 73 | "Fear", 74 | "Happy", 75 | "Sad", 76 | "Surprise", 77 | "Neutral", 78 | ] 79 | 80 | mapped_labels = [label_mapping[name] for name in class_names] 81 | map = classification_report( 82 | true_labels, 83 | pred_labels, 84 | labels=mapped_labels, 85 | target_names=class_names, 86 | zero_division=0.0, 87 | digits=3, 88 | output_dict=True, 89 | ) 90 | precision = map["weighted avg"]["precision"] 91 | recall = map["weighted avg"]["recall"] 92 | f1 = map["weighted avg"]["f1-score"] 93 | print(f"Precision: {precision:.3f}") 94 | print(f"Recall: {recall:.3f}") 95 | print(f"F1: {f1:.3f}") 96 | 97 | 98 | def evaluate(path: str): 99 | df = pd.read_csv(path) 100 | discrete = "cat_pred" in df.columns 101 | va = "val_pred" in df.columns 102 | if va: 103 | true_values = list(df["val_true"]) + list(df["aro_true"]) 104 | pred_values = list(df["val_pred"]) + list(df["aro_pred"]) 105 | if va: 106 | mse = mean_squared_error(true_values, pred_values) 107 | mae = mean_absolute_error(true_values, pred_values) 108 | rmse = root_mean_squared_error(true_values, pred_values) 109 | ccc = concordance_correlation_coefficient(true_values, pred_values) 110 | print(path) 111 | if discrete: 112 | print_discrete(df["cat_true"], df["cat_pred"]) 113 | if va: 114 | print(f"Mean Squared Error (MSE): {mse:.4f}") 115 | print(f"Mean Absolute Error (MAE): {mae:.4f}") 116 | print(f"Root Mean Squared Error (RMSE): {rmse:.4f}") 117 | print(f"Concordance Correlation Coefficient (CCC): {ccc:.4f}") 118 | 119 | 120 | for subdir in get_subdirectories("."): 121 | files = get_files_in_directory(subdir) 122 | if ONLY_INFERENCE is False: 123 | if "model.pt" in files: 124 | result = subprocess.run( 125 | command, shell=True, cwd=subdir, capture_output=True, text=True 126 | ) 127 | files = get_files_in_directory(subdir) 128 | if "inference.csv" in files: 129 | evaluate(os.path.join(subdir, "inference.csv")) 130 | print("\n") 131 | print(50 * "-") 132 | print("\n") 133 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 
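# Note: scikit-learn >= 1.4 is needed here because models/evaluation.py imports
# sklearn.metrics.root_mean_squared_error, which older releases do not provide.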
asttokens==2.4.1 2 | comm==0.2.2 3 | contourpy==1.2.0 4 | cycler==0.12.1 5 | debugpy==1.8.1 6 | decorator==5.1.1 7 | exceptiongroup==1.2.0 8 | executing==2.0.1 9 | filelock==3.13.1 10 | fonttools==4.50.0 11 | fsspec==2024.2.0 12 | ipykernel==6.29.3 13 | ipython==8.22.2 14 | jedi==0.19.1 15 | Jinja2==3.1.3 16 | joblib==1.3.2 17 | jupyter_client==8.6.1 18 | jupyter_core==5.7.2 19 | kiwisolver==1.4.5 20 | MarkupSafe==2.1.5 21 | matplotlib==3.8.3 22 | matplotlib-inline==0.1.6 23 | mpmath==1.3.0 24 | nest-asyncio==1.6.0 25 | networkx==3.2.1 26 | numpy==1.26.4 27 | nvidia-cublas-cu12==12.1.3.1 28 | nvidia-cuda-cupti-cu12==12.1.105 29 | nvidia-cuda-nvrtc-cu12==12.1.105 30 | nvidia-cuda-runtime-cu12==12.1.105 31 | nvidia-cudnn-cu12==8.9.2.26 32 | nvidia-cufft-cu12==11.0.2.54 33 | nvidia-curand-cu12==10.3.2.106 34 | nvidia-cusolver-cu12==11.4.5.107 35 | nvidia-cusparse-cu12==12.1.0.106 36 | nvidia-nccl-cu12==2.19.3 37 | nvidia-nvjitlink-cu12==12.4.99 38 | nvidia-nvtx-cu12==12.1.105 39 | packaging==24.0 40 | pandas==2.2.1 41 | parso==0.8.3 42 | pexpect==4.9.0 43 | pillow==10.2.0 44 | platformdirs==4.2.0 45 | prompt-toolkit==3.0.43 46 | psutil==5.9.8 47 | ptyprocess==0.7.0 48 | pure-eval==0.2.2 49 | Pygments==2.17.2 50 | pyparsing==3.1.2 51 | python-dateutil==2.9.0.post0 52 | pytz==2024.1 53 | pyzmq==25.1.2 54 | scikit-learn==1.4.1.post1 55 | scipy==1.12.0 56 | six==1.16.0 57 | stack-data==0.6.3 58 | sympy==1.12 59 | threadpoolctl==3.3.0 60 | torch==2.2.1 61 | torchvision==0.17.1 62 | tornado==6.4 63 | tqdm==4.66.2 64 | traitlets==5.14.2 65 | triton==2.2.0 66 | typing_extensions==4.10.0 67 | tzdata==2024.1 68 | wcwidth==0.2.13 69 | 70 | --------------------------------------------------------------------------------
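Editor's addendum (hedged): the snippet below is a minimal, self-contained sketch of single-image inference with one of the discrete AffectNet-8 checkpoints trained by the scripts above. The checkpoint path, the example image path and the Resize step are assumptions added for illustration (the repository's loaders apply no Resize and fall back to 224x224 blanks, so inputs are assumed to be 224x224 face crops); the softmax is only there to print readable scores. Everything else mirrors models/AffectNet8_Swin_Discrete/train.py and generate_csv.py.

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Class order as used by models/evaluation.py
CLASS_NAMES = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Anger", "Contempt"]

# Same backbone and head layout as models/AffectNet8_Swin_Discrete/train.py
model = models.swin_v2_t(weights="DEFAULT")
model.head = nn.Linear(in_features=768, out_features=8, bias=True)
state = torch.load("models/AffectNet8_Swin_Discrete/model.pt", map_location=DEVICE)  # placeholder path
model.load_state_dict(state)
model.to(DEVICE)
model.eval()

# Validation-style preprocessing; Resize is an assumption for arbitrary input images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = Image.open("example_face.jpg").convert("RGB")  # placeholder, assumed pre-cropped to the face
with torch.no_grad():
    logits = model(transform(image).unsqueeze(0).to(DEVICE))
    probs = torch.softmax(logits, dim=1).squeeze(0)
print(CLASS_NAMES[int(probs.argmax())], f"{float(probs.max()):.3f}")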