├── .dockerignore ├── .env ├── .github └── workflows │ ├── build.yaml │ └── server-test.yaml ├── .gitignore ├── CITATION.cff ├── Dockerfile ├── LICENSE ├── README.md ├── assets └── emolysis.webp ├── env.d.ts ├── index.html ├── package.json ├── public └── 404.html ├── service ├── build_env.sh ├── gen_audio_result.py ├── gen_text_result.py ├── gen_visual_result.py ├── main.py ├── model │ ├── __init__.py │ ├── audio_head.py │ ├── face_detector.py │ ├── linguistic_head.py │ ├── text2speech.py │ └── visual_head.py ├── test │ └── integration_test.py └── util │ ├── consts.py │ ├── label_space_mapping.py │ └── misc.py ├── src ├── App.vue ├── assets │ └── tailwind.css ├── components │ ├── AffectiveBarPlot.vue │ ├── AffectiveLinePlot.vue │ ├── FaceBlock.vue │ ├── FaceInfo.vue │ └── FooterBlock.vue ├── config.ts ├── global │ ├── api.ts │ ├── consts.ts │ └── socket.ts ├── main.ts ├── preprocess │ ├── audio.ts │ ├── common.ts │ ├── faces.ts │ ├── overall.ts │ └── text.ts ├── router │ └── index.ts ├── shims-vue.d.ts ├── stores │ ├── dataPathStore.ts │ ├── faceCheckedStore.ts │ ├── lineChartCheckedStore.ts │ └── videoStore.ts ├── utils.ts └── views │ ├── MainView.vue │ └── WelcomeView.vue ├── tailwind.config.js ├── tsconfig.config.json ├── tsconfig.json └── vite.config.ts /.dockerignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | .DS_Store 12 | dist-ssr 13 | coverage 14 | *.local 15 | 16 | /cypress/videos/ 17 | /cypress/screenshots/ 18 | 19 | # Editor directories and files 20 | .vscode/* 21 | !.vscode/extensions.json 22 | .idea 23 | *.suo 24 | *.ntvs* 25 | *.njsproj 26 | *.sln 27 | *.sw? 
28 | /data 29 | *.ckpt 30 | *.pt 31 | *.pth 32 | /service/.idea 33 | /service/data 34 | .env.local 35 | __pycache__ 36 | package-lock.json 37 | src 38 | checkpoints/* 39 | public 40 | **/__pycache__/ 41 | **/*.pyc 42 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | VITE_API_URL= -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | build-demo: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v1 14 | 15 | - name: Install dependencies 16 | run: | 17 | npm install 18 | env: 19 | CI: true 20 | 21 | - uses: robinraju/release-downloader@v1.7 22 | name: Download Sample Data 23 | with: 24 | tag: "misc" 25 | fileName: "data.zip" 26 | 27 | - name: Unzip Sample Data 28 | run: | 29 | mkdir public/data 30 | unzip data.zip -d public/data 31 | rm data.zip 32 | 33 | - name: Add CNAME 34 | run: echo "emolysis.controlnet.space" > public/CNAME 35 | 36 | - name: Build with Sample Data 37 | env: 38 | VITE_API_URL: ${{ secrets.VITE_API_URL }} 39 | run: npm run build 40 | 41 | - name: Init git and commit 42 | env: 43 | EMOLYSIS_REPO_PRIVATE: ${{ secrets.EMOLYSIS_REPO_PRIVATE }} 44 | run: | 45 | mkdir -p ~/.ssh/ 46 | echo "$EMOLYSIS_REPO_PRIVATE" > ~/.ssh/id_rsa 47 | chmod 600 ~/.ssh/id_rsa 48 | ssh-keyscan github.com >> ~/.ssh/known_hosts 49 | git config --global user.email "smczx@hotmail.com" 50 | git config --global user.name "ControlNet" 51 | rm -rf .git 52 | cd dist 53 | git init 54 | git add -A 55 | git commit -m "update" 56 | git remote add origin-ssh git@github.com:ControlNet/emolysis.git 57 | 58 | - name: Push 59 | run: | 60 | cd dist 61 | git push origin-ssh HEAD:web -f 62 | 63 | build-docker: 64 | runs-on: ubuntu-latest 65 | 66 | steps: 67 | - uses: actions/checkout@v1 68 | 69 | - name: Install dependencies 70 | run: | 71 | npm install 72 | env: 73 | CI: true 74 | 75 | - name: Build frontend 76 | run: npm run build 77 | 78 | - name: Login to DockerHub 79 | uses: docker/login-action@v2 80 | with: 81 | username: ${{ secrets.DOCKERHUB_USERNAME }} 82 | password: ${{ secrets.DOCKERHUB_TOKEN }} 83 | 84 | - name: Build and push 85 | uses: docker/build-push-action@v4 86 | with: 87 | push: true 88 | tags: controlnet/emolysis:latest 89 | context: . 
-------------------------------------------------------------------------------- /.github/workflows/server-test.yaml: -------------------------------------------------------------------------------- 1 | name: Server Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | integration-test: 10 | name: Integration Test 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v1 14 | 15 | - name: Setup Python 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: "3.9" 19 | 20 | - uses: robinraju/release-downloader@v1.7 21 | name: Download Sample Data 22 | with: 23 | tag: "misc" 24 | fileName: "*" 25 | 26 | - name: Unzip Sample Data 27 | run: | 28 | mkdir service/data 29 | unzip data.zip -d service/data 30 | rm data.zip 31 | 32 | - name: Move Checkpoints 33 | run: | 34 | mkdir service/checkpoints 35 | mv audio_model_trill.pt service/checkpoints 36 | mv linguistic_head_en.ckpt service/checkpoints 37 | mv linguistic_head_zh.ckpt service/checkpoints 38 | 39 | - name: Install Dependencies 40 | working-directory: ./service 41 | run: | 42 | sudo apt update 43 | sudo apt install -y ffmpeg 44 | pip install "pip<24.1" 45 | pip install tensorflow==2.9.1 46 | pip install torch==1.11.0+cpu torchvision==0.12.0+cpu torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cpu 47 | pip install numpy==1.26.4 numba==0.60.0 pyzmq==24.0.1 scipy==1.13.1 tensorneko==0.2.7 transformers==4.24.0 hsemotion==0.3 facenet_pytorch==2.5.2 tensorflow_hub==0.12.0 pydub==0.25.1 librosa==0.9.2 BeautifulSoup4==4.11.1 moviepy==1.0.3 lxml==4.9.1 pandas==1.3.5 fastapi==0.89.0 starlette==0.22.0 uvicorn==0.20.0 websockets==10.4 python-multipart==0.0.5 timm==0.9.7 torchmetrics==0.11.4 48 | pip install openai_whisper==20231117 49 | 50 | - name: Run Test for ZH 51 | working-directory: ./service 52 | run: python test/integration_test.py --video_path ./data/1/video.mp4 --lang zh 53 | env: 54 | PYTHONPATH: ${{ github.workspace }}/service 55 | 56 | - name: Run Test for EN 57 | working-directory: ./service 58 | run: python test/integration_test.py --video_path ./data/1/video.mp4 --lang en 59 | env: 60 | PYTHONPATH: ${{ github.workspace }}/service 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | .DS_Store 12 | dist 13 | dist-ssr 14 | coverage 15 | *.local 16 | 17 | /cypress/videos/ 18 | /cypress/screenshots/ 19 | 20 | # Editor directories and files 21 | .vscode/* 22 | !.vscode/extensions.json 23 | .idea 24 | *.suo 25 | *.ntvs* 26 | *.njsproj 27 | *.sln 28 | *.sw? 29 | /data 30 | public/data 31 | *.ckpt 32 | *.pt 33 | *.pth 34 | /service/.idea 35 | /service/data 36 | .env.local 37 | __pycache__ 38 | package-lock.json -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you find this work useful for your research, please consider citing it." 
3 | preferred-citation: 4 | type: conference-paper 5 | authors: 6 | - family-names: "Ghosh" 7 | given-names: "Shreya" 8 | - family-names: "Cai" 9 | given-names: "Zhixi" 10 | - family-names: "Gupta" 11 | given-names: "Parul" 12 | - family-names: "Sharma" 13 | given-names: "Garima" 14 | - family-names: "Dhall" 15 | given-names: "Abhinav" 16 | - family-names: "Hayat" 17 | given-names: "Munawar" 18 | - family-names: "Gedeon" 19 | given-names: "Tom" 20 | collection-title: "12th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)" 21 | title: "Emolysis: A Multimodal Open-Source Group Emotion Analysis and Visualization Toolkit" 22 | year: 2024 23 | start: 116 24 | end: 118 25 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime 2 | 3 | EXPOSE 8000 4 | 5 | WORKDIR /app 6 | 7 | RUN apt update 8 | RUN apt install -y ffmpeg git 9 | RUN pip install "pip<24.1" 10 | RUN /opt/conda/bin/conda init bash 11 | RUN pip install tensorflow==2.9.1 12 | RUN pip install torchvision==0.14.1 torchaudio==0.13.1 torch==1.13.1 numpy==1.26.4 numba==0.60.0 pyzmq==24.0.1 tensorneko==0.2.7 transformers==4.24.0 hsemotion==0.3 facenet_pytorch==2.5.2 tensorflow_hub==0.12.0 pydub==0.25.1 librosa==0.9.2 BeautifulSoup4==4.11.1 moviepy==1.0.3 lxml==4.9.1 pandas==1.3.5 fastapi==0.89.0 starlette==0.22.0 uvicorn==0.20.0 websockets==10.4 python-multipart==0.0.5 timm==0.9.7 torchmetrics==0.11.4 13 | RUN pip install openai_whisper==20231117 14 | 15 | COPY . . 16 | 17 | RUN pip cache purge 18 | RUN apt-get clean && rm -rf /var/lib/apt/lists/* 19 | 20 | ENV LD_LIBRARY_PATH=/opt/conda/lib 21 | ENV TF_FORCE_GPU_ALLOW_GROWTH=true 22 | 23 | ENTRYPOINT ["/opt/conda/bin/python", "service/main.py", "--port", "8000"] 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Emolysis: A Multimodal Open-Source Group Emotion Analysis and Visualization Toolkit 2 | 3 |
36 | 37 | This repo is official repository for the paper [Emolysis: A Multimodal Open-Source Group Emotion Analysis and Visualization Toolkit](https://ieeexplore.ieee.org/abstract/document/10970223). 38 | 39 | ## Get Started 40 | 41 | We provided a static demo review for you to try. Please visit [https://emolysis.controlnet.space/local/1](https://emolysis.controlnet.space/local/1). 42 | 43 | If you want to analyze your own video, please follow the instructions below to deploy the server. 44 | 45 | ## Deploy the Server 46 | 47 | ### From Docker (x86 with CUDA) 48 | 49 | Requires: 50 | - Docker 51 | - nvidia-docker 52 | 53 | Run the server. 54 | ```bash 55 | docker run --runtime nvidia -p :8000 [-v :/app/checkpoints] --name emolysis controlnet/emolysis 56 | ``` 57 | 58 | Then, you can access the app at `http://127.0.0.1:`. 59 | 60 | ### From Source 61 | 62 | Requires: 63 | - Conda 64 | - Node.js 65 | 66 | Install dependencies. 67 | ```bash 68 | npm install 69 | npm run build 70 | cd service 71 | bash -i build_env.sh # use `build_env.mac.sh` for arm-based mac 72 | conda activate emolysis 73 | cd .. 74 | ``` 75 | 76 | Run the server. 77 | ```bash 78 | python service/main.py --port 79 | ``` 80 | 81 | Then, you can access the app at `http://127.0.0.1:`. 82 | 83 | ## References 84 | 85 | If you find this work useful for your research, please consider citing it. 86 | ```bibtex 87 | @inproceedings{ghosh2024emolysis, 88 | title={Emolysis: A multimodal open-source group emotion analysis and visualization toolkit}, 89 | author={Ghosh, Shreya and Cai, Zhixi and Gupta, Parul and Sharma, Garima and Dhall, Abhinav and Hayat, Munawar and Gedeon, Tom}, 90 | booktitle={2024 12th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)}, 91 | pages={116--118}, 92 | year={2024}, 93 | organization={IEEE} 94 | } 95 | ``` 96 | -------------------------------------------------------------------------------- /assets/emolysis.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ControlNet/emolysis/aa608696939d33df8d4c513adbbfc5a3865117bc/assets/emolysis.webp -------------------------------------------------------------------------------- /env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | 3 | interface ImportMetaEnv { 4 | VITE_API_URL: string 5 | } 6 | 7 | interface ImportMeta { 8 | env: ImportMetaEnv 9 | } -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Emolysis 7 | 28 | 29 | 30 |
31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "emolysis", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "vite", 7 | "build": "run-p type-check build-only", 8 | "preview": "vite preview", 9 | "build-only": "vite build", 10 | "type-check": "vue-tsc --noEmit" 11 | }, 12 | "dependencies": { 13 | "@fortawesome/fontawesome-svg-core": "^6.2.1", 14 | "@fortawesome/free-brands-svg-icons": "^6.2.1", 15 | "@fortawesome/free-regular-svg-icons": "^6.2.1", 16 | "@fortawesome/free-solid-svg-icons": "^6.2.1", 17 | "@fortawesome/vue-fontawesome": "^3.0.2", 18 | "axios": "^1.2.2", 19 | "d3": "^7.8.0", 20 | "daisyui": "^2.46.1", 21 | "html2canvas": "^1.4.1", 22 | "lodash": "^4.17.21", 23 | "pinia": "^2.0.28", 24 | "vue": "^3.2.45", 25 | "vue-router": "^4.1.6" 26 | }, 27 | "devDependencies": { 28 | "@types/d3": "^7.4.0", 29 | "@types/lodash": "^4.14.191", 30 | "@types/node": "^18.11.12", 31 | "@vitejs/plugin-vue": "^4.0.0", 32 | "@vitejs/plugin-vue-jsx": "^3.0.0", 33 | "@vue/language-server": "^2.1.6", 34 | "@vue/tsconfig": "^0.1.3", 35 | "autoprefixer": "^10.4.13", 36 | "npm-run-all": "^4.1.5", 37 | "postcss": "^8.4.20", 38 | "tailwindcss": "^3.2.4", 39 | "typescript": "~4.9.4", 40 | "vite": "^4.0.0", 41 | "vue-tsc": "^1.0.12" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /public/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Emolysis 6 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /service/build_env.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | ENV_NAME=${1:-emolysis} 4 | 5 | conda create -y -n $ENV_NAME python=3.9 6 | conda activate $ENV_NAME 7 | conda install -y -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 8 | conda install -y ffmpeg 9 | 10 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ 11 | 12 | pip install "pip<24.1" 13 | pip install tensorflow==2.9.1 14 | pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 15 | pip install numpy==1.26.4 numba==0.60.0 pyzmq==24.0.1 scipy==1.13.1 tensorneko==0.2.7 transformers==4.24.0 hsemotion==0.3 facenet_pytorch==2.5.2 tensorflow_hub==0.12.0 pydub==0.25.1 librosa==0.9.2 BeautifulSoup4==4.11.1 moviepy==1.0.3 lxml==4.9.1 pandas==1.3.5 fastapi==0.89.0 starlette==0.22.0 uvicorn==0.20.0 websockets==10.4 python-multipart==0.0.5 timm==0.9.7 torchmetrics==0.11.4 16 | pip install openai_whisper==20231117 17 | 18 | # download models 19 | mkdir -p checkpoints 20 | wget https://github.com/ControlNet/emolysis/releases/download/misc/audio_model_trill.pt -O checkpoints/audio_model_trill.pt 21 | wget https://github.com/ControlNet/emolysis/releases/download/misc/linguistic_head_en.ckpt -O checkpoints/linguistic_head_en.ckpt 22 | wget https://github.com/ControlNet/emolysis/releases/download/misc/linguistic_head_zh.ckpt -O checkpoints/linguistic_head_zh.ckpt -------------------------------------------------------------------------------- /service/gen_audio_result.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from collections import OrderedDict 3 | 4 | import librosa 5 | import numpy as np 6 | import pandas 
as pd 7 | import torch 8 | from pydub import AudioSegment 9 | from starlette.websockets import WebSocket 10 | from torch.autograd import Variable 11 | 12 | from model.audio_head import get_audio_model, get_trill_model 13 | from util.consts import DEVICE, SEGMENT_STRIDE, SEGMENT_DURATION, REQUIRED_SAMPLE_RATE, COMMUNICATION_AUDIO_STEP 14 | from util.label_space_mapping import bold_to_main, bold_to_main_valence, bold_to_main_arousal 15 | 16 | 17 | def get_emotion_features_from_audio(audio, original_sample_rate): 18 | audio_model = get_audio_model() 19 | features = extract_trill_features(audio, original_sample_rate) 20 | 21 | with torch.no_grad(): 22 | features = Variable(features).to(DEVICE) 23 | output_dis, output_con, output_feat = audio_model(features.float()) 24 | 25 | output_emo = output_dis.cpu().detach().numpy() 26 | output_con = output_con.cpu().detach().numpy() 27 | output_valence = output_con[:, 0] 28 | output_arousal = output_con[:, 1] 29 | pen_features = output_feat.cpu().detach().numpy() 30 | 31 | return output_valence, output_arousal, output_emo, pen_features 32 | 33 | 34 | def extract_trill_features(audio, original_sample_rate): 35 | module = get_trill_model() 36 | float_audio = audio.astype(np.float32) / np.iinfo(np.int16).max 37 | if original_sample_rate != REQUIRED_SAMPLE_RATE: 38 | float_audio = librosa.core.resample( 39 | float_audio.T, orig_sr=original_sample_rate, target_sr=REQUIRED_SAMPLE_RATE, 40 | res_type='kaiser_best') 41 | float_audio = float_audio.flatten() 42 | emb_dict = module(samples=float_audio, sample_rate=16000) 43 | emb = emb_dict['embedding'] 44 | emb.shape.assert_is_compatible_with([None, 512]) 45 | feat = np.average(emb, axis=0) 46 | feat = torch.as_tensor(np.array(feat).astype('float')) 47 | # add a dimension to act as batch dimension 48 | feat = torch.unsqueeze(feat, 0) 49 | return feat 50 | 51 | 52 | async def process_audio_file(file_path: str, result_path: str, socket: WebSocket): 53 | clip = AudioSegment.from_file(file_path) 54 | orig_sampling_rate = clip.frame_rate 55 | 56 | data = OrderedDict() 57 | 58 | arange_iter = np.arange(0.0, clip.duration_seconds, SEGMENT_STRIDE) 59 | 60 | for (n, i) in enumerate(arange_iter): 61 | start_time = int(i * 1000) 62 | end_time = int(min(i + SEGMENT_DURATION, clip.duration_seconds) * 1000) 63 | 64 | segment = clip[start_time:end_time] 65 | # pass audio segment to audio based model 66 | # get ndarray from AudioSegment object 67 | audio_array = np.array(segment.get_array_of_samples()) 68 | audio_features = get_emotion_features_from_audio(audio_array, orig_sampling_rate) 69 | audio_valence, audio_arousal, audio_emotion, _ = audio_features 70 | 71 | # Mapping audio outputs to the main label space 72 | main_audio_emo_prob = bold_to_main(audio_emotion[0]) 73 | main_audio_valence = bold_to_main_valence(audio_valence[0]) 74 | main_audio_arousal = bold_to_main_arousal(audio_arousal[0]) 75 | result = np.array([main_audio_arousal, main_audio_valence, *main_audio_emo_prob]) 76 | 77 | start_time = start_time / 1000 78 | end_time = end_time / 1000 79 | mid_time = start_time + SEGMENT_STRIDE 80 | if mid_time < clip.duration_seconds: 81 | if (start_time, mid_time) in data: 82 | data[(start_time, mid_time)].append(result) 83 | else: 84 | data[(start_time, mid_time)] = [result] 85 | 86 | if (mid_time, end_time) in data: 87 | data[(mid_time, end_time)].append(result) 88 | else: 89 | data[(mid_time, end_time)] = [result] 90 | else: 91 | if (start_time, end_time) in data: 92 | data[(start_time, end_time)].append(result) 93 | else: 
94 | data[(start_time, end_time)] = [result] 95 | 96 | if (n + 1) % COMMUNICATION_AUDIO_STEP == 0: 97 | await socket.send_json({"status": "audio", "data": {"current": n, "total": len(arange_iter)}}) 98 | await socket.receive_text() 99 | 100 | df = [] 101 | 102 | for key, value in data.items(): 103 | value = np.stack(value, axis=0).mean(axis=0) 104 | value[2:] = value[2:] / value[2:].sum() 105 | df.append({ 106 | "start": key[0], 107 | "end": key[1], 108 | "valence": value[1], 109 | "arousal": value[0], 110 | "emotion0": value[2], 111 | "emotion1": value[3], 112 | "emotion2": value[4], 113 | "emotion3": value[5], 114 | "emotion4": value[6], 115 | "emotion5": value[7], 116 | "emotion6": value[8], 117 | "emotion7": value[9], 118 | "emotion8": value[10], 119 | }) 120 | 121 | pathlib.Path(result_path).parent.mkdir(exist_ok=True, parents=True) 122 | pd.DataFrame(df).to_csv(result_path, index=False) 123 | print(f"[Audio Head] Process {pathlib.Path(file_path).parent.name}") 124 | -------------------------------------------------------------------------------- /service/gen_text_result.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pathlib 3 | 4 | import pandas as pd 5 | from starlette.websockets import WebSocket 6 | 7 | from model.linguistic_head import get_tokenizer_zh, get_linguistic_model_zh, get_tokenizer_en, predict_emotion_en, \ 8 | get_linguistic_model_en 9 | from util.consts import DEVICE 10 | from util.label_space_mapping import bold_to_main, bold_to_main_valence, bold_to_main_arousal 11 | 12 | 13 | def get_results_from_text_zh(msg): 14 | tokenizer = get_tokenizer_zh() 15 | tokenized_text = tokenizer([msg], return_tensors="pt", padding=True) 16 | linguistic_model = get_linguistic_model_zh() 17 | tokenized_text = tokenized_text.to(DEVICE) 18 | if tokenized_text["input_ids"].shape[1] > 512: 19 | tokenized_text["input_ids"] = tokenized_text["input_ids"][:, :512] 20 | tokenized_text["token_type_ids"] = tokenized_text["token_type_ids"][:, :512] 21 | tokenized_text["attention_mask"] = tokenized_text["attention_mask"][:, :512] 22 | output_valence, output_arousal, output_emo, _ = linguistic_model(tokenized_text) 23 | 24 | output_emo = output_emo.softmax(dim=1).cpu().detach().numpy() 25 | output_valence = output_valence.cpu().detach().numpy() 26 | output_arousal = output_arousal.cpu().detach().numpy() 27 | 28 | # Mapping linguistic outputs to main label space 29 | main_linguistic_emo_prob = bold_to_main(output_emo[0]) 30 | main_linguistic_valence = bold_to_main_valence(output_valence[0]) 31 | main_linguistic_arousal = bold_to_main_arousal(output_arousal[0]) 32 | 33 | return main_linguistic_emo_prob, main_linguistic_valence, main_linguistic_arousal 34 | 35 | 36 | def get_results_from_text_en(msg): 37 | tokenizer = get_tokenizer_en() 38 | tokenized_text = tokenizer([msg], return_tensors="pt", padding=True) 39 | linguistic_model = get_linguistic_model_en() 40 | tokenized_text = tokenized_text.to(DEVICE) 41 | if tokenized_text["input_ids"].shape[1] > 512: 42 | tokenized_text["input_ids"] = tokenized_text["input_ids"][:, :512] 43 | tokenized_text["token_type_ids"] = tokenized_text["token_type_ids"][:, :512] 44 | tokenized_text["attention_mask"] = tokenized_text["attention_mask"][:, :512] 45 | output_valence, output_arousal = linguistic_model(tokenized_text) 46 | 47 | output_valence = output_valence.cpu().detach().numpy()[0, 0] 48 | output_arousal = output_arousal.cpu().detach().numpy()[0, 0] 49 | 50 | # Mapping linguistic outputs to main 
label space 51 | main_linguistic_valence = output_valence * 1000 52 | main_linguistic_arousal = output_arousal * 1000 53 | main_linguistic_emo_prob = predict_emotion_en(msg) 54 | 55 | return main_linguistic_emo_prob, main_linguistic_valence, main_linguistic_arousal 56 | 57 | 58 | def get_results_with_lang(lang): 59 | if lang == "zh": 60 | return get_results_from_text_zh 61 | elif lang == "en": 62 | return get_results_from_text_en 63 | else: 64 | raise ValueError("Language not supported") 65 | 66 | 67 | async def process_text_file(file_path: str, result_path: str, lang: str, socket: WebSocket): 68 | with open(file_path, "r", encoding="UTF-8") as f: 69 | input_text = json.load(f) 70 | 71 | df = [] 72 | 73 | total = len(input_text["segments"]) 74 | 75 | for (i, segment) in enumerate(input_text["segments"]): 76 | start = segment["start"] 77 | end = segment["end"] 78 | msg = segment["text"].strip() 79 | # pass msg to text model 80 | main_linguistic_emo_prob, main_linguistic_valence, main_linguistic_arousal = get_results_with_lang(lang)(msg) 81 | main_linguistic_emo_prob = main_linguistic_emo_prob / main_linguistic_emo_prob.sum() 82 | df.append({ 83 | "start": start, 84 | "end": end, 85 | "valence": main_linguistic_valence, 86 | "arousal": main_linguistic_arousal, 87 | "emotion0": main_linguistic_emo_prob[0], 88 | "emotion1": main_linguistic_emo_prob[1], 89 | "emotion2": main_linguistic_emo_prob[2], 90 | "emotion3": main_linguistic_emo_prob[3], 91 | "emotion4": main_linguistic_emo_prob[4], 92 | "emotion5": main_linguistic_emo_prob[5], 93 | "emotion6": main_linguistic_emo_prob[6], 94 | "emotion7": main_linguistic_emo_prob[7], 95 | "emotion8": main_linguistic_emo_prob[8], 96 | }) 97 | 98 | # if (i + 1) % COMMUNICATION_LINGUISTIC_STEP == 0: 99 | # await socket.send_json({"status": "text", "data": {"current": i, "total": total}}) 100 | # await socket.receive_text() 101 | 102 | pathlib.Path(result_path).parent.mkdir(exist_ok=True, parents=True) 103 | pd.DataFrame(df).to_csv(result_path, index=False) 104 | print(f"[Linguistic Head] Process {pathlib.Path(file_path).parent.name}") 105 | -------------------------------------------------------------------------------- /service/gen_visual_result.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from math import ceil 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from moviepy.editor import VideoFileClip 7 | from starlette.websockets import WebSocket 8 | from tqdm.auto import tqdm 9 | 10 | from model.face_detector import detect_face 11 | from model.visual_head import predict_emotions, get_video_model 12 | from util.consts import COMMUNICATION_VISUAL_STEP 13 | from util.label_space_mapping import affectnet_to_main, affectnet_to_main_valence, affectnet_to_main_arousal 14 | 15 | 16 | async def process_video_file(file_path: str, result_path: str, socket: WebSocket): 17 | 18 | video_model = get_video_model() 19 | 20 | with VideoFileClip(file_path) as clip: 21 | results = [] 22 | total = ceil(clip.fps * clip.duration) 23 | 24 | await socket.send_json({"status": "visual start", "data": {"fps": clip.fps}}) 25 | await socket.receive_text() 26 | 27 | for i, frame in enumerate(tqdm(clip.iter_frames(), total=total)): 28 | try: 29 | bounding_boxes, probs = detect_face(frame) 30 | 31 | for j, bbox in enumerate(bounding_boxes): 32 | prob = probs[j] 33 | box = bbox.astype(int) 34 | x1, y1, x2, y2 = box[0:4] 35 | face_img = frame[max(0, y1):y2, max(0, x1):x2] 36 | if len(face_img) != 0: 37 | _, scores = 
predict_emotions(video_model, face_img) 38 | valance = scores[8] 39 | arousal = scores[9] 40 | 41 | emotion_prob = affectnet_to_main(scores) 42 | 43 | valance = int(affectnet_to_main_valence(valance)) 44 | arousal = int(affectnet_to_main_arousal(arousal)) 45 | 46 | emotion_prob = emotion_prob / np.sum(emotion_prob) 47 | 48 | results.append({ 49 | "frame": i, 50 | "x1": x1, 51 | "y1": y1, 52 | "x2": x2, 53 | "y2": y2, 54 | "box_prob": prob, 55 | "emotion0": emotion_prob[0], 56 | "emotion1": emotion_prob[1], 57 | "emotion2": emotion_prob[2], 58 | "emotion3": emotion_prob[3], 59 | "emotion4": emotion_prob[4], 60 | "emotion5": emotion_prob[5], 61 | "emotion6": emotion_prob[6], 62 | "emotion7": emotion_prob[7], 63 | "emotion8": emotion_prob[8], 64 | "valence": valance, 65 | "arousal": arousal, 66 | }) 67 | else: 68 | print(i, ":", "No face") 69 | except Exception as e: 70 | raise e 71 | 72 | if (i + 1) % COMMUNICATION_VISUAL_STEP == 0: 73 | await socket.send_json({"status": "visual", "data": {"current": i, "total": total}}) 74 | await socket.receive_text() 75 | 76 | pathlib.Path(result_path).parent.mkdir(exist_ok=True, parents=True) 77 | pd.DataFrame(results).to_csv(result_path, index=False, sep=",") 78 | print(f"[Visual Head] Process {pathlib.Path(file_path).parent.name}") 79 | -------------------------------------------------------------------------------- /service/main.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import argparse 3 | from pathlib import Path 4 | 5 | import uvicorn 6 | from fastapi import FastAPI, UploadFile 7 | from fastapi.staticfiles import StaticFiles 8 | from moviepy.video.io.VideoFileClip import VideoFileClip 9 | from starlette.middleware.cors import CORSMiddleware 10 | from starlette.requests import Request 11 | from starlette.responses import FileResponse 12 | from starlette.websockets import WebSocket 13 | 14 | from gen_audio_result import process_audio_file 15 | from gen_text_result import process_text_file 16 | from gen_visual_result import process_video_file 17 | from model.text2speech import text2speech 18 | from util.misc import VideoNamePool, range_requests_response, prepare_checkpoints 19 | 20 | warnings.filterwarnings("ignore") 21 | 22 | app = FastAPI() 23 | 24 | app.add_middleware( 25 | CORSMiddleware, 26 | allow_origins=["*"], 27 | allow_credentials=True, 28 | allow_methods=["*"], 29 | allow_headers=["*"], 30 | ) 31 | 32 | VideoNamePool.init() 33 | prepare_checkpoints() 34 | 35 | 36 | @app.get("/api/data/{video_id}/{file_name}") 37 | async def get_file(video_id: str, file_name: str, request: Request): 38 | path = str(Path("data") / video_id / file_name) 39 | if file_name.split(".")[-1] == "mp4": 40 | return range_requests_response( 41 | request, file_path=path, content_type="video/mp4" 42 | ) 43 | else: 44 | return FileResponse( 45 | path=path, media_type="text" 46 | ) 47 | 48 | 49 | async def process_uploaded(video_path: str, lang: str, socket: WebSocket): 50 | video_dir = Path(video_path).parent 51 | text2speech_path = str(video_dir / "text2speech.json") 52 | audio_result_path = str(video_dir / "audio.csv") 53 | text_result_path = str(video_dir / "text.csv") 54 | visual_result_path = str(video_dir / "faces.csv") 55 | 56 | text2speech(video_path, text2speech_path, lang) 57 | await process_audio_file(video_path, audio_result_path, socket) 58 | await socket.send_json({"status": "audio done", "data": {}}) 59 | await socket.receive_text() 60 | 61 | await process_text_file(text2speech_path, 
text_result_path, lang, socket) 62 | await socket.send_json({"status": "text done", "data": {}}) 63 | await socket.receive_text() 64 | 65 | await process_video_file(video_path, visual_result_path, socket) 66 | await socket.send_json({"status": "visual done", "data": {}}) 67 | await socket.receive_text() 68 | 69 | return { 70 | "id": video_dir.name, 71 | "audio": audio_result_path.replace("\\", "/"), 72 | "visual": visual_result_path.replace("\\", "/"), 73 | "text": text_result_path.replace("\\", "/"), 74 | } 75 | 76 | 77 | @app.post("/api/upload") 78 | async def upload_video(file: UploadFile): 79 | file_id = VideoNamePool.get() 80 | path = Path(f"data/{file_id}") 81 | video_path = str(path / "video.mp4") 82 | path.mkdir(exist_ok=True, parents=True) 83 | with open(video_path, "wb") as f: 84 | f.write(await file.read()) 85 | return {"file_id": file_id} 86 | 87 | 88 | @app.get("/api/fps/{video_id}") 89 | async def get_fps(video_id: str): 90 | video_path = Path(f"data/{video_id}/video.mp4") 91 | with VideoFileClip(str(video_path)) as video: 92 | fps = video.fps 93 | return {"fps": fps} 94 | 95 | 96 | @app.websocket("/ws/") 97 | async def socket_connection(socket: WebSocket): 98 | await socket.accept() 99 | video_info = await socket.receive_json() 100 | file_id = video_info["file_id"] 101 | lang = video_info["lang"] 102 | video_path = str(Path("data") / str(file_id) / "video.mp4") 103 | 104 | await socket.send_json({"status": "uploaded", "data": {}}) 105 | await socket.receive_text() 106 | result_paths = await process_uploaded(video_path, lang, socket) 107 | await socket.send_json({"status": "done", "data": result_paths}) 108 | await socket.close() 109 | 110 | 111 | app.mount("/", StaticFiles(directory=Path(__file__).parent.parent / "dist", html=True)) 112 | 113 | if __name__ == '__main__': 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument("--port", "-p", type=int, help="port to run server on", default=8000) 116 | args = parser.parse_args() 117 | uvicorn.run(app, host="0.0.0.0", port=args.port) 118 | -------------------------------------------------------------------------------- /service/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ControlNet/emolysis/aa608696939d33df8d4c513adbbfc5a3865117bc/service/model/__init__.py -------------------------------------------------------------------------------- /service/model/audio_head.py: -------------------------------------------------------------------------------- 1 | import tensorflow_hub as hub 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | from util.consts import DEVICE, AUDIO_MODEL_PATH 7 | 8 | 9 | class AudioDnn(nn.Module): 10 | 11 | def __init__(self): 12 | super(AudioDnn, self).__init__() 13 | self.fc1 = nn.Linear(512, 1024) 14 | self.fc2 = nn.Linear(1024, 2048) 15 | self.fc3 = nn.Linear(2048, 4096) 16 | self.fc4_cont = nn.Linear(4096, 3) 17 | self.fc4_dis = nn.Linear(4096, 26) 18 | self.dropout = nn.Dropout(0.3) 19 | self.bn1 = nn.BatchNorm1d(1024) 20 | self.bn2 = nn.BatchNorm1d(2048) 21 | self.bn3 = nn.BatchNorm1d(4096) 22 | 23 | def forward(self, x): 24 | x = F.relu(self.bn1(self.fc1(x))) 25 | x = F.relu(self.bn2(self.fc2(x))) 26 | x = self.dropout(x) 27 | x = F.relu(self.bn3(self.fc3(x))) 28 | x_pen = self.dropout(x) 29 | x_dis = self.fc4_dis(x_pen) 30 | x_dis = x_dis.sigmoid() # sigmoid activation to get logits for emotion 31 | x_cont = self.fc4_cont(x_pen) # valence, arousal and dominance 32 
| return x_dis, x_cont, x_pen 33 | 34 | 35 | _module = None 36 | 37 | 38 | def get_trill_model(): 39 | global _module 40 | if _module is None: 41 | import tensorflow as tf 42 | for device in tf.config.experimental.list_physical_devices('GPU'): 43 | tf.config.experimental.set_virtual_device_configuration(device, 44 | [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3500)]) 45 | 46 | _module = hub.load('https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3') 47 | return _module 48 | 49 | 50 | def get_audio_model(): 51 | checkpoint = torch.load(AUDIO_MODEL_PATH, map_location=DEVICE) 52 | model = AudioDnn() 53 | model.load_state_dict(checkpoint) 54 | model = model.to(DEVICE) 55 | model.eval() 56 | return model 57 | -------------------------------------------------------------------------------- /service/model/face_detector.py: -------------------------------------------------------------------------------- 1 | from facenet_pytorch.models.mtcnn import MTCNN 2 | 3 | from util.consts import DEVICE 4 | 5 | mtcnn = MTCNN(keep_all=False, post_process=False, 6 | min_face_size=40, device=DEVICE) 7 | 8 | 9 | def detect_face(frame, threshold=0.9): 10 | bounding_boxes, probs = mtcnn.detect(frame, landmarks=False) 11 | if bounding_boxes is not None: 12 | return bounding_boxes[probs > threshold], probs 13 | else: 14 | return [], None 15 | -------------------------------------------------------------------------------- /service/model/linguistic_head.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Tuple, Optional, Union, Sequence 2 | 3 | import numpy as np 4 | import torch 5 | from tensorneko import NekoModel 6 | from tensorneko.layer import Linear 7 | from torch import Tensor 8 | from torch.nn import ReLU, CrossEntropyLoss, BCEWithLogitsLoss, MSELoss 9 | from torch.optim import Adam 10 | from torch.optim.lr_scheduler import ReduceLROnPlateau 11 | from transformers import BertModel, BertTokenizer, pipeline, RobertaTokenizer, RobertaModel 12 | 13 | from util.consts import LINGUISTIC_MODEL_ZH_PATH, DEVICE, LINGUISTIC_MODEL_EN_PATH 14 | 15 | 16 | # English model 17 | 18 | class LinguisticHeadEn(NekoModel): 19 | 20 | def __init__(self, learning_rate: float = 1e-5, finetune: bool = False): 21 | super().__init__("linguistic_head_en") 22 | self.roberta = get_roberta_en() 23 | self.hidden_fc = Linear(768, 1024, build_activation=ReLU) 24 | self.valence_fc = Linear(1024, 1) 25 | self.arousal_fc = Linear(1024, 1) 26 | self.finetune = finetune 27 | 28 | self.valence_loss_fn = MSELoss() 29 | self.arousal_loss_fn = MSELoss() 30 | self.learning_rate = learning_rate 31 | 32 | def forward(self, x: Dict[str, Tensor]) -> Tuple[Tensor, Tensor]: 33 | if not self.finetune: 34 | self.roberta.eval() 35 | with torch.no_grad(): 36 | features = self.roberta(**x).pooler_output 37 | else: 38 | self.roberta.train() 39 | features = self.roberta(**x).pooler_output 40 | 41 | hidden = self.hidden_fc(features) 42 | pred_valence = self.valence_fc(hidden) 43 | pred_arousal = self.arousal_fc(hidden) 44 | return pred_valence, pred_arousal 45 | 46 | def step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]]): 47 | x, y_valence, y_arousal = batch 48 | pred_valence, pred_arousal = self(x) 49 | 50 | results = {} 51 | v_loss = self.valence_loss_fn(pred_valence, y_valence) 52 | a_loss = self.arousal_loss_fn(pred_arousal, y_arousal) 53 | loss = v_loss + a_loss 54 | results["loss"] = loss 55 | results["v_loss"] = v_loss 56 | results["a_loss"] = a_loss 57 | 58 | 
return results 59 | 60 | def training_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None, 61 | optimizer_idx: Optional[int] = None, hiddens: Optional[Tensor] = None 62 | ) -> Dict[str, Tensor]: 63 | return self.step(batch) 64 | 65 | def validation_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None, 66 | dataloader_idx: Optional[int] = None 67 | ) -> Dict[str, Tensor]: 68 | return self.step(batch) 69 | 70 | def configure_optimizers(self): 71 | optimizer = Adam(self.parameters(), lr=self.learning_rate, betas=(0.5, 0.9)) 72 | return { 73 | "optimizer": optimizer, 74 | "lr_scheduler": { 75 | "scheduler": ReduceLROnPlateau(optimizer, factor=0.5, patience=10, verbose=True, min_lr=1e-8), 76 | "monitor": "val_loss" 77 | } 78 | } 79 | 80 | 81 | _roberta_en: Optional[BertModel] = None 82 | _tokenizer_en: Optional[BertTokenizer] = None 83 | _emotion_model_en: Optional[pipeline] = None 84 | 85 | 86 | def get_roberta_en() -> BertModel: 87 | global _roberta_en 88 | if _roberta_en is None: 89 | _roberta_en = RobertaModel.from_pretrained("roberta-base") 90 | return _roberta_en 91 | 92 | 93 | def get_emotion_model_en(): 94 | global _emotion_model_en 95 | if _emotion_model_en is None: 96 | _emotion_model_en = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", 97 | top_k=7) 98 | return _emotion_model_en 99 | 100 | 101 | def predict_emotion_en(text: str): 102 | emo_prob = np.zeros(9) 103 | model = get_emotion_model_en() 104 | result = model(text) 105 | for item in result[0]: 106 | if item["label"] == "joy": 107 | emo_prob[2] = item["score"] 108 | elif item["label"] == "surprise": 109 | emo_prob[5] = item["score"] 110 | elif item["label"] == "neutral": 111 | emo_prob[8] = item["score"] 112 | elif item["label"] == "anger": 113 | emo_prob[1] = item["score"] 114 | elif item["label"] == "sadness": 115 | emo_prob[3] = item["score"] 116 | elif item["label"] == "disgust": 117 | emo_prob[4] = item["score"] 118 | elif item["label"] == "fear": 119 | emo_prob[0] = item["score"] 120 | else: 121 | raise ValueError("Unknown emotion") 122 | return emo_prob 123 | 124 | 125 | def get_tokenizer_en() -> BertTokenizer: 126 | global _tokenizer_en 127 | if _tokenizer_en is None: 128 | _tokenizer_en = RobertaTokenizer.from_pretrained("roberta-base") 129 | return _tokenizer_en 130 | 131 | 132 | _linguistic_model_en = None 133 | 134 | 135 | def get_linguistic_model_en() -> LinguisticHeadEn: 136 | global _linguistic_model_en 137 | if _linguistic_model_en is None: 138 | _linguistic_model_en = LinguisticHeadEn(finetune=False).load_from_checkpoint(LINGUISTIC_MODEL_EN_PATH, 139 | strict=False, map_location=DEVICE) 140 | _linguistic_model_en = _linguistic_model_en.to(DEVICE) 141 | 142 | return _linguistic_model_en 143 | 144 | 145 | 146 | # Chinese model 147 | 148 | class LinguisticHeadZh(NekoModel): 149 | 150 | def __init__(self, learning_rate: float = 1e-5, finetune: bool = False): 151 | super().__init__("linguistic_head_zh") 152 | self.roberta = get_roberta_zh() 153 | self.hidden_fc = Linear(1024, 1024, build_activation=ReLU) 154 | self.sentiment_classifier = Linear(1024, 3) 155 | self.emotion_classifier = Linear(1024, 26) 156 | self.valence_fc = Linear(1024, 1) 157 | self.arousal_fc = Linear(1024, 1) 158 | self.finetune = finetune 159 | 160 | self.sentiment_loss_fn = CrossEntropyLoss() 161 | self.emotion_loss_fn = BCEWithLogitsLoss() 162 | self.valence_loss_fn = MSELoss() 163 | self.arousal_loss_fn = 
MSELoss() 164 | self.learning_rate = learning_rate 165 | 166 | def forward(self, x: Dict[str, Tensor]) -> Tuple[Tensor, Tensor, Tensor, Tensor]: 167 | if not self.finetune: 168 | self.roberta.eval() 169 | with torch.no_grad(): 170 | features = self.roberta(**x).pooler_output 171 | else: 172 | self.roberta.train() 173 | features = self.roberta(**x).pooler_output 174 | 175 | hidden = self.hidden_fc(features) 176 | pred_emotion = self.emotion_classifier(hidden) 177 | pred_valence = self.valence_fc(hidden) 178 | pred_arousal = self.arousal_fc(hidden) 179 | return pred_valence, pred_arousal, pred_emotion, hidden 180 | 181 | def step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]]): 182 | x, y_sentiment, y_emotion, y_valence, y_arousal = batch 183 | pred_valence, pred_arousal, pred_emotion, hidden = self(x) 184 | 185 | results = {} 186 | pred_emotion = pred_emotion.softmax(dim=1) 187 | e_loss = self.emotion_loss_fn(pred_emotion, y_emotion.float()) 188 | v_loss = self.valence_loss_fn(pred_valence, y_valence) 189 | a_loss = self.arousal_loss_fn(pred_arousal, y_arousal) 190 | loss = e_loss + v_loss + a_loss 191 | results["loss"] = loss 192 | results["e_loss"] = e_loss 193 | results["e_acc"] = self._e_acc_fn( 194 | pred_emotion.sigmoid(), y_emotion) 195 | results["v_loss"] = v_loss 196 | results["a_loss"] = a_loss 197 | 198 | return results 199 | 200 | def training_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None, 201 | optimizer_idx: Optional[int] = None, hiddens: Optional[Tensor] = None 202 | ) -> Dict[str, Tensor]: 203 | return self.step(batch) 204 | 205 | def validation_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None, 206 | dataloader_idx: Optional[int] = None 207 | ) -> Dict[str, Tensor]: 208 | return self.step(batch) 209 | 210 | def configure_optimizers(self): 211 | optimizer = Adam(self.parameters(), lr=self.learning_rate, betas=(0.5, 0.9)) 212 | return { 213 | "optimizer": optimizer, 214 | "lr_scheduler": { 215 | "scheduler": ReduceLROnPlateau(optimizer, factor=0.5, patience=10, verbose=True, min_lr=1e-8), 216 | "monitor": "val_loss" 217 | } 218 | } 219 | 220 | @staticmethod 221 | def _s_acc_fn(pred_s, y_s): 222 | s_acc = (torch.argmax(pred_s, dim=1) == y_s).float().mean() 223 | return s_acc 224 | 225 | @staticmethod 226 | def _e_acc_fn(pred_e, y_e): 227 | e_acc = ((pred_e > 0.5) == y_e).float().mean() 228 | return e_acc 229 | 230 | 231 | _roberta_zh: Optional[BertModel] = None 232 | _tokenizer_zh: Optional[BertTokenizer] = None 233 | 234 | 235 | def get_roberta_zh() -> BertModel: 236 | global _roberta_zh 237 | if _roberta_zh is None: 238 | _roberta_zh = BertModel.from_pretrained("hfl/chinese-roberta-wwm-ext-large") 239 | return _roberta_zh 240 | 241 | 242 | def get_tokenizer_zh() -> BertTokenizer: 243 | global _tokenizer_zh 244 | if _tokenizer_zh is None: 245 | _tokenizer_zh = BertTokenizer.from_pretrained("hfl/chinese-roberta-wwm-ext-large") 246 | return _tokenizer_zh 247 | 248 | 249 | _linguistic_model_zh = None 250 | 251 | 252 | def get_linguistic_model_zh() -> LinguisticHeadZh: 253 | global _linguistic_model_zh 254 | if _linguistic_model_zh is None: 255 | _linguistic_model_zh = LinguisticHeadZh(finetune=False).load_from_checkpoint(LINGUISTIC_MODEL_ZH_PATH, 256 | strict=False, map_location=DEVICE) 257 | _linguistic_model_zh = _linguistic_model_zh.to(DEVICE) 258 | 259 | return _linguistic_model_zh 260 | 
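
For orientation, here is a minimal usage sketch of the English linguistic head, mirroring how `gen_text_result.get_results_from_text_en` calls it. It assumes the checkpoints under `checkpoints/` have already been downloaded (for example via `build_env.sh`); the sample sentence is illustrative only.

```python
# Minimal sketch (not part of the service): score one English sentence with the
# linguistic head, following the same steps as gen_text_result.get_results_from_text_en.
import torch

from model.linguistic_head import get_tokenizer_en, get_linguistic_model_en, predict_emotion_en
from util.consts import DEVICE

text = "I am really glad this finally works."  # illustrative input
tokens = get_tokenizer_en()([text], return_tensors="pt", padding=True).to(DEVICE)

with torch.no_grad():
    valence, arousal = get_linguistic_model_en()(tokens)

# gen_text_result.py scales the regression outputs by 1000 into the main label space
print("valence:", float(valence[0, 0]) * 1000)
print("arousal:", float(arousal[0, 0]) * 1000)
print("emotion probs (9-dim):", predict_emotion_en(text))
```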
-------------------------------------------------------------------------------- /service/model/text2speech.py: -------------------------------------------------------------------------------- 1 | import whisper 2 | import json 3 | import pathlib 4 | 5 | _model = None 6 | 7 | 8 | def get_model(): 9 | global _model 10 | if _model is None: 11 | _model = whisper.load_model("base") 12 | return _model 13 | 14 | 15 | def text2speech(file_path: str, output_path: str, lang: str): 16 | model = get_model() 17 | result = model.transcribe(file_path, language=lang) 18 | 19 | pathlib.Path(output_path).parent.mkdir(exist_ok=True, parents=True) 20 | with open(output_path, "w", encoding="UTF-8") as f: 21 | json.dump(result, f, ensure_ascii=False, indent=4) 22 | 23 | print(f"[text2speech] Process {pathlib.Path(file_path).parent.name}") 24 | -------------------------------------------------------------------------------- /service/model/visual_head.py: -------------------------------------------------------------------------------- 1 | from hsemotion.facial_emotions import HSEmotionRecognizer 2 | 3 | from util.consts import DEVICE 4 | 5 | 6 | def predict_emotions(model, face_img): 7 | emotion, scores = model.predict_emotions(face_img, logits=False) 8 | return emotion, scores 9 | 10 | 11 | def emotion_VA_MTL(device): 12 | model_name = 'enet_b0_8_va_mtl' 13 | fer = HSEmotionRecognizer(model_name=model_name, device=device) 14 | return fer 15 | 16 | 17 | _video_model = None 18 | 19 | 20 | def get_video_model(): 21 | global _video_model 22 | if _video_model is None: 23 | _video_model = emotion_VA_MTL(DEVICE) 24 | return _video_model 25 | -------------------------------------------------------------------------------- /service/test/integration_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import argparse 3 | from pathlib import Path 4 | 5 | from starlette.websockets import WebSocket 6 | 7 | 8 | from gen_audio_result import process_audio_file 9 | from gen_text_result import process_text_file 10 | from gen_visual_result import process_video_file 11 | from model.text2speech import text2speech 12 | 13 | 14 | class FakeWebSocket(WebSocket): 15 | 16 | def __init__(self): 17 | pass 18 | 19 | async def send_json(self, data, mode="text"): 20 | pass 21 | 22 | async def receive_text(self): 23 | return "ok" 24 | 25 | 26 | socket = FakeWebSocket() 27 | 28 | 29 | async def process_uploaded(video_path: str, lang: str): 30 | video_dir = Path(video_path).parent 31 | text2speech_path = str(video_dir / "text2speech.json") 32 | audio_result_path = str(video_dir / "audio.csv") 33 | text_result_path = str(video_dir / "text.csv") 34 | visual_result_path = str(video_dir / "faces.csv") 35 | 36 | text2speech(video_path, text2speech_path, lang) 37 | await process_audio_file(video_path, audio_result_path, socket) 38 | await process_text_file(text2speech_path, text_result_path, lang, socket) 39 | await process_video_file(video_path, visual_result_path, socket) 40 | 41 | return { 42 | "id": video_dir.name, 43 | "audio": audio_result_path.replace("\\", "/"), 44 | "visual": visual_result_path.replace("\\", "/"), 45 | "text": text_result_path.replace("\\", "/"), 46 | } 47 | 48 | 49 | parser = argparse.ArgumentParser() 50 | parser.add_argument("--video_path", type=str) 51 | parser.add_argument("--lang", type=str) 52 | 53 | if __name__ == '__main__': 54 | args = parser.parse_args() 55 | asyncio.run(process_uploaded(args.video_path, args.lang)) 56 | 
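
The integration test above drives the processing pipeline directly through a `FakeWebSocket`. Against a running server, the same flow goes through the HTTP upload endpoint plus the `/ws/` websocket defined in `service/main.py`; the sketch below is a hypothetical client (the `requests` and `websockets` packages, host, and port are assumptions, not part of the repository).

```python
# Hypothetical client sketch for a server started with `python service/main.py --port 8000`.
# It follows the protocol in service/main.py: upload the video, open the websocket,
# then acknowledge every progress message with a text reply until "done" arrives.
import asyncio
import json

import requests
import websockets


async def analyze(video_path: str, lang: str = "en"):
    with open(video_path, "rb") as f:
        file_id = requests.post(
            "http://127.0.0.1:8000/api/upload", files={"file": f}
        ).json()["file_id"]

    async with websockets.connect("ws://127.0.0.1:8000/ws/") as ws:
        await ws.send(json.dumps({"file_id": file_id, "lang": lang}))
        while True:
            msg = json.loads(await ws.recv())
            print(msg["status"], msg.get("data", {}))
            if msg["status"] == "done":
                return msg["data"]  # paths of audio.csv, faces.csv, text.csv
            await ws.send("ok")  # the server waits for a reply after each message


# asyncio.run(analyze("video.mp4", "en"))
```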
-------------------------------------------------------------------------------- /service/util/consts.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | import torch 4 | 5 | DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") 6 | SEGMENT_STRIDE = 7.5 7 | SEGMENT_DURATION = 15.0 8 | REQUIRED_SAMPLE_RATE = 16000 9 | 10 | LINGUISTIC_MODEL_EN_PATH = os.path.join("checkpoints", "linguistic_head_en.ckpt") 11 | LINGUISTIC_MODEL_ZH_PATH = os.path.join("checkpoints", "linguistic_head_zh.ckpt") 12 | AUDIO_MODEL_PATH = os.path.join("checkpoints", "audio_model_trill.pt") 13 | 14 | LINGUISTIC_MODEL_ZH_URL = "https://github.com/ControlNet/emolysis/releases/download/misc/linguistic_head_zh.ckpt" 15 | LINGUISTIC_MODEL_EN_URL = "https://github.com/ControlNet/emolysis/releases/download/misc/linguistic_head_en.ckpt" 16 | AUDIO_MODEL_URL = "https://github.com/ControlNet/emolysis/releases/download/misc/audio_model_trill.pt" 17 | 18 | COMMUNICATION_VISUAL_STEP = 100 19 | COMMUNICATION_AUDIO_STEP = 10 20 | COMMUNICATION_LINGUISTIC_STEP = 10 21 | -------------------------------------------------------------------------------- /service/util/label_space_mapping.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # ============================================================================= 4 | # Emotion Mapping 5 | # ============================================================================= 6 | 7 | 8 | def affectnet_to_main(emo_scores): 9 | emo_prob = np.zeros(9) 10 | emo_prob[0] = emo_scores[1] + emo_scores[3] # fear (contempt + fear) 11 | emo_prob[1] = emo_scores[0] # anger (anger) 12 | emo_prob[2] = emo_scores[4] # joy (happy) 13 | emo_prob[3] = emo_scores[6] # sadness (sad) 14 | emo_prob[4] = emo_scores[2] # disgust (disgust) 15 | emo_prob[5] = emo_scores[7] # surprise 16 | emo_prob[6] = 0.0 # trust 17 | emo_prob[7] = 0.0 # anticipation 18 | emo_prob[8] = emo_scores[5] # none 19 | return emo_prob 20 | 21 | 22 | def bold_to_main(emo_scores): 23 | emo_prob = np.zeros(9) 24 | emo_prob[0] = emo_scores[22] + emo_scores[23] # fear (disquitement + fear) 25 | emo_prob[1] = emo_scores[18] + emo_scores[19] # anger (annoyance +anger) 26 | emo_prob[2] = emo_scores[0] + emo_scores[1] + emo_scores[2] \ 27 | + emo_scores[4] + emo_scores[5] + emo_scores[6] + emo_scores[7] \ 28 | + emo_scores[10] + emo_scores[ 29 | 15] # joy (peace+affection+engagement+confidence+happy+pleasure+sympathy+yearning) 30 | emo_prob[3] = emo_scores[12] + emo_scores[13] + emo_scores[14] + emo_scores[20] \ 31 | + emo_scores[21] + emo_scores[24] + emo_scores[25] # sadness 32 | emo_prob[4] = emo_scores[16] + emo_scores[17] # disgust 33 | emo_prob[5] = emo_scores[8] + emo_scores[9] + emo_scores[11] # surprise 34 | emo_prob[6] = 0.0 # trust 35 | emo_prob[7] = emo_scores[3] # anticipation 36 | emo_prob[8] = 0.0 # none 37 | return emo_prob 38 | 39 | 40 | def cped_to_main(emo_scores): 41 | emo_prob = np.zeros(9) 42 | emo_prob[0] = emo_scores[7] + emo_scores[11] # fear 43 | emo_prob[1] = emo_scores[5] # anger 44 | emo_prob[2] = emo_scores[0] + emo_scores[1] + emo_scores[2] \ 45 | + emo_scores[3] # joy 46 | emo_prob[3] = emo_scores[6] + emo_scores[8] + emo_scores[12] # sadness 47 | emo_prob[4] = emo_scores[9] # disgust 48 | emo_prob[5] = emo_scores[10] # surprise 49 | emo_prob[6] = 0.0 # trust 50 | emo_prob[7] = 0.0 # anticipation 51 | emo_prob[8] = emo_scores[4] # none 52 | return emo_prob 53 | 54 | 
55 | # ============================================================================= 56 | # Valence Mapping 57 | # ============================================================================= 58 | 59 | def translate(value, r1_Min, r1_Max, r2_Min, r2_Max): 60 | # Figure out how 'wide' each range is 61 | r1_Span = r1_Max - r1_Min 62 | r2_Span = r2_Max - r2_Min 63 | 64 | # Convert the r1 range into a 0-1 range (float) 65 | valueScaled = float(value - r1_Min) / float(r1_Span) 66 | 67 | # Convert the 0-1 range into a value in the r2 range. 68 | return r2_Min + (valueScaled * r2_Span) 69 | 70 | 71 | def affectnet_to_main_valence(valence_score): 72 | try: 73 | affectnet_min = -1 74 | affectnet_max = 1 75 | 76 | main_min = 1 77 | main_max = 1000 78 | 79 | valence_mapped_main = translate(valence_score, 80 | affectnet_min, affectnet_max, 81 | main_min, main_max) 82 | except Exception: 83 | raise ValueError('The predicted valence score is not valid') 84 | return valence_mapped_main 85 | 86 | 87 | def bold_to_main_valence(valence_score): 88 | try: 89 | bold_min = 1 90 | bold_max = 10 91 | 92 | main_min = 1 93 | main_max = 1000 94 | 95 | valence_mapped_main = translate(valence_score, 96 | bold_min, bold_max, 97 | main_min, main_max) 98 | except Exception: 99 | raise ValueError('The predicted valence score is not valid') 100 | return valence_mapped_main 101 | 102 | 103 | # ============================================================================= 104 | # Arousal Mapping 105 | # ============================================================================= 106 | 107 | def bold_to_main_arousal(arousal_score): 108 | try: 109 | bold_min = 1 110 | bold_max = 10 111 | 112 | main_min = 1 113 | main_max = 1000 114 | 115 | arousal_mapped_main = translate(arousal_score, 116 | bold_min, bold_max, 117 | main_min, main_max) 118 | except Exception: 119 | raise ValueError('The predicted arousal score is not valid') 120 | return arousal_mapped_main 121 | 122 | 123 | def affectnet_to_main_arousal(arousal_score): 124 | try: 125 | affectnet_min = -1 126 | affectnet_max = 1 127 | 128 | main_min = 1 129 | main_max = 1000 130 | 131 | arousal_mapped_main = translate(arousal_score, 132 | affectnet_min, affectnet_max, 133 | main_min, main_max) 134 | except Exception: 135 | raise ValueError('The predicted arousal score is not valid') 136 | return arousal_mapped_main 137 | -------------------------------------------------------------------------------- /service/util/misc.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import BinaryIO, List 4 | 5 | from fastapi import HTTPException, Request, status 6 | from fastapi.responses import StreamingResponse 7 | from tqdm.auto import tqdm 8 | from urllib.request import urlretrieve 9 | 10 | from util.consts import AUDIO_MODEL_PATH, LINGUISTIC_MODEL_EN_PATH, LINGUISTIC_MODEL_ZH_PATH, AUDIO_MODEL_URL, \ 11 | LINGUISTIC_MODEL_EN_URL, LINGUISTIC_MODEL_ZH_URL 12 | 13 | 14 | class VideoNamePool: 15 | latest = -1 16 | 17 | @classmethod 18 | def init(cls): 19 | all_video_ids: List[int] = [] 20 | if not os.path.exists("data"): 21 | os.mkdir("data") 22 | for directory in filter(lambda x: os.path.isdir(os.path.join("data", x)), os.listdir("data")): 23 | try: 24 | all_video_ids.append(int(directory)) 25 | except ValueError: 26 | continue 27 | 28 | if len(all_video_ids) > 0: 29 | cls.latest = max(all_video_ids) 30 | else: 31 | cls.latest = -1 32 | 33 | @classmethod 34 | def get(cls): 35 | cls.latest += 1 
36 | return cls.latest 37 | 38 | 39 | def prepare_checkpoints(): 40 | if not os.path.exists("checkpoints"): 41 | os.mkdir("checkpoints") 42 | 43 | if not os.path.exists(AUDIO_MODEL_PATH): 44 | download_file(AUDIO_MODEL_URL, AUDIO_MODEL_PATH) 45 | 46 | if not os.path.exists(LINGUISTIC_MODEL_EN_PATH): 47 | download_file(LINGUISTIC_MODEL_EN_URL, LINGUISTIC_MODEL_EN_PATH) 48 | 49 | if not os.path.exists(LINGUISTIC_MODEL_ZH_PATH): 50 | download_file(LINGUISTIC_MODEL_ZH_URL, LINGUISTIC_MODEL_ZH_PATH) 51 | 52 | 53 | # https://github.com/tiangolo/fastapi/issues/1240 54 | def send_bytes_range_requests( 55 | file_obj: BinaryIO, start: int, end: int, chunk_size: int = 10_000 56 | ): 57 | """Send a file in chunks using Range Requests specification RFC7233 58 | 59 | `start` and `end` parameters are inclusive due to specification 60 | """ 61 | with file_obj as f: 62 | f.seek(start) 63 | pos = f.tell() 64 | while pos <= end: 65 | read_size = min(chunk_size, end + 1 - pos) 66 | yield f.read(read_size) 67 | 68 | 69 | def _get_range_header(range_header: str, file_size: int) -> tuple[int, int]: 70 | def _invalid_range(): 71 | return HTTPException( 72 | status.HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE, 73 | detail=f"Invalid request range (Range:{range_header!r})", 74 | ) 75 | 76 | try: 77 | h = range_header.replace("bytes=", "").split("-") 78 | start = int(h[0]) if h[0] != "" else 0 79 | end = int(h[1]) if h[1] != "" else file_size - 1 80 | except ValueError: 81 | raise _invalid_range() 82 | 83 | if start > end or start < 0 or end > file_size - 1: 84 | raise _invalid_range() 85 | return start, end 86 | 87 | 88 | def range_requests_response( 89 | request: Request, file_path: str, content_type: str 90 | ): 91 | """Returns StreamingResponse using Range Requests of a given file""" 92 | 93 | file_size = os.stat(file_path).st_size 94 | range_header = request.headers.get("range") 95 | 96 | headers = { 97 | "content-type": content_type, 98 | "accept-ranges": "bytes", 99 | "content-encoding": "identity", 100 | "content-length": str(file_size), 101 | "access-control-expose-headers": ( 102 | "content-type, accept-ranges, content-length, " 103 | "content-range, content-encoding" 104 | ), 105 | } 106 | start = 0 107 | end = file_size - 1 108 | status_code = status.HTTP_200_OK 109 | 110 | if range_header is not None: 111 | start, end = _get_range_header(range_header, file_size) 112 | size = end - start + 1 113 | headers["content-length"] = str(size) 114 | headers["content-range"] = f"bytes {start}-{end}/{file_size}" 115 | status_code = status.HTTP_206_PARTIAL_CONTENT 116 | 117 | return StreamingResponse( 118 | send_bytes_range_requests(open(file_path, mode="rb"), start, end), 119 | headers=headers, 120 | status_code=status_code, 121 | ) 122 | 123 | 124 | class DownloadProgressBar(tqdm): 125 | total: int 126 | 127 | def update_to(self, b=1, bsize=1, tsize=None): 128 | if tsize is not None: 129 | self.total = tsize 130 | self.update(b * bsize - self.n) 131 | 132 | 133 | def download_file(url: str, file_path: str, progress_bar: bool = True) -> str: 134 | path = Path(file_path) 135 | path.parent.mkdir(exist_ok=True, parents=True) 136 | if not path.exists(): 137 | if progress_bar: 138 | with DownloadProgressBar(unit="B", unit_scale=True, miniters=1, desc=f"Downloading {path.name}") as pb: 139 | urlretrieve(url, filename=path, reporthook=pb.update_to) 140 | else: 141 | urlretrieve(url, filename=path) 142 | 143 | return str(path) 144 | -------------------------------------------------------------------------------- /src/App.vue: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/assets/tailwind.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | .card-checked { 6 | @apply bg-base-300 text-base-content 7 | } 8 | .card-unchecked { 9 | @apply bg-neutral text-neutral-content 10 | } 11 | 12 | #audio-card, #text-card, #visual-card { 13 | @apply transition-all duration-300 14 | } 15 | 16 | html { 17 | @apply caret-transparent 18 | } 19 | 20 | .bar { 21 | } 22 | 23 | .emotion-bar { 24 | @apply bar fill-blue-800 25 | } 26 | 27 | .valence-bar { 28 | @apply bar fill-emerald-800 29 | } 30 | 31 | .arousal-bar { 32 | @apply bar fill-orange-800 33 | } 34 | -------------------------------------------------------------------------------- /src/components/AffectiveBarPlot.vue: -------------------------------------------------------------------------------- 1 | 116 | 117 | 136 | 137 | -------------------------------------------------------------------------------- /src/components/AffectiveLinePlot.vue: -------------------------------------------------------------------------------- 1 | 220 | 221 | 240 | 241 | -------------------------------------------------------------------------------- /src/components/FaceBlock.vue: -------------------------------------------------------------------------------- 1 | 10 | 11 | 18 | -------------------------------------------------------------------------------- /src/components/FaceInfo.vue: -------------------------------------------------------------------------------- 1 | 77 | 78 | 104 | 105 | -------------------------------------------------------------------------------- /src/components/FooterBlock.vue: -------------------------------------------------------------------------------- 1 | 5 | 6 | 21 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | export const config = { 2 | faceCanvasSize: 128, 3 | faceInfoTitleFontSize: 24, 4 | faceInfoFontSize: 16, 5 | faceInfoBarLength: 300, 6 | faceInfoLineLength: 280, 7 | overallBarLength: 570, 8 | affectiveSvgHeight: 40, 9 | fps: 30, 10 | boxProbThreshold: 0.99 11 | } -------------------------------------------------------------------------------- /src/global/api.ts: -------------------------------------------------------------------------------- 1 | export const getRemoteUploadApi = () => `/api/upload` 2 | export const getRemoteDataPath = (videoId: string) => `/api/data/${videoId}`; 3 | export const getRemoteDataFps = (videoId: string) => `/api/fps/${videoId}`; 4 | export const getLocalDataPath = (videoId: string) => `/data/${videoId}` 5 | -------------------------------------------------------------------------------- /src/global/consts.ts: -------------------------------------------------------------------------------- 1 | export const EMOTIONS = [ 2 | "fear", "anger", "joy", "sadness", "disgust", "surprise", "trust", "anticipation", "neutral" 3 | ] 4 | 5 | export const PAPER_URL = "https://arxiv.org/abs/2305.05255" 6 | export const GITHUB_URL = "https://github.com/ControlNet/emolysis" 7 | 8 | export type MessageStatus = 9 | "uploaded" 10 | | "audio" 11 | | "audio done" 12 | | "text" 13 | | "text done" 14 | | "visual start" 15 | | "visual" 16 | | "visual done" 17 | | "done" 18 | export type 
MessageProcessData = { current: number, total: number }
19 | export type MessageResultData = { id: string, audio: string, visual: string, text: string }
20 | export type MessageVideoData = { fps: number }
21 |
22 | export interface Message<T> {
23 |     status: MessageStatus,
24 |     data: T
25 | }
--------------------------------------------------------------------------------
/src/global/socket.ts:
--------------------------------------------------------------------------------
1 | let socket: WebSocket | undefined = undefined;
2 |
3 | export function getSocket() {
4 |     if (socket === undefined || socket.readyState === WebSocket.CLOSED) {
5 |         socket = new WebSocket(`ws://${window.location.host}/ws/`)
6 |     }
7 |     return socket;
8 | }
9 |
10 | export function closeSocket() {
11 |     if (socket !== undefined) {
12 |         socket.close();
13 |         socket = undefined;
14 |     }
15 | }
16 |
--------------------------------------------------------------------------------
/src/main.ts:
--------------------------------------------------------------------------------
1 | import { createApp } from 'vue'
2 | import { createPinia } from 'pinia'
3 | import "@/assets/tailwind.css"
4 |
5 | import App from './App.vue'
6 | import router from "@/router";
7 | import { library } from '@fortawesome/fontawesome-svg-core'
8 | import { FontAwesomeIcon } from '@fortawesome/vue-fontawesome'
9 | import { faGithub } from '@fortawesome/free-brands-svg-icons'
10 | import { faBook } from "@fortawesome/free-solid-svg-icons"
11 |
12 |
13 | library.add(faGithub, faBook)
14 |
15 | const app = createApp(App)
16 |
17 | app.use(createPinia())
18 | app.use(router)
19 | app.component("font-awesome-icon", FontAwesomeIcon)
20 |
21 | app.mount("#app")
22 |
--------------------------------------------------------------------------------
/src/preprocess/audio.ts:
--------------------------------------------------------------------------------
1 | import { useDataPathStore } from "@/stores/dataPathStore";
2 | import * as d3 from "d3";
3 | import { config } from "@/config";
4 | import _ from "lodash";
5 | import type { DataRow } from "@/preprocess/common";
6 |
7 | type AudioCsv = Array<{
8 |     start: string,
9 |     end: string,
10 |     valence: string,
11 |     arousal: string,
12 |     emotion0: string,
13 |     emotion1: string,
14 |     emotion2: string,
15 |     emotion3: string,
16 |     emotion4: string,
17 |     emotion5: string,
18 |     emotion6: string,
19 |     emotion7: string,
20 |     emotion8: string
21 | }>
22 |
23 | type AudioData = Map<number, AudioRow>
24 |
25 | export interface AudioRow extends DataRow {}
26 |
27 | const audioData: AudioData = new Map();
28 |
29 | async function loadAudioData(): Promise<void> {
30 |     const dataPathStore = useDataPathStore()
31 |     const d = await d3.csv(dataPathStore.audioDataPath) as AudioCsv
32 |
33 |     d.forEach(row => {
34 |         const start = parseInt(row.start) * config.fps
35 |         const end = parseInt(row.end) * config.fps
36 |         const valence = _.round(parseFloat(row.valence))
37 |         const arousal = _.round(parseFloat(row.arousal))
38 |         const emotionProb = [row.emotion0, row.emotion1, row.emotion2, row.emotion3, row.emotion4, row.emotion5, row.emotion6, row.emotion7, row.emotion8].map(parseFloat)
39 |
40 |         _.range(start, end).forEach(frame => {
41 |             audioData.set(frame, {
42 |                 frame,
43 |                 valence,
44 |                 arousal,
45 |                 emotionProb
46 |             })
47 |         })
48 |     })
49 | }
50 |
51 | export async function getAudioData(): Promise<AudioData> {
52 |     if (audioData.size === 0) {
53 |         await loadAudioData()
54 |     }
55 |     return audioData
56 | }
57 |
58 | export function getAudioDataByFrame(frame: number): AudioRow | undefined {
59 |     return audioData.get(frame)
60 | }
--------------------------------------------------------------------------------
/src/preprocess/common.ts:
--------------------------------------------------------------------------------
1 | export interface DataRow {
2 |     frame: number
3 |     valence: number
4 |     arousal: number
5 |     emotionProb: Array<number>
6 | }
7 |
8 | export type Data = Map<number, DataRow>
9 |
--------------------------------------------------------------------------------
/src/preprocess/faces.ts:
--------------------------------------------------------------------------------
1 | import { useDataPathStore } from "@/stores/dataPathStore";
2 | import * as d3 from "d3";
3 | import type { DataRow } from "@/preprocess/common";
4 |
5 | type FaceCsv = Array<{
6 |     frame: string,
7 |     x1: string,
8 |     x2: string,
9 |     y1: string,
10 |     y2: string,
11 |     box_prob: string,
12 |     emotion: string,
13 |     emotion0: string,
14 |     emotion1: string,
15 |     emotion2: string,
16 |     emotion3: string,
17 |     emotion4: string,
18 |     emotion5: string,
19 |     emotion6: string,
20 |     emotion7: string,
21 |     emotion8: string,
22 |     valence: string,
23 |     arousal: string,
24 | }>
25 |
26 | type FaceData = Map<number, Array<FaceRow>>
27 |
28 | export interface FaceRow extends DataRow {
29 |     x1: number
30 |     x2: number
31 |     y1: number
32 |     y2: number
33 |     boxProb: number
34 | }
35 |
36 | export interface VisualRow extends DataRow {}
37 |
38 | const faceData: FaceData = new Map();
39 |
40 | async function loadFaceData(): Promise<void> {
41 |     const dataPathStore = useDataPathStore()
42 |     const d = await d3.csv(dataPathStore.faceDataPath) as FaceCsv
43 |     d.forEach(row => {
44 |         const currentFrame = parseInt(row.frame)
45 |         const faceRow = {
46 |             frame: currentFrame,
47 |             x1: parseInt(row.x1),
48 |             x2: parseInt(row.x2),
49 |             y1: parseInt(row.y1),
50 |             y2: parseInt(row.y2),
51 |             boxProb: parseFloat(row.box_prob),
52 |             valence: parseInt(row.valence),
53 |             arousal: parseInt(row.arousal),
54 |             emotion: parseInt(row.emotion),
55 |             emotionProb: [row.emotion0, row.emotion1, row.emotion2, row.emotion3, row.emotion4, row.emotion5, row.emotion6, row.emotion7, row.emotion8].map(parseFloat)
56 |         }
57 |         if (faceData.has(currentFrame)) {
58 |             faceData.get(currentFrame)!.push(faceRow)
59 |         } else {
60 |             faceData.set(currentFrame, [faceRow])
61 |         }
62 |     })
63 | }
64 |
65 | export async function getFaceData(): Promise<FaceData> {
66 |     if (faceData.size === 0) {
67 |         await loadFaceData()
68 |     }
69 |     return faceData
70 | }
71 |
72 | export function getFaceDataByFrame(frame: number): Array<FaceRow> {
73 |     return faceData.get(frame) ?? []
74 | }
75 |
--------------------------------------------------------------------------------
/src/preprocess/overall.ts:
--------------------------------------------------------------------------------
1 | type OverallBaseCsvRow = {
2 |     file_id: string,
3 |     start: string,
4 |     end: string,
5 | }
6 |
7 | type OverallArousalCsvRow = OverallBaseCsvRow & {
8 |     arousal_continuous: string,
9 | }
10 |
11 | type OverallValenceCsvRow = OverallBaseCsvRow & {
12 |     valence_continuous: string,
13 | }
14 |
15 | type OverallEmotionCsvRow = OverallBaseCsvRow & {
16 |     emotion: string,
17 |     llr: string,
18 | }
19 |
20 | type OverallCsv = Array<OverallArousalCsvRow | OverallValenceCsvRow | OverallEmotionCsvRow>
21 |
22 | export interface OverallRow {
23 |     frame: number,
24 |     valence: number
25 |     arousal: number
26 |     emotionProb: Array<number>
27 | }
28 |
29 | type OverallData = Map<number, OverallRow>
30 | const overallData: OverallData = new Map();
31 |
32 | // async function processArousalData() {
33 | //     const dataPathStore = useDataPathStore()
34 | //     const d = await d3.dsv("\t", dataPathStore.arousalDataPath) as OverallCsv
35 | //     return smoothArrayBy(_.sortBy(_.flatMap(d, row => {
36 | //         const start = parseInt(row.start) * config.fps
37 | //         const end = parseInt(row.end) * config.fps
38 | //         const arousal = parseFloat(row.arousal_continuous)
39 | //         return _.range(start, end).map(frame => {
40 | //             return {
41 | //                 frame,
42 | //                 arousal
43 | //             }
44 | //         })
45 | //     }), row => row.frame), 300, "arousal")
46 | // }
47 | //
48 | // async function processValenceData() {
49 | //     const dataPathStore = useDataPathStore()
50 | //     const d = await d3.dsv("\t", dataPathStore.valenceDataPath) as OverallCsv
51 | //     return smoothArrayBy(_.sortBy(_.flatMap(d, row => {
52 | //         const start = parseInt(row.start) * config.fps
53 | //         const end = parseInt(row.end) * config.fps
54 | //         const valence = parseFloat(row.valence_continuous)
55 | //         return _.range(start, end).map(frame => {
56 | //             return {
57 | //                 frame,
58 | //                 valence
59 | //             }
60 | //         })
61 | //     }), row => row.frame), 300, "valence")
62 | // }
63 | //
64 | // async function processEmotionData() {
65 | //     const dataPathStore = useDataPathStore()
66 | //     const d = await d3.dsv("\t", dataPathStore.emotionDataPath) as OverallCsv
67 | //     return smoothArrayBy(_.sortBy(_.flatMap(d, row => {
68 | //         const start = parseInt(row.start) * config.fps
69 | //         const end = parseInt(row.end) * config.fps
70 | //         const emotion = row.emotion === "none" ? "neutral" : row.emotion
71 | //         const emotion_llr = parseFloat(row.llr)
72 | //         return _.range(start, end).map(frame => {
73 | //             return {
74 | //                 frame,
75 | //                 emotion,
76 | //                 emotion_prob: llr2prob(emotion_llr)
77 | //             }
78 | //         })
79 | //     }), row => row.frame), 300, "emotion_prob")
80 | // }
81 | //
82 | //
83 | // async function loadOverallData() {
84 | //     const [emotionData, valenceData, arousalData] = await Promise.all(
85 | //         [processEmotionData(), processValenceData(), processArousalData()]
86 | //     );
87 | //
88 | //     for (let i = 0; i < emotionData.length; i++) {
89 | //         if (arousalData[i].frame !== valenceData[i].frame || arousalData[i].frame !== emotionData[i].frame) {
90 | //             throw new Error("Frame mismatch")
91 | //         }
92 | //
93 | //         const arousal = arousalData[i].arousal
94 | //         const valence = valenceData[i].valence
95 | //         const emotion = emotionData[i].emotion
96 | //         const emotionProb = emotionData[i].emotion_prob
97 | //         overallData.set(i, {
98 | //             frame: i,
99 | //             arousal,
100 | //             valence,
101 | //             emotion,
102 | //             emotionProb
103 | //         })
104 | //     }
105 | // }
106 | //
107 | // export async function getOverallData(): Promise<OverallData> {
108 | //     if (overallData.size === 0) {
109 | //         await loadOverallData()
110 | //     }
111 | //     return overallData
112 | // }
113 |
--------------------------------------------------------------------------------
/src/preprocess/text.ts:
--------------------------------------------------------------------------------
1 | import { useDataPathStore } from "@/stores/dataPathStore";
2 | import * as d3 from "d3";
3 | import { config } from "@/config";
4 | import _ from "lodash";
5 | import type { DataRow } from "@/preprocess/common";
6 |
7 | type TextCsv = Array<{
8 |     start: string,
9 |     end: string,
10 |     valence: string,
11 |     arousal: string,
12 |     emotion0: string,
13 |     emotion1: string,
14 |     emotion2: string,
15 |     emotion3: string,
16 |     emotion4: string,
17 |     emotion5: string,
18 |     emotion6: string,
19 |     emotion7: string,
20 |     emotion8: string
21 | }>
22 |
23 | type TextData = Map<number, TextRow>
24 |
25 | export interface TextRow extends DataRow {}
26 |
27 | const textData: TextData = new Map();
28 |
29 | async function loadTextData(): Promise<void> {
30 |     const dataPathStore = useDataPathStore()
31 |     const d = await d3.csv(dataPathStore.textDataPath) as TextCsv
32 |
33 |     d.forEach((row, index) => {
34 |         // drop last row
35 |         if (index === d.length - 1) {
36 |             return
37 |         }
38 |
39 |         const start = parseInt(row.start) * config.fps
40 |         const end = parseInt(row.end) * config.fps
41 |         const valence = _.round(parseFloat(row.valence))
42 |         const arousal = _.round(parseFloat(row.arousal))
43 |         const emotionProb = [row.emotion0, row.emotion1, row.emotion2, row.emotion3, row.emotion4, row.emotion5, row.emotion6, row.emotion7, row.emotion8].map(parseFloat)
44 |
45 |         _.range(start, end).forEach(frame => {
46 |             textData.set(frame, {
47 |                 frame,
48 |                 valence,
49 |                 arousal,
50 |                 emotionProb
51 |             })
52 |         })
53 |     })
54 | }
55 |
56 | export async function getTextData(): Promise<TextData> {
57 |     if (textData.size === 0) {
58 |         await loadTextData()
59 |     }
60 |     return textData
61 | }
62 |
63 | export function getTextDataByFrame(frame: number): TextRow | undefined {
64 |     return textData.get(frame)
65 | }
--------------------------------------------------------------------------------
/src/router/index.ts:
--------------------------------------------------------------------------------
1 | import { createRouter, createWebHistory } from "vue-router"
2 | import WelcomeView from "@/views/WelcomeView.vue";
3 | import MainView from "@/views/MainView.vue";
4 |
5 | export default createRouter({
6 |     history: createWebHistory("/"),
7 |     routes: [
8 |         { path: "/", component: WelcomeView},
9 |         { path: "/:mode/:videoId", component: MainView},
10 |     ]
11 | })
12 |
--------------------------------------------------------------------------------
/src/shims-vue.d.ts:
--------------------------------------------------------------------------------
1 | declare module '*.vue';
2 |
--------------------------------------------------------------------------------
/src/stores/dataPathStore.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 | import { getFaceData } from "@/preprocess/faces";
3 | import { getAudioData } from "@/preprocess/audio";
4 | import { getTextData } from "@/preprocess/text";
5 |
6 | export const useDataPathStore = defineStore({
7 |     id: 'dataPath',
8 |     state: () => ({
9 |         dataDir: "",
10 |     }),
11 |     getters: {
12 |         videoPath: state => `${state.dataDir}/video.mp4`,
13 |         faceDataPath: state => `${state.dataDir}/faces.csv`,
14 |         audioDataPath: state => `${state.dataDir}/audio.csv`,
15 |         textDataPath: state => `${state.dataDir}/text.csv`,
16 |     },
17 |     actions: {
18 |         async setDataDir(dir: string) {
19 |             this.dataDir = dir
20 |             await Promise.all([
21 |                 getFaceData(),
22 |                 getAudioData(),
23 |                 getTextData()
24 |             ])
25 |         }
26 |     }
27 | })
28 |
--------------------------------------------------------------------------------
/src/stores/faceCheckedStore.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 |
3 | export const useFaceCheckedStore = defineStore({
4 |     id: 'faceChecked',
5 |     state: () => ({
6 |         faceChecked: Array(99).fill(true) as Array<boolean>,
7 |         visualChecked: true
8 |     }),
9 |     actions: {
10 |         setFaceChecked(index: number, value: boolean) {
11 |             this.faceChecked[index] = value
12 |         },
13 |         setVisualChecked(value: boolean) {
14 |             this.visualChecked = value
15 |         }
16 |     }
17 | })
--------------------------------------------------------------------------------
/src/stores/lineChartCheckedStore.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 |
3 | export const useLineChartCheckedStore = defineStore({
4 |     id: 'lineChartChecked',
5 |     state: () => ({
6 |         audioChecked: false,
7 |         textChecked: false,
8 |     }),
9 |     actions: {
10 |         setAudioChecked(value: boolean) {
11 |             this.audioChecked = value
12 |         },
13 |
14 |         setTextChecked(value: boolean) {
15 |             this.textChecked = value
16 |         }
17 |     }
18 | })
--------------------------------------------------------------------------------
/src/stores/videoStore.ts:
--------------------------------------------------------------------------------
1 | import { defineStore } from 'pinia'
2 |
3 | export const useVideoStore = defineStore({
4 |     id: 'video',
5 |     actions: {
6 |         onVideoSeeked: () => {}
7 |     }
8 | })
--------------------------------------------------------------------------------
/src/utils.ts:
--------------------------------------------------------------------------------
1 | import _ from "lodash";
2 |
3 | export function moveTo(prevX: number, prevY: number, x: number, y: number, speed: number): { x: number, y: number } {
4 |     const dx = x - prevX
5 |     const dy = y - prevY
6 |     const distance = Math.sqrt(dx * dx + dy * dy)
7 |     if (distance < speed) {
8 |         return {x, y}
9 |     }
10 |     const angle = Math.atan2(dy, dx)
11 |     return {x: prevX + speed * Math.cos(angle), y: prevY + speed * Math.sin(angle)}
12 | }
13 |
14 | export function smoothArray(array: Array<number>, window: number): Array<number> {
15 |     const smoothArray = []
16 |     for (let i = 0; i < array.length; i++) {
17 |         const start = Math.max(0, i - window)
18 |         const end = Math.min(array.length, i + window)
19 |         const subArray = array.slice(start, end)
20 |         smoothArray.push(_.mean(subArray))
21 |     }
22 |     return smoothArray
23 | }
24 |
25 | export function smoothArrayBy<T>(array: Array<T>, window: number, key: keyof T): Array<T> {
26 |     const smoothArray = []
27 |     for (let i = 0; i < array.length; i++) {
28 |         const start = Math.max(0, i - window)
29 |         const end = Math.min(array.length, i + window)
30 |         const subArray = array.slice(start, end).map(item => item[key])
31 |         const mean = _.mean(subArray)
32 |         smoothArray.push({...array[i], [key]: mean})
33 |     }
34 |     return smoothArray
35 | }
36 |
37 | export function llr2prob(llr: number): number {
38 |     return 1 / (1 + Math.exp(-llr))
39 | }
40 |
--------------------------------------------------------------------------------
/src/views/MainView.vue:
--------------------------------------------------------------------------------
1 |
306 |
307 |
536 |
--------------------------------------------------------------------------------
/src/views/WelcomeView.vue:
--------------------------------------------------------------------------------
1 |
124 |
125 |
158 |
159 |
--------------------------------------------------------------------------------
/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
3 |   content: [
4 |     "./index.html",
5 |     "./src/**/*.{vue,js,ts,jsx,tsx}"
6 |   ],
7 |   theme: {
8 |     extend: {},
9 |   },
10 |   plugins: [require("daisyui")],
11 | }
12 |
--------------------------------------------------------------------------------
/tsconfig.config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "@vue/tsconfig/tsconfig.node.json",
3 |   "include": ["vite.config.*", "vitest.config.*", "cypress.config.*", "playwright.config.*"],
4 |   "compilerOptions": {
5 |     "composite": true,
6 |     "types": ["node"]
7 |   }
8 | }
9 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "@vue/tsconfig/tsconfig.web.json",
3 |   "include": [
4 |     "env.d.ts",
5 |     "src/**/*",
6 |     "src/**/*.vue"
7 |   ],
8 |   "compilerOptions": {
9 |     "baseUrl": ".",
10 |     "paths": {
11 |       "@/*": [
12 |         "./src/*"
13 |       ]
14 |     }
15 |   },
16 |   "references": [
17 |     {
18 |       "path": "./tsconfig.config.json"
19 |     }
20 |   ],
21 |   "vueCompilerOptions": {
22 |     "experimentalModelPropName": {
23 |     }
24 |   }
25 | }
26 |
--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { fileURLToPath, URL } from 'node:url'
2 |
3 | import { defineConfig } from 'vite'
4 | import vue from '@vitejs/plugin-vue'
5 | import vueJsx from '@vitejs/plugin-vue-jsx'
6 |
7 | // https://vitejs.dev/config/
8 | export default defineConfig({
9 |   plugins: [vue(), vueJsx()],
10 |   resolve: {
11 |     alias: {
12 |       '@': fileURLToPath(new URL('./src', import.meta.url))
13 |     },
14 |   },
15 |   css: {
16 |     postcss: {
17 |       plugins: [
18 |
require('tailwindcss'), 19 | require('autoprefixer'), 20 | ], 21 | } 22 | }, 23 | }) 24 | --------------------------------------------------------------------------------