├── .dockerignore ├── .editorconfig ├── .env.example ├── .gitignore ├── .prettierrc ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── __mocks__ └── pexels-response.json ├── docker-compose.yml ├── eslint.config.mjs ├── main-cuda.Dockerfile ├── main.Dockerfile ├── package.json ├── pnpm-lock.yaml ├── postcss.config.mjs ├── remotion.config.ts ├── src ├── components │ ├── root │ │ ├── Root.tsx │ │ ├── index.css │ │ └── index.ts │ └── videos │ │ └── ShortVideo.tsx ├── config.ts ├── index.ts ├── logger.ts ├── scripts │ └── install.ts ├── server │ ├── routers │ │ ├── mcp.ts │ │ └── rest.ts │ ├── server.ts │ └── validator.ts ├── short-creator │ ├── ShortCreator.ts │ ├── libraries │ │ ├── FFmpeg.ts │ │ ├── Kokoro.ts │ │ ├── Pexels.test.ts │ │ ├── Pexels.ts │ │ ├── Remotion.ts │ │ └── Whisper.ts │ └── music.ts └── types │ └── shorts.ts ├── static └── music │ ├── 80s-synthwave-chill-166744.mp3 │ ├── README.md │ ├── angry-trap-beat-136015.mp3 │ ├── anxious-heartbeat-dark-thriller-180565.mp3 │ ├── better-day-186374.mp3 │ ├── bright-energetic-upbeat-pop-324997.mp3 │ ├── cinematic-documentary-115669.mp3 │ ├── dark-140112.mp3 │ ├── dark-anxious-tension-dramatic-suspense-112169.mp3 │ ├── dark-electronic-207913.mp3 │ ├── dark-intentions-288498.mp3 │ ├── dark-mysterious-tense-piano-cinematic-soundtrack-226665.mp3 │ ├── deep-282969.mp3 │ ├── exciting-upbeat-background-music-306032.mp3 │ ├── frenzy-story-234221.mp3 │ ├── fun-upbeat-background-music-311769.mp3 │ ├── funny-comedy-quirky-background-music-316889.mp3 │ ├── haunting-dark-atmosphere-304116.mp3 │ ├── heerful-99148.mp3 │ ├── hopeful-cinematic-248601.mp3 │ ├── hopeful-optimism-266072.mp3 │ ├── horror-dark-spooky-piano-251474.mp3 │ ├── into-the-wild-315582.mp3 │ ├── lofi-chill-melancholic-259764.mp3 │ ├── melancholic-reflective-floating-piano-atmosphere-324686.mp3 │ ├── mellow-fellow-in-the-bellow-peaceful-lofi-instrumental-262780.mp3 │ ├── mellow-smooth-rap-beat-20230107-132480.mp3 │ ├── no-place-to-go-216744.mp3 │ ├── powerful-energy-upbeat-rock-advertising-music-245728.mp3 │ ├── quirky-169825.mp3 │ ├── sad-emotional-beat-cry-alone-121597.mp3 │ ├── sad-piano-one-181090.mp3 │ ├── sad-violin-150146.mp3 │ ├── sneaky-and-quirky-music-loop-287412.mp3 │ ├── sunset-rising-113685.mp3 │ ├── tension-113661.mp3 │ ├── unforgiving-253312.mp3 │ ├── upbeat-background-music-315196.mp3 │ ├── upbeat-funk-happy-315162.mp3 │ ├── upbeat-funky-vlog-background-music-313080.mp3 │ ├── upbeat-hip-hop-vlog-music-322878.mp3 │ └── vintage-wonder-142553.mp3 ├── tsconfig.json └── vitest.config.ts /.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .git 3 | .gitignore 4 | *.md 5 | dist 6 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = crlf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 2 10 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | PEXELS_API_KEY= # crucial for the project to work 2 | LOG_LEVEL=trace # trace, debug, info, warn, error, fatal, silent 3 | WHISPER_VERBOSE=true 4 | PORT=3123 5 | DEV=true # local development mode 6 | DATA_DIR_PATH= # only for docker, otherwise leave empty 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | .DS_Store 4 | .env 5 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "useTabs": false, 3 | "bracketSpacing": true, 4 | "tabWidth": 2 5 | } 6 | 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Shorts Creator 2 | 3 | ## How to setup the development environment 4 | 5 | 1. Clone the repository 6 | 7 | ```bash 8 | git clone git@github.com:gyoridavid/short-video-maker.git 9 | cd shorts-video-maker 10 | ``` 11 | 12 | 2. Install dependencies 13 | 14 | ```bash 15 | pnpm install 16 | ``` 17 | 18 | 3. Copy `.env.example` to `.env` and set the right environment variables. 19 | 20 | 4. Start the server 21 | ```bash 22 | pnpm dev 23 | ``` 24 | 25 | ## How to preview the videos and debug the rendering process 26 | 27 | You can use Remotion Studio to preview videos. Make sure to update the template if the underlying data structure changes. 28 | 29 | ```bash 30 | npx remotion studio 31 | ``` 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 David Gyori 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Short Video Maker 2 | 3 | An open source automated video creation tool for generating short-form video content. Short Video Maker combines text-to-speech, automatic captions, background videos, and music to create engaging short videos from simple text inputs. 4 | 5 | This repository was open-sourced by the [AI Agents A-Z Youtube Channel](https://www.youtube.com/channel/UCloXqLhp_KGhHBe1kwaL2Tg). We encourage you to check out the channel for more AI-related content and tutorials. 6 | 7 | ## Hardware requirements 8 | 9 | - CPU: at least 2 cores are recommended 10 | - GPU: optional, makes the caption generation a lot faster (whisper.cpp) and the video rendering somewhat faster 11 | 12 | ## Watch the official video on how to generate videos with n8n 13 | 14 | [![Automated faceless video generation (n8n + MCP) with captions, background music, local and 100% free](https://img.youtube.com/vi/jzsQpn-AciM/0.jpg)](https://www.youtube.com/watch?v=jzsQpn-AciM) 15 | 16 | ## Running the Project 17 | 18 | ### Using NPX (recommended) 19 | 20 | The easiest way to run the project with GPU support out of the box: 21 | 22 | ```bash 23 | LOG_LEVEL=debug PEXELS_API_KEY= npx short-video-maker 24 | ``` 25 | 26 | ### Using Docker 27 | 28 | #### CPU image 29 | 30 | ```bash 31 | docker run -it --rm --name short-video-maker -p 3123:3123 \ 32 | -e PEXELS_API_KEY= \ 33 | gyoridavid/short-video-maker:latest 34 | ``` 35 | 36 | #### NVIDIA GPUs 37 | ```bash 38 | docker run -it --rm --name shorts-video-maker -p 3123:3123 \ 39 | -e PEXELS_API_KEY= --gpus=all \ 40 | gyoridavid/short-video-maker:latest-cuda 41 | ``` 42 | 43 | ## Find help 44 | 45 | Join our [Discord](https://discord.gg/G7FJVJQ6RE) community for support and discussions. 46 | 47 | ## Environment Variables 48 | 49 | | Variable | Description | 50 | | --------------- | ---------------------------------------------------------------------------------- | 51 | | PEXELS_API_KEY | Your Pexels API key for background video sourcing | 52 | | PORT | Port for the API/MCP server (default: 3123) | 53 | | LOG_LEVEL | Log level for the server (default: info, options: trace, debug, info, warn, error) | 54 | | WHISPER_VERBOSE | Verbose mode for Whisper (default: false) | 55 | 56 | ## Example 57 | 58 | 59 | 60 | 63 | 81 | 82 |
61 | 62 | 64 | 65 | ```json 66 | { 67 | "scenes": [ 68 | { 69 | "text": "Hello world! Enjoy using this tool to create awesome AI workflows", 70 | "searchTerms": ["rainbow"] 71 | } 72 | ], 73 | "config": { 74 | "paddingBack": 1500, 75 | "music": "happy" 76 | } 77 | } 78 | ``` 79 | 80 |
83 | 84 | ## Features 85 | 86 | - Generate complete short videos from text prompts 87 | - Text-to-speech conversion 88 | - Automatic caption generation and styling 89 | - Background video search and selection via Pexels 90 | - Background music with genre/mood selection 91 | - Serve as both REST API and Model Context Protocol (MCP) server 92 | 93 | ## How It Works 94 | 95 | Shorts Creator takes simple text inputs and search terms, then: 96 | 97 | 1. Converts text to speech using Kokoro TTS 98 | 2. Generates accurate captions via Whisper 99 | 3. Finds relevant background videos from Pexels 100 | 4. Composes all elements with Remotion 101 | 5. Renders a professional-looking short video with perfectly timed captions 102 | 103 | ## Dependencies for the video generation 104 | 105 | | Dependency | Version | License | Purpose | 106 | | ------------------------------------------------------ | -------- | --------------------------------------------------------------------------------- | ------------------------------- | 107 | | [Remotion](https://remotion.dev/) | ^4.0.286 | [Remotion License](https://github.com/remotion-dev/remotion/blob/main/LICENSE.md) | Video composition and rendering | 108 | | [Whisper CPP](https://github.com/ggml-org/whisper.cpp) | v1.5.5 | MIT | Speech-to-text for captions | 109 | | [FFmpeg](https://ffmpeg.org/) | ^2.1.3 | LGPL/GPL | Audio/video manipulation | 110 | | [Kokoro.js](https://www.npmjs.com/package/kokoro-js) | ^1.2.0 | MIT | Text-to-speech generation | 111 | | [Pexels API](https://www.pexels.com/api/) | N/A | [Pexels Terms](https://www.pexels.com/license/) | Background videos | 112 | 113 | ## How to contribute? 114 | 115 | PRs are welcome. 116 | See the [CONTRIBUTING.md](CONTRIBUTING.md) file for instructions on setting up a local development environment. 117 | 118 | ## API Usage 119 | 120 | ### REST API 121 | 122 | The following REST endpoints are available: 123 | 124 | 1. `GET /api/short-video/:id` - Get a video by ID and also can be downloaded like this : 125 | 126 | ```curl -o output.mp4 http://localhost:3123/api/short-video/ ``` 127 | 128 | 129 | 3. `POST /api/short-video` - Create a new video 130 | ```json 131 | { 132 | "scenes": [ 133 | { 134 | "text": "This is the text to be spoken in the video", 135 | "searchTerms": ["nature sunset"] 136 | } 137 | ], 138 | "config": { 139 | "paddingBack": 3000, 140 | "music": "chill" 141 | } 142 | } 143 | ``` 144 | 4. `DELETE /api/short-video/:id` - Delete a video by ID 145 | 5. `GET /api/music-tags` - Get available music tags 146 | 147 | 148 | 149 | ### Model Context Protocol (MCP) 150 | 151 | The service also implements the Model Context Protocol: 152 | 153 | 1. `GET /mcp/sse` - Server-sent events for MCP 154 | 2. `POST /mcp/messages` - Send messages to MCP server 155 | 156 | Available MCP tools: 157 | 158 | - `create-short-video` - Create a video from a list of scenes 159 | - `get-video-status` - Check video creation status 160 | 161 | ## License 162 | 163 | This project is licensed under the [MIT License](LICENSE). 164 | 165 | ## Acknowledgments 166 | 167 | - ❤️ [Remotion](https://remotion.dev/) for programmatic video generation 168 | - ❤️ [Whisper](https://github.com/ggml-org/whisper.cpp) for speech-to-text 169 | - ❤️ [Pexels](https://www.pexels.com/) for video content 170 | - ❤️ [FFmpeg](https://ffmpeg.org/) for audio/video processing 171 | - ❤️ [Kokoro](https://github.com/hexgrad/kokoro) for TTS 172 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | short-creator: 5 | build: 6 | context: . 7 | dockerfile: main.Dockerfile 8 | env_file: 9 | - .env 10 | environment: 11 | - DEV=false 12 | ports: 13 | - "3123:3123" 14 | entrypoint: ["node", "dist/index.js"] 15 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { config } from "@remotion/eslint-config-flat"; 2 | 3 | export default config; 4 | -------------------------------------------------------------------------------- /main-cuda.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG UBUNTU_VERSION=22.04 2 | ARG CUDA_VERSION=12.3.1 3 | ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} 4 | ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} 5 | 6 | # Ref: https://github.com/ggml-org/whisper.cpp 7 | FROM ${BASE_CUDA_DEV_CONTAINER} AS install-whisper 8 | ENV DEBIAN_FRONTEND=noninteractive 9 | 10 | RUN apt-get update && \ 11 | apt-get install --fix-missing --no-install-recommends -y bash git make vim wget g++ ffmpeg curl 12 | 13 | WORKDIR /app/data/libs/whisper.cpp 14 | RUN git clone https://github.com/ggerganov/whisper.cpp.git -b v1.7.1 --depth 1 . 15 | 16 | RUN make clean 17 | RUN GGML_CUDA=1 make -j 18 | 19 | RUN sh ./models/download-ggml-model.sh medium.en 20 | 21 | FROM ${BASE_CUDA_RUN_CONTAINER} AS base 22 | 23 | # install node 24 | RUN apt-get update && apt-get install -y \ 25 | curl \ 26 | ca-certificates \ 27 | gnupg \ 28 | lsb-release \ 29 | && rm -rf /var/lib/apt/lists/* 30 | RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ 31 | && apt-get update && apt-get install -y nodejs \ 32 | && rm -rf /var/lib/apt/lists/* 33 | RUN node -v && npm -v 34 | 35 | # install dependencies 36 | ENV DEBIAN_FRONTEND=noninteractive 37 | WORKDIR /app 38 | RUN apt update 39 | RUN apt install -y \ 40 | # whisper dependencies 41 | git \ 42 | wget \ 43 | cmake \ 44 | ffmpeg \ 45 | curl \ 46 | build-essential \ 47 | make \ 48 | # remotion dependencies 49 | libnss3 \ 50 | libdbus-1-3 \ 51 | libatk1.0-0 \ 52 | libgbm-dev \ 53 | libasound2 \ 54 | libxrandr2 \ 55 | libxkbcommon-dev \ 56 | libxfixes3 \ 57 | libxcomposite1 \ 58 | libxdamage1 \ 59 | libatk-bridge2.0-0 \ 60 | libpango-1.0-0 \ 61 | libcairo2 \ 62 | libcups2 \ 63 | && apt-get clean \ 64 | && rm -rf /var/lib/apt/lists/* 65 | # setup pnpm 66 | ENV PNPM_HOME="/pnpm" 67 | ENV PATH="$PNPM_HOME:$PATH" 68 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0 69 | RUN corepack enable 70 | 71 | FROM base AS prod-deps 72 | COPY package.json pnpm-lock.yaml* /app/ 73 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile 74 | RUN pnpm install --prefer-offline --no-cache --prod 75 | 76 | FROM prod-deps AS build 77 | COPY tsconfig.json /app 78 | COPY src /app/src 79 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile 80 | RUN pnpm build 81 | 82 | FROM base 83 | COPY static /app/static 84 | COPY --from=install-whisper /app/data/libs/whisper.cpp /app/data/libs/whisper.cpp 85 | COPY --from=prod-deps /app/node_modules /app/node_modules 86 | COPY --from=build /app/dist /app/dist 87 | COPY package.json /app/ 88 | 89 | # app configuration via environment variables 90 | ENV DATA_DIR_PATH=/app/data 91 | ENV DOCKER=true 92 | 93 | # install kokoro, headless chrome and ensure music files are present 94 | RUN node dist/scripts/install.js 95 | 96 | CMD ["pnpm", "start"] 97 | -------------------------------------------------------------------------------- /main.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 AS install-whisper 2 | ENV DEBIAN_FRONTEND=noninteractive 3 | RUN apt update 4 | # whisper install dependencies 5 | RUN apt install -y \ 6 | git \ 7 | build-essential \ 8 | wget \ 9 | cmake \ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* 12 | WORKDIR /whisper 13 | RUN git clone https://github.com/ggml-org/whisper.cpp.git . 14 | RUN git checkout v1.5.5 15 | RUN make 16 | WORKDIR /whisper/models 17 | RUN sh ./download-ggml-model.sh medium.en 18 | 19 | FROM node:22-bookworm-slim AS base 20 | ENV DEBIAN_FRONTEND=noninteractive 21 | WORKDIR /app 22 | RUN apt update 23 | RUN apt install -y \ 24 | # whisper dependencies 25 | git \ 26 | wget \ 27 | cmake \ 28 | ffmpeg \ 29 | curl \ 30 | make \ 31 | libsdl2-dev \ 32 | # remotion dependencies 33 | libnss3 \ 34 | libdbus-1-3 \ 35 | libatk1.0-0 \ 36 | libgbm-dev \ 37 | libasound2 \ 38 | libxrandr2 \ 39 | libxkbcommon-dev \ 40 | libxfixes3 \ 41 | libxcomposite1 \ 42 | libxdamage1 \ 43 | libatk-bridge2.0-0 \ 44 | libpango-1.0-0 \ 45 | libcairo2 \ 46 | libcups2 \ 47 | && apt-get clean \ 48 | && rm -rf /var/lib/apt/lists/* 49 | # setup pnpm 50 | ENV PNPM_HOME="/pnpm" 51 | ENV PATH="$PNPM_HOME:$PATH" 52 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0 53 | RUN corepack enable 54 | 55 | FROM base AS prod-deps 56 | COPY package.json pnpm-lock.yaml* /app/ 57 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile 58 | RUN pnpm install --prefer-offline --no-cache --prod 59 | 60 | FROM prod-deps AS build 61 | COPY tsconfig.json /app 62 | COPY src /app/src 63 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile 64 | RUN pnpm build 65 | 66 | FROM base 67 | COPY static /app/static 68 | COPY --from=install-whisper /whisper /app/data/libs/whisper.cpp 69 | COPY --from=prod-deps /app/node_modules /app/node_modules 70 | COPY --from=build /app/dist /app/dist 71 | COPY package.json /app/ 72 | 73 | # app configuration via environment variables 74 | ENV DATA_DIR_PATH=/app/data 75 | ENV DOCKER=true 76 | 77 | # install kokoro, headless chrome and ensure music files are present 78 | RUN node dist/scripts/install.js 79 | 80 | CMD ["pnpm", "start"] 81 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "short-video-maker", 3 | "version": "1.0.12", 4 | "description": "Creates short videos for TikTok, Instagram Reels, and YouTube Shorts using the Model Context Protocol (MCP) and a REST API.", 5 | "main": "index.js", 6 | "bugs": "https://github.com/gyoridavid/short-video-maker/issues", 7 | "homepage": "https://github.com/gyoridavid/short-video-maker", 8 | "scripts": { 9 | "build": "rimraf dist && tsc", 10 | "dev": "node --watch -r ts-node/register src/index.ts", 11 | "start": "node dist/index.js", 12 | "test": "echo \"Error: no test specified\" && exit 1", 13 | "prepublishOnly": "npm run build && echo \"#!/usr/bin/env node\n$(cat dist/index.js)\" > dist/index.js && chmod +x dist/index.js" 14 | }, 15 | "bin": { 16 | "short-video-maker": "dist/index.js" 17 | }, 18 | "files": [ 19 | "dist", 20 | "static" 21 | ], 22 | "keywords": [ 23 | "shorts", 24 | "mcp", 25 | "model context protocol", 26 | "reels", 27 | "tiktok", 28 | "youtube shorts", 29 | "youtube", 30 | "short video", 31 | "video creation", 32 | "instagram", 33 | "video", 34 | "generator", 35 | "remotion", 36 | "faceless video" 37 | ], 38 | "author": "David Gyori", 39 | "license": "MIT", 40 | "dependencies": { 41 | "@ffmpeg-installer/ffmpeg": "^1.1.0", 42 | "@modelcontextprotocol/sdk": "^1.9.0", 43 | "@remotion/bundler": "^4.0.286", 44 | "@remotion/cli": "^4.0.286", 45 | "@remotion/google-fonts": "^4.0.286", 46 | "@remotion/install-whisper-cpp": "^4.0.286", 47 | "@remotion/renderer": "^4.0.286", 48 | "@remotion/zod-types": "^4.0.286", 49 | "content-type": "^1.0.5", 50 | "cuid": "^3.0.0", 51 | "dotenv": "^16.4.7", 52 | "express": "^5.1.0", 53 | "fluent-ffmpeg": "^2.1.3", 54 | "fs-extra": "^11.3.0", 55 | "kokoro-js": "^1.2.0", 56 | "nock": "^14.0.3", 57 | "pino": "^9.6.0", 58 | "react": "^19.1.0", 59 | "react-dom": "^19.1.0", 60 | "remotion": "^4.0.286", 61 | "zod": "^3.24.2", 62 | "zod-to-json-schema": "^3.24.5" 63 | }, 64 | "devDependencies": { 65 | "@remotion/eslint-config-flat": "^4.0.286", 66 | "@types/content-type": "^1.1.8", 67 | "@types/express": "^5.0.1", 68 | "@types/fluent-ffmpeg": "^2.1.27", 69 | "@types/fs-extra": "^11.0.4", 70 | "@types/nock": "^11.1.0", 71 | "@types/node": "^22.14.0", 72 | "@types/react": "^19.1.0", 73 | "eslint": "^9.24.0", 74 | "prettier": "^3.5.3", 75 | "rimraf": "^6.0.1", 76 | "ts-node": "^10.9.2", 77 | "typescript": "^5.8.3", 78 | "vitest": "^3.1.1" 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /postcss.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | "@tailwindcss/postcss": {}, 4 | }, 5 | }; 6 | -------------------------------------------------------------------------------- /remotion.config.ts: -------------------------------------------------------------------------------- 1 | // See all configuration options: https://remotion.dev/docs/config 2 | // Each option also is available as a CLI flag: https://remotion.dev/docs/cli 3 | 4 | // Note: When using the Node.JS APIs, the config file doesn't apply. Instead, pass options directly to the APIs 5 | 6 | import { Config } from "@remotion/cli/config"; 7 | 8 | Config.setVideoImageFormat("jpeg"); 9 | Config.setOverwriteOutput(true); 10 | Config.setPublicDir("data/music"); 11 | Config.setEntryPoint("src/components/root/index.ts"); 12 | -------------------------------------------------------------------------------- /src/components/root/index.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | -------------------------------------------------------------------------------- /src/components/root/index.ts: -------------------------------------------------------------------------------- 1 | import { registerRoot } from "remotion"; 2 | import { RemotionRoot } from "./Root"; 3 | 4 | registerRoot(RemotionRoot); 5 | -------------------------------------------------------------------------------- /src/components/videos/ShortVideo.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | AbsoluteFill, 3 | Sequence, 4 | useCurrentFrame, 5 | useVideoConfig, 6 | Audio, 7 | staticFile, 8 | OffthreadVideo, 9 | } from "remotion"; 10 | import { z } from "zod"; 11 | import { loadFont } from "@remotion/google-fonts/BarlowCondensed"; 12 | import type { Caption, CaptionLine, CaptionPage } from "../../types/shorts"; 13 | 14 | const { fontFamily } = loadFont(); // "Barlow Condensed" 15 | 16 | export const shortVideoSchema = z.object({ 17 | scenes: z.array( 18 | z.object({ 19 | captions: z.custom(), 20 | audio: z.object({ 21 | dataUri: z.string(), 22 | duration: z.number(), 23 | }), 24 | video: z.string(), 25 | }), 26 | ), 27 | config: z.object({ 28 | paddingBack: z.number().optional(), 29 | durationMs: z.number(), 30 | }), 31 | music: z.object({ 32 | file: z.string(), 33 | start: z.number(), 34 | end: z.number(), 35 | }), 36 | }); 37 | 38 | function createCaptionPages({ 39 | captions, 40 | lineMaxLength, 41 | lineCount, 42 | maxDistanceMs, 43 | }: { 44 | captions: Caption[]; 45 | lineMaxLength: number; 46 | lineCount: number; 47 | maxDistanceMs: number; 48 | }) { 49 | const pages = []; 50 | let currentPage: CaptionPage = { 51 | startMs: 0, 52 | endMs: 0, 53 | lines: [], 54 | }; 55 | let currentLine: CaptionLine = { 56 | texts: [], 57 | }; 58 | 59 | captions.forEach((caption, i) => { 60 | // Check if we need to start a new page due to time gap 61 | if (i > 0 && caption.startMs - currentPage.endMs > maxDistanceMs) { 62 | // Add current line if not empty 63 | if (currentLine.texts.length > 0) { 64 | currentPage.lines.push(currentLine); 65 | } 66 | // Add current page if not empty 67 | if (currentPage.lines.length > 0) { 68 | pages.push(currentPage); 69 | } 70 | // Start new page 71 | currentPage = { 72 | startMs: caption.startMs, 73 | endMs: caption.endMs, 74 | lines: [], 75 | }; 76 | currentLine = { 77 | texts: [], 78 | }; 79 | } 80 | 81 | // Check if adding this caption exceeds the line length 82 | const currentLineText = currentLine.texts.map((t) => t.text).join(" "); 83 | if ( 84 | currentLine.texts.length > 0 && 85 | currentLineText.length + 1 + caption.text.length > lineMaxLength 86 | ) { 87 | // Line is full, add it to current page 88 | currentPage.lines.push(currentLine); 89 | currentLine = { 90 | texts: [], 91 | }; 92 | 93 | // Check if page is full 94 | if (currentPage.lines.length >= lineCount) { 95 | // Page is full, add it to pages 96 | pages.push(currentPage); 97 | // Start new page 98 | currentPage = { 99 | startMs: caption.startMs, 100 | endMs: caption.endMs, 101 | lines: [], 102 | }; 103 | } 104 | } 105 | 106 | // Add caption to current line 107 | currentLine.texts.push({ 108 | text: caption.text, 109 | startMs: caption.startMs, 110 | endMs: caption.endMs, 111 | }); 112 | 113 | // Update page timing 114 | currentPage.endMs = caption.endMs; 115 | if (i === 0 || currentPage.startMs === 0) { 116 | currentPage.startMs = caption.startMs; 117 | } else { 118 | currentPage.startMs = Math.min(currentPage.startMs, caption.startMs); 119 | } 120 | }); 121 | 122 | // Don't forget to add the last line and page 123 | if (currentLine.texts.length > 0) { 124 | currentPage.lines.push(currentLine); 125 | } 126 | if (currentPage.lines.length > 0) { 127 | pages.push(currentPage); 128 | } 129 | 130 | return pages; 131 | } 132 | 133 | export const ShortVideo: React.FC> = ({ 134 | scenes, 135 | music, 136 | config, 137 | }) => { 138 | const frame = useCurrentFrame(); 139 | const { fps } = useVideoConfig(); 140 | const activeStyle = { 141 | backgroundColor: "blue", 142 | padding: "10px", 143 | marginLeft: "-10px", 144 | marginRight: "-10px", 145 | borderRadius: "10px", 146 | }; 147 | return ( 148 | 149 | 253 | ); 254 | }; 255 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | import path from "path"; 2 | import "dotenv/config"; 3 | import os from "os"; 4 | import fs from "fs-extra"; 5 | import pino from "pino"; 6 | 7 | type whisperModels = 8 | | "tiny" 9 | | "tiny.en" 10 | | "base" 11 | | "base.en" 12 | | "small" 13 | | "small.en" 14 | | "medium" 15 | | "medium.en" 16 | | "large-v1" 17 | | "large-v2" 18 | | "large-v3" 19 | | "large-v3-turbo"; 20 | 21 | const defaultLogLevel: pino.Level = "info"; 22 | const defaultPort = 3123; 23 | const whisperVersion = "1.7.1"; 24 | const whisperModel: whisperModels = "medium.en"; // possible options: "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo" 25 | 26 | // Create the global logger 27 | export const logger = pino({ 28 | level: process.env.LOG_LEVEL ?? defaultLogLevel, 29 | timestamp: pino.stdTimeFunctions.isoTime, 30 | formatters: { 31 | level: (label) => { 32 | return { level: label }; 33 | }, 34 | }, 35 | }); 36 | 37 | export class Config { 38 | private dataDirPath: string; 39 | private libsDirPath: string; 40 | private staticDirPath: string; 41 | 42 | public whisperInstallPath: string; 43 | public videosDirPath: string; 44 | public tempDirPath: string; 45 | public packageDirPath: string; 46 | public musicDirPath: string; 47 | public pexelsApiKey: string; 48 | public logLevel: pino.Level; 49 | public whisperVerbose: boolean; 50 | public port: number; 51 | public runningInDocker: boolean; 52 | public devMode: boolean; 53 | public whisperVersion: string = whisperVersion; 54 | public whisperModel: whisperModels = whisperModel; 55 | 56 | constructor() { 57 | this.dataDirPath = 58 | process.env.DATA_DIR_PATH ?? 59 | path.join(os.homedir(), ".ai-agents-az-video-generator"); 60 | this.libsDirPath = path.join(this.dataDirPath, "libs"); 61 | 62 | this.whisperInstallPath = path.join(this.libsDirPath, "whisper.cpp"); 63 | this.videosDirPath = path.join(this.dataDirPath, "videos"); 64 | this.tempDirPath = path.join(this.dataDirPath, "temp"); 65 | 66 | fs.ensureDirSync(this.dataDirPath); 67 | fs.ensureDirSync(this.libsDirPath); 68 | fs.ensureDirSync(this.videosDirPath); 69 | fs.ensureDirSync(this.tempDirPath); 70 | 71 | this.packageDirPath = path.join(__dirname, ".."); 72 | this.staticDirPath = path.join(this.packageDirPath, "static"); 73 | this.musicDirPath = path.join(this.staticDirPath, "music"); 74 | 75 | this.pexelsApiKey = process.env.PEXELS_API_KEY as string; 76 | this.logLevel = (process.env.LOG_LEVEL ?? defaultLogLevel) as pino.Level; 77 | this.whisperVerbose = process.env.WHISPER_VERBOSE === "true"; 78 | this.port = process.env.PORT ? parseInt(process.env.PORT) : defaultPort; 79 | this.runningInDocker = process.env.DOCKER === "true"; 80 | this.devMode = process.env.DEV === "true"; 81 | } 82 | 83 | public ensureConfig() { 84 | if (!this.pexelsApiKey) { 85 | throw new Error( 86 | "PEXELS_API_KEY environment variable is missing. Get your free API key: https://www.pexels.com/api/key/ - see how to run the project: https://github.com/gyoridavid/short-video-maker", 87 | ); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-unused-vars */ 2 | import { Kokoro } from "./short-creator/libraries/Kokoro"; 3 | import { Remotion } from "./short-creator/libraries/Remotion"; 4 | import { Whisper } from "./short-creator/libraries/Whisper"; 5 | import { FFMpeg } from "./short-creator/libraries/FFmpeg"; 6 | import { PexelsAPI } from "./short-creator/libraries/Pexels"; 7 | import { Config } from "./config"; 8 | import { ShortCreator } from "./short-creator/ShortCreator"; 9 | import { logger } from "./logger"; 10 | import { Server } from "./server/server"; 11 | import { MusicManager } from "./short-creator/music"; 12 | 13 | async function main() { 14 | const config = new Config(); 15 | try { 16 | config.ensureConfig(); 17 | } catch (err: unknown) { 18 | if (err instanceof Error) { 19 | logger.error(err.message, "Error in config"); 20 | } else if (typeof err === "string") { 21 | logger.error(err, "Error in config"); 22 | } else { 23 | logger.error("Unknown error", "Error in config"); 24 | } 25 | 26 | process.exit(1); 27 | } 28 | 29 | const musicManager = new MusicManager(config); 30 | try { 31 | logger.debug("checking music files"); 32 | musicManager.ensureMusicFilesExist(); 33 | } catch (err) { 34 | logger.error(err, "Missing music files"); 35 | process.exit(1); 36 | } 37 | 38 | logger.debug("initializing remotion"); 39 | const remotion = await Remotion.init(config); 40 | logger.debug("initializing kokoro"); 41 | const kokoro = await Kokoro.init(); 42 | logger.debug("initializing whisper"); 43 | const whisper = await Whisper.init(config); 44 | logger.debug("initializing ffmpeg"); 45 | const ffmpeg = await FFMpeg.init(); 46 | const pexelsApi = new PexelsAPI(config.pexelsApiKey); 47 | 48 | logger.debug("initializing the short creator"); 49 | const shortCreator = new ShortCreator( 50 | config, 51 | remotion, 52 | kokoro, 53 | whisper, 54 | ffmpeg, 55 | pexelsApi, 56 | musicManager, 57 | ); 58 | 59 | logger.debug("initializing the server"); 60 | const server = new Server(config.port, shortCreator); 61 | const app = server.start(); 62 | 63 | // todo add shutdown handler 64 | } 65 | 66 | main().catch((err) => { 67 | logger.error(err, "Error starting server"); 68 | }); 69 | -------------------------------------------------------------------------------- /src/logger.ts: -------------------------------------------------------------------------------- 1 | import { logger } from "./config"; 2 | 3 | export default logger; 4 | export { logger }; 5 | -------------------------------------------------------------------------------- /src/scripts/install.ts: -------------------------------------------------------------------------------- 1 | import { ensureBrowser } from "@remotion/renderer"; 2 | 3 | import { logger } from "../logger"; 4 | import { Kokoro } from "../short-creator/libraries/Kokoro"; 5 | import { MusicManager } from "../short-creator/music"; 6 | import { Config } from "../config"; 7 | import { Whisper } from "../short-creator/libraries/Whisper"; 8 | 9 | // runs in docker 10 | export async function install() { 11 | const config = new Config(); 12 | 13 | logger.info("Installing dependencies..."); 14 | logger.info("Installing Kokoro..."); 15 | await Kokoro.init(); 16 | logger.info("Installing browser shell..."); 17 | await ensureBrowser(); 18 | logger.info("Installing whisper.cpp"); 19 | await Whisper.init(config); 20 | logger.info("Installing dependencies complete"); 21 | 22 | logger.info("Ensuring the music files exist..."); 23 | const musicManager = new MusicManager(config); 24 | try { 25 | musicManager.ensureMusicFilesExist(); 26 | } catch (err) { 27 | logger.error(err, "Missing music files"); 28 | process.exit(1); 29 | } 30 | } 31 | 32 | install() 33 | .then(() => { 34 | logger.info("Installation complete"); 35 | }) 36 | .catch((err: unknown) => { 37 | logger.error(err, "Installation failed"); 38 | }); 39 | -------------------------------------------------------------------------------- /src/server/routers/mcp.ts: -------------------------------------------------------------------------------- 1 | import express from "express"; 2 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; 3 | import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js"; 4 | import z from "zod"; 5 | 6 | import { ShortCreator } from "../../short-creator/ShortCreator"; 7 | import { logger } from "../../logger"; 8 | import { renderConfig, sceneInput } from "../../types/shorts"; 9 | 10 | export class MCPRouter { 11 | router: express.Router; 12 | shortCreator: ShortCreator; 13 | transports: { [sessionId: string]: SSEServerTransport } = {}; 14 | mcpServer: McpServer; 15 | constructor(shortCreator: ShortCreator) { 16 | this.router = express.Router(); 17 | this.shortCreator = shortCreator; 18 | 19 | this.mcpServer = new McpServer({ 20 | name: "Short Creator", 21 | version: "0.0.1", 22 | capabilities: { 23 | resources: {}, 24 | tools: {}, 25 | }, 26 | }); 27 | 28 | this.setupMCPServer(); 29 | this.setupRoutes(); 30 | } 31 | 32 | private setupMCPServer() { 33 | this.mcpServer.tool( 34 | "get-video-status", 35 | "Get the status of a video (ready, processing, failed)", 36 | { 37 | videoId: z.string().describe("The ID of the video"), 38 | }, 39 | async ({ videoId }) => { 40 | const status = this.shortCreator.status(videoId); 41 | return { 42 | content: [ 43 | { 44 | type: "text", 45 | text: status, 46 | }, 47 | ], 48 | }; 49 | }, 50 | ); 51 | 52 | this.mcpServer.tool( 53 | "create-short-video", 54 | "Create a short video from a list of scenes", 55 | { 56 | scenes: z.array(sceneInput).describe("Each scene to be created"), 57 | config: renderConfig.describe("Configuration for rendering the video"), 58 | }, 59 | async ({ scenes, config }) => { 60 | const videoId = await this.shortCreator.addToQueue(scenes, config); 61 | 62 | return { 63 | content: [ 64 | { 65 | type: "text", 66 | text: videoId, 67 | }, 68 | ], 69 | }; 70 | }, 71 | ); 72 | } 73 | 74 | private setupRoutes() { 75 | this.router.get("/sse", async (req, res) => { 76 | logger.info("SSE GET request received"); 77 | 78 | const transport = new SSEServerTransport("/mcp/messages", res); 79 | this.transports[transport.sessionId] = transport; 80 | res.on("close", () => { 81 | delete this.transports[transport.sessionId]; 82 | }); 83 | await this.mcpServer.connect(transport); 84 | }); 85 | 86 | this.router.post("/messages", async (req, res) => { 87 | logger.info("SSE POST request received"); 88 | 89 | const sessionId = req.query.sessionId as string; 90 | const transport = this.transports[sessionId]; 91 | if (transport) { 92 | await transport.handlePostMessage(req, res); 93 | } else { 94 | res.status(400).send("No transport found for sessionId"); 95 | } 96 | }); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/server/routers/rest.ts: -------------------------------------------------------------------------------- 1 | import express from "express"; 2 | import type { 3 | Request as ExpressRequest, 4 | Response as ExpressResponse, 5 | } from "express"; 6 | 7 | import { validateCreateShortInput } from "../validator"; 8 | import { ShortCreator } from "../../short-creator/ShortCreator"; 9 | import { logger } from "../../logger"; 10 | 11 | // todo abstract class 12 | export class APIRouter { 13 | router: express.Router; 14 | shortCreator: ShortCreator; 15 | constructor(shortCreator: ShortCreator) { 16 | this.router = express.Router(); 17 | this.shortCreator = shortCreator; 18 | 19 | this.router.use(express.json()); 20 | 21 | this.setupRoutes(); 22 | } 23 | 24 | private setupRoutes() { 25 | this.router.post( 26 | "/short-video", 27 | async (req: ExpressRequest, res: ExpressResponse) => { 28 | try { 29 | const input = validateCreateShortInput(req.body); 30 | 31 | const videoId = this.shortCreator.addToQueue( 32 | input.scenes, 33 | input.config, 34 | ); 35 | 36 | res.status(201).json({ 37 | videoId, 38 | }); 39 | } catch (err: unknown) { 40 | logger.error(err, "Error validating input"); 41 | 42 | // Handle validation errors specifically 43 | if (err instanceof Error && err.message.startsWith("{")) { 44 | try { 45 | const errorData = JSON.parse(err.message); 46 | res.status(400).json({ 47 | error: "Validation failed", 48 | message: errorData.message, 49 | missingFields: errorData.missingFields, 50 | }); 51 | return; 52 | } catch (parseError) { 53 | logger.error(parseError, "Error parsing validation error"); 54 | } 55 | } 56 | 57 | // Fallback for other errors 58 | res.status(400).json({ 59 | error: "Invalid input", 60 | message: err instanceof Error ? err.message : "Unknown error", 61 | }); 62 | } 63 | }, 64 | ); 65 | 66 | this.router.get( 67 | "/short-video/:videoId/status", 68 | async (req: ExpressRequest, res: ExpressResponse) => { 69 | const { videoId } = req.params; 70 | if (!videoId) { 71 | res.status(400).json({ 72 | error: "videoId is required", 73 | }); 74 | return; 75 | } 76 | const status = this.shortCreator.status(videoId); 77 | res.status(200).json({ 78 | status, 79 | }); 80 | }, 81 | ); 82 | 83 | this.router.get( 84 | "/music-tags", 85 | (req: ExpressRequest, res: ExpressResponse) => { 86 | res.status(200).json(this.shortCreator.ListAvailableMusicTags()); 87 | }, 88 | ); 89 | 90 | this.router.delete( 91 | "/short-video/:videoId", 92 | (req: ExpressRequest, res: ExpressResponse) => { 93 | const { videoId } = req.params; 94 | if (!videoId) { 95 | res.status(400).json({ 96 | error: "videoId is required", 97 | }); 98 | return; 99 | } 100 | this.shortCreator.deleteVideo(videoId); 101 | res.status(200).json({ 102 | success: true, 103 | }); 104 | }, 105 | ); 106 | 107 | this.router.get( 108 | "/short-video/:videoId", 109 | (req: ExpressRequest, res: ExpressResponse) => { 110 | try { 111 | const { videoId } = req.params; 112 | if (!videoId) { 113 | res.status(400).json({ 114 | error: "videoId is required", 115 | }); 116 | return; 117 | } 118 | const video = this.shortCreator.getVideo(videoId); 119 | res.setHeader("Content-Type", "video/mp4"); 120 | res.setHeader( 121 | "Content-Disposition", 122 | `inline; filename=${videoId}.mp4`, 123 | ); 124 | res.send(video); 125 | } catch (error: unknown) { 126 | logger.error(error, "Error getting video"); 127 | res.status(404).json({ 128 | error: "Video not found", 129 | }); 130 | } 131 | }, 132 | ); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/server/server.ts: -------------------------------------------------------------------------------- 1 | import http from "http"; 2 | import express from "express"; 3 | import type { 4 | Request as ExpressRequest, 5 | Response as ExpressResponse, 6 | } from "express"; 7 | import { ShortCreator } from "../short-creator/ShortCreator"; 8 | import { APIRouter } from "./routers/rest"; 9 | import { MCPRouter } from "./routers/mcp"; 10 | import { logger } from "../logger"; 11 | 12 | export class Server { 13 | private app: express.Application; 14 | private port: number; 15 | private shortCreator: ShortCreator; 16 | 17 | constructor(port: number, shortCreator: ShortCreator) { 18 | this.port = port; 19 | this.app = express(); 20 | this.shortCreator = shortCreator; 21 | 22 | // add healthcheck endpoint 23 | this.app.get("/health", (req: ExpressRequest, res: ExpressResponse) => { 24 | res.status(200).json({ status: "ok" }); 25 | }); 26 | 27 | const apiRouter = new APIRouter(shortCreator); 28 | const mcpRouter = new MCPRouter(shortCreator); 29 | this.app.use("/api", apiRouter.router); 30 | this.app.use("/mcp", mcpRouter.router); 31 | } 32 | 33 | public start(): http.Server { 34 | return this.app.listen(this.port, (error: unknown) => { 35 | if (error) { 36 | logger.error(error, "Error starting server"); 37 | return; 38 | } 39 | logger.info( 40 | { port: this.port, mcp: "/mcp", api: "/api" }, 41 | "MCP and API server is running", 42 | ); 43 | // todo log instructions 44 | }); 45 | } 46 | 47 | public getApp() { 48 | return this.app; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/server/validator.ts: -------------------------------------------------------------------------------- 1 | import { createShortInput, CreateShortInput } from "../types/shorts"; 2 | import { logger } from "../logger"; 3 | import { ZodError } from "zod"; 4 | 5 | export interface ValidationErrorResult { 6 | message: string; 7 | missingFields: Record; 8 | } 9 | 10 | export function validateCreateShortInput(input: object): CreateShortInput { 11 | const validated = createShortInput.safeParse(input); 12 | logger.info({ validated }, "Validated input"); 13 | 14 | if (validated.success) { 15 | return validated.data; 16 | } 17 | 18 | // Process the validation errors 19 | const errorResult = formatZodError(validated.error); 20 | 21 | throw new Error( 22 | JSON.stringify({ 23 | message: errorResult.message, 24 | missingFields: errorResult.missingFields, 25 | }), 26 | ); 27 | } 28 | 29 | function formatZodError(error: ZodError): ValidationErrorResult { 30 | const missingFields: Record = {}; 31 | 32 | // Extract all the errors into a human-readable format 33 | error.errors.forEach((err) => { 34 | const path = err.path.join("."); 35 | missingFields[path] = err.message; 36 | }); 37 | 38 | // Create a human-readable message 39 | const errorPaths = Object.keys(missingFields); 40 | let message = `Validation failed for ${errorPaths.length} field(s): `; 41 | message += errorPaths.join(", "); 42 | 43 | return { 44 | message, 45 | missingFields, 46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /src/short-creator/ShortCreator.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @remotion/deterministic-randomness */ 2 | import fs from "fs-extra"; 3 | import cuid from "cuid"; 4 | import path from "path"; 5 | 6 | import { Kokoro } from "./libraries/Kokoro"; 7 | import { Remotion } from "./libraries/Remotion"; 8 | import { Whisper } from "./libraries/Whisper"; 9 | import { FFMpeg } from "./libraries/FFmpeg"; 10 | import { PexelsAPI } from "./libraries/Pexels"; 11 | import { Config } from "../config"; 12 | import { logger } from "../logger"; 13 | import { MusicManager } from "./music"; 14 | import { type Music } from "../types/shorts"; 15 | import type { 16 | SceneInput, 17 | RenderConfig, 18 | Scene, 19 | VideoStatus, 20 | MusicMoodEnum, 21 | MusicTag, 22 | } from "../types/shorts"; 23 | 24 | export class ShortCreator { 25 | private queue: { 26 | sceneInput: SceneInput[]; 27 | config: RenderConfig; 28 | id: string; 29 | }[] = []; 30 | constructor( 31 | private config: Config, 32 | private remotion: Remotion, 33 | private kokoro: Kokoro, 34 | private whisper: Whisper, 35 | private ffmpeg: FFMpeg, 36 | private pexelsApi: PexelsAPI, 37 | private musicManager: MusicManager, 38 | ) {} 39 | 40 | public status(id: string): VideoStatus { 41 | const videoPath = this.getVideoPath(id); 42 | if (this.queue.find((item) => item.id === id)) { 43 | return "processing"; 44 | } 45 | if (fs.existsSync(videoPath)) { 46 | return "ready"; 47 | } 48 | return "failed"; 49 | } 50 | 51 | public addToQueue(sceneInput: SceneInput[], config: RenderConfig): string { 52 | // todo add mutex lock 53 | const id = cuid(); 54 | this.queue.push({ 55 | sceneInput, 56 | config, 57 | id, 58 | }); 59 | if (this.queue.length === 1) { 60 | this.processQueue(); 61 | } 62 | return id; 63 | } 64 | 65 | private async processQueue(): Promise { 66 | // todo add a semaphore 67 | if (this.queue.length === 0) { 68 | return; 69 | } 70 | const { sceneInput, config, id } = this.queue[0]; 71 | logger.debug( 72 | { sceneInput, config, id }, 73 | "Processing video item in the queue", 74 | ); 75 | try { 76 | await this.createShort(id, sceneInput, config); 77 | logger.debug({ id }, "Video created successfully"); 78 | } catch (error) { 79 | logger.error({ error }, "Error creating video"); 80 | } finally { 81 | this.queue.shift(); 82 | this.processQueue(); 83 | } 84 | } 85 | 86 | private async createShort( 87 | videoId: string, 88 | inputScenes: SceneInput[], 89 | config: RenderConfig, 90 | ): Promise { 91 | logger.debug( 92 | { 93 | inputScenes, 94 | }, 95 | "Creating short video", 96 | ); 97 | const scenes: Scene[] = []; 98 | let totalDuration = 0; 99 | const excludeVideoIds = []; 100 | 101 | let index = 0; 102 | for (const scene of inputScenes) { 103 | const audio = await this.kokoro.generate(scene.text, "af_heart"); 104 | let { audioLength } = audio; 105 | const { audio: audioStream } = audio; 106 | 107 | // add the paddingBack in seconds to the last scene 108 | if (index + 1 === inputScenes.length && config.paddingBack) { 109 | audioLength += config.paddingBack / 1000; 110 | } 111 | 112 | const tempAudioPath = path.join(this.config.tempDirPath, `${cuid()}.wav`); 113 | await this.ffmpeg.normalizeAudioForWhisper(audioStream, tempAudioPath); 114 | const captions = await this.whisper.CreateCaption(tempAudioPath); 115 | fs.removeSync(tempAudioPath); 116 | 117 | const audioDataUri = await this.ffmpeg.createMp3DataUri(audioStream); 118 | const video = await this.pexelsApi.findVideo( 119 | scene.searchTerms, 120 | audioLength, 121 | excludeVideoIds, 122 | ); 123 | excludeVideoIds.push(video.id); 124 | 125 | scenes.push({ 126 | captions, 127 | video: video.url, 128 | audio: { 129 | dataUri: audioDataUri, 130 | duration: audioLength, 131 | }, 132 | }); 133 | 134 | totalDuration += audioLength; 135 | index++; 136 | } 137 | if (config.paddingBack) { 138 | totalDuration += config.paddingBack / 1000; 139 | } 140 | 141 | const selectedMusic = this.findMusic(totalDuration, config.music); 142 | logger.debug({ selectedMusic }, "Selected music for the video"); 143 | 144 | await this.remotion.render( 145 | { 146 | music: selectedMusic, 147 | scenes, 148 | config: { 149 | durationMs: totalDuration * 1000, 150 | paddingBack: config.paddingBack, 151 | }, 152 | }, 153 | videoId, 154 | ); 155 | 156 | return videoId; 157 | } 158 | 159 | public getVideoPath(videoId: string): string { 160 | return path.join(this.config.videosDirPath, `${videoId}.mp4`); 161 | } 162 | 163 | public deleteVideo(videoId: string): void { 164 | const videoPath = this.getVideoPath(videoId); 165 | fs.removeSync(videoPath); 166 | logger.debug({ videoId }, "Deleted video file"); 167 | } 168 | 169 | public getVideo(videoId: string): Buffer { 170 | const videoPath = this.getVideoPath(videoId); 171 | if (!fs.existsSync(videoPath)) { 172 | throw new Error(`Video ${videoId} not found`); 173 | } 174 | return fs.readFileSync(videoPath); 175 | } 176 | 177 | private findMusic(videoDuration: number, tag?: MusicMoodEnum): Music { 178 | const musicFiles = this.musicManager.musicList().filter((music) => { 179 | if (tag) { 180 | return music.mood === tag; 181 | } 182 | return true; 183 | }); 184 | return musicFiles[Math.floor(Math.random() * musicFiles.length)]; 185 | } 186 | 187 | public ListAvailableMusicTags(): MusicTag[] { 188 | const tags = new Set(); 189 | this.musicManager.musicList().forEach((music) => { 190 | tags.add(music.mood as MusicTag); 191 | }); 192 | return Array.from(tags.values()); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/short-creator/libraries/FFmpeg.ts: -------------------------------------------------------------------------------- 1 | import ffmpeg from "fluent-ffmpeg"; 2 | import { Readable } from "node:stream"; 3 | import { logger } from "../../logger"; 4 | 5 | export class FFMpeg { 6 | static async init(): Promise { 7 | return import("@ffmpeg-installer/ffmpeg").then((ffmpegInstaller) => { 8 | ffmpeg.setFfmpegPath(ffmpegInstaller.path); 9 | logger.info("FFmpeg path set to:", ffmpegInstaller.path); 10 | return new FFMpeg(); 11 | }); 12 | } 13 | 14 | async normalizeAudioForWhisper( 15 | audio: ArrayBuffer, 16 | outputPath: string, 17 | ): Promise { 18 | logger.debug("Normalizing audio for Whisper"); 19 | const inputStream = new Readable(); 20 | inputStream.push(Buffer.from(audio)); 21 | inputStream.push(null); 22 | 23 | return new Promise((resolve, reject) => { 24 | ffmpeg() 25 | .input(inputStream) 26 | .audioCodec("pcm_s16le") 27 | .audioChannels(1) 28 | .audioFrequency(16000) 29 | .toFormat("wav") 30 | .on("end", () => { 31 | logger.debug("Audio normalization complete"); 32 | resolve(outputPath); 33 | }) 34 | .on("error", (err) => { 35 | logger.error(err, "Error normalizing audio:"); 36 | reject(err); 37 | }) 38 | .save(outputPath); 39 | }); 40 | } 41 | 42 | async createMp3DataUri(audio: ArrayBuffer): Promise { 43 | const inputStream = new Readable(); 44 | inputStream.push(Buffer.from(audio)); 45 | inputStream.push(null); 46 | return new Promise((resolve, reject) => { 47 | const chunk: Buffer[] = []; 48 | 49 | ffmpeg() 50 | .input(inputStream) 51 | .audioCodec("libmp3lame") 52 | .audioBitrate(128) 53 | .audioChannels(2) 54 | .toFormat("mp3") 55 | .on("error", (err) => { 56 | reject(err); 57 | }) 58 | .pipe() 59 | .on("data", (data: Buffer) => { 60 | chunk.push(data); 61 | }) 62 | .on("end", () => { 63 | const buffer = Buffer.concat(chunk); 64 | resolve(`data:audio/mp3;base64,${buffer.toString("base64")}`); 65 | }) 66 | .on("error", (err) => { 67 | reject(err); 68 | }); 69 | }); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/short-creator/libraries/Kokoro.ts: -------------------------------------------------------------------------------- 1 | import { KokoroTTS } from "kokoro-js"; 2 | import type { Voices } from "../../types/shorts"; 3 | 4 | const MODEL = "onnx-community/Kokoro-82M-v1.0-ONNX"; 5 | const D_TYPE = "fp32"; // Options: "fp32", "fp16", "q8", "q4", "q4f16" 6 | 7 | export class Kokoro { 8 | constructor(private tts: KokoroTTS) {} 9 | 10 | async generate( 11 | text: string, 12 | voice: Voices, 13 | ): Promise<{ 14 | audio: ArrayBuffer; 15 | audioLength: number; 16 | }> { 17 | const audio = await this.tts.generate(text, { 18 | voice: voice, 19 | }); 20 | 21 | return { 22 | audio: audio.toWav(), 23 | audioLength: audio.audio.length / audio.sampling_rate, 24 | }; 25 | } 26 | 27 | static async init(): Promise { 28 | const tts = await KokoroTTS.from_pretrained(MODEL, { 29 | dtype: D_TYPE, 30 | device: "cpu", // only "cpu" is supported in node 31 | }); 32 | 33 | return new Kokoro(tts); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/short-creator/libraries/Pexels.test.ts: -------------------------------------------------------------------------------- 1 | process.env.LOG_LEVEL = "debug"; 2 | 3 | import nock from "nock"; 4 | import { PexelsAPI } from "./Pexels"; 5 | import { test, assert } from "vitest"; 6 | import fs from "fs-extra"; 7 | import path from "path"; 8 | 9 | test("test pexels", async () => { 10 | const mockResponse = fs.readFileSync( 11 | path.resolve("__mocks__/pexels-response.json"), 12 | "utf-8", 13 | ); 14 | nock("https://api.pexels.com") 15 | .get(/videos\/search/) 16 | .reply(200, mockResponse); 17 | const pexels = new PexelsAPI("asdf"); 18 | const video = await pexels.findVideo(["dog"], 2.4, []); 19 | console.log(video); 20 | assert.isObject(video, "Video should be an object"); 21 | }); 22 | -------------------------------------------------------------------------------- /src/short-creator/libraries/Pexels.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @remotion/deterministic-randomness */ 2 | import { logger } from "../../logger"; 3 | import type { Video } from "../../types/shorts"; 4 | 5 | const jokerTerms: string[] = ["nature", "globe", "space", "ocean"]; 6 | const durationBufferSeconds = 3; 7 | 8 | export class PexelsAPI { 9 | constructor(private API_KEY: string) {} 10 | 11 | private async _findVideo( 12 | searchTerm: string, 13 | minDurationSeconds: number, 14 | excludeIds: string[], 15 | ): Promise