├── .dockerignore
├── .editorconfig
├── .env.example
├── .gitignore
├── .prettierrc
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── __mocks__
└── pexels-response.json
├── docker-compose.yml
├── eslint.config.mjs
├── main-cuda.Dockerfile
├── main.Dockerfile
├── package.json
├── pnpm-lock.yaml
├── postcss.config.mjs
├── remotion.config.ts
├── src
├── components
│ ├── root
│ │ ├── Root.tsx
│ │ ├── index.css
│ │ └── index.ts
│ └── videos
│ │ └── ShortVideo.tsx
├── config.ts
├── index.ts
├── logger.ts
├── scripts
│ └── install.ts
├── server
│ ├── routers
│ │ ├── mcp.ts
│ │ └── rest.ts
│ ├── server.ts
│ └── validator.ts
├── short-creator
│ ├── ShortCreator.ts
│ ├── libraries
│ │ ├── FFmpeg.ts
│ │ ├── Kokoro.ts
│ │ ├── Pexels.test.ts
│ │ ├── Pexels.ts
│ │ ├── Remotion.ts
│ │ └── Whisper.ts
│ └── music.ts
└── types
│ └── shorts.ts
├── static
└── music
│ ├── 80s-synthwave-chill-166744.mp3
│ ├── README.md
│ ├── angry-trap-beat-136015.mp3
│ ├── anxious-heartbeat-dark-thriller-180565.mp3
│ ├── better-day-186374.mp3
│ ├── bright-energetic-upbeat-pop-324997.mp3
│ ├── cinematic-documentary-115669.mp3
│ ├── dark-140112.mp3
│ ├── dark-anxious-tension-dramatic-suspense-112169.mp3
│ ├── dark-electronic-207913.mp3
│ ├── dark-intentions-288498.mp3
│ ├── dark-mysterious-tense-piano-cinematic-soundtrack-226665.mp3
│ ├── deep-282969.mp3
│ ├── exciting-upbeat-background-music-306032.mp3
│ ├── frenzy-story-234221.mp3
│ ├── fun-upbeat-background-music-311769.mp3
│ ├── funny-comedy-quirky-background-music-316889.mp3
│ ├── haunting-dark-atmosphere-304116.mp3
│ ├── heerful-99148.mp3
│ ├── hopeful-cinematic-248601.mp3
│ ├── hopeful-optimism-266072.mp3
│ ├── horror-dark-spooky-piano-251474.mp3
│ ├── into-the-wild-315582.mp3
│ ├── lofi-chill-melancholic-259764.mp3
│ ├── melancholic-reflective-floating-piano-atmosphere-324686.mp3
│ ├── mellow-fellow-in-the-bellow-peaceful-lofi-instrumental-262780.mp3
│ ├── mellow-smooth-rap-beat-20230107-132480.mp3
│ ├── no-place-to-go-216744.mp3
│ ├── powerful-energy-upbeat-rock-advertising-music-245728.mp3
│ ├── quirky-169825.mp3
│ ├── sad-emotional-beat-cry-alone-121597.mp3
│ ├── sad-piano-one-181090.mp3
│ ├── sad-violin-150146.mp3
│ ├── sneaky-and-quirky-music-loop-287412.mp3
│ ├── sunset-rising-113685.mp3
│ ├── tension-113661.mp3
│ ├── unforgiving-253312.mp3
│ ├── upbeat-background-music-315196.mp3
│ ├── upbeat-funk-happy-315162.mp3
│ ├── upbeat-funky-vlog-background-music-313080.mp3
│ ├── upbeat-hip-hop-vlog-music-322878.mp3
│ └── vintage-wonder-142553.mp3
├── tsconfig.json
└── vitest.config.ts
/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .git
3 | .gitignore
4 | *.md
5 | dist
6 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | end_of_line = crlf
5 | charset = utf-8
6 | trim_trailing_whitespace = true
7 | insert_final_newline = true
8 | indent_style = space
9 | indent_size = 2
10 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | PEXELS_API_KEY= # crucial for the project to work
2 | LOG_LEVEL=trace # trace, debug, info, warn, error, fatal, silent
3 | WHISPER_VERBOSE=true
4 | PORT=3123
5 | DEV=true # local development mode
6 | DATA_DIR_PATH= # only for docker, otherwise leave empty
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | dist
3 | .DS_Store
4 | .env
5 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "useTabs": false,
3 | "bracketSpacing": true,
4 | "tabWidth": 2
5 | }
6 |
7 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Shorts Creator
2 |
3 | ## How to setup the development environment
4 |
5 | 1. Clone the repository
6 |
7 | ```bash
8 | git clone git@github.com:gyoridavid/short-video-maker.git
9 | cd short-video-maker
10 | ```
11 |
12 | 2. Install dependencies
13 |
14 | ```bash
15 | pnpm install
16 | ```
17 |
18 | 3. Copy `.env.example` to `.env` and set the right environment variables.
19 |
20 | 4. Start the server
21 | ```bash
22 | pnpm dev
23 | ```
24 |
25 | ## How to preview the videos and debug the rendering process
26 |
27 | You can use Remotion Studio to preview videos. Make sure to update the template if the underlying data structure changes.
28 |
29 | ```bash
30 | npx remotion studio
31 | ```
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 David Gyori
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Short Video Maker
2 |
3 | An open source automated video creation tool for generating short-form video content. Short Video Maker combines text-to-speech, automatic captions, background videos, and music to create engaging short videos from simple text inputs.
4 |
5 | This repository was open-sourced by the [AI Agents A-Z Youtube Channel](https://www.youtube.com/channel/UCloXqLhp_KGhHBe1kwaL2Tg). We encourage you to check out the channel for more AI-related content and tutorials.
6 |
7 | ## Hardware requirements
8 |
9 | - CPU: at least 2 cores are recommended
10 | - GPU: optional, makes the caption generation a lot faster (whisper.cpp) and the video rendering somewhat faster
11 |
12 | ## Watch the official video on how to generate videos with n8n
13 |
14 | [Watch the official video on YouTube](https://www.youtube.com/watch?v=jzsQpn-AciM)
15 |
16 | ## Running the Project
17 |
18 | ### Using NPX (recommended)
19 |
20 | The easiest way to run the project with GPU support out of the box:
21 |
22 | ```bash
23 | LOG_LEVEL=debug PEXELS_API_KEY= npx short-video-maker
24 | ```
25 |
26 | ### Using Docker
27 |
28 | #### CPU image
29 |
30 | ```bash
31 | docker run -it --rm --name short-video-maker -p 3123:3123 \
32 | -e PEXELS_API_KEY= \
33 | gyoridavid/short-video-maker:latest
34 | ```
35 |
36 | #### NVIDIA GPUs
37 | ```bash
38 | docker run -it --rm --name short-video-maker -p 3123:3123 \
39 | -e PEXELS_API_KEY= --gpus=all \
40 | gyoridavid/short-video-maker:latest-cuda
41 | ```
42 |
43 | ## Find help
44 |
45 | Join our [Discord](https://discord.gg/G7FJVJQ6RE) community for support and discussions.
46 |
47 | ## Environment Variables
48 |
49 | | Variable | Description |
50 | | --------------- | ---------------------------------------------------------------------------------- |
51 | | PEXELS_API_KEY | Your Pexels API key for background video sourcing |
52 | | PORT | Port for the API/MCP server (default: 3123) |
53 | | LOG_LEVEL | Log level for the server (default: info, options: trace, debug, info, warn, error) |
54 | | WHISPER_VERBOSE | Verbose mode for Whisper (default: false) |
55 |
56 | ## Example
57 |
58 |
59 |
60 |
61 |
62 | |
63 |
64 |
65 | ```json
66 | {
67 | "scenes": [
68 | {
69 | "text": "Hello world! Enjoy using this tool to create awesome AI workflows",
70 | "searchTerms": ["rainbow"]
71 | }
72 | ],
73 | "config": {
74 | "paddingBack": 1500,
75 | "music": "happy"
76 | }
77 | }
78 | ```
79 |
80 | |
81 |
82 |
83 |
84 | ## Features
85 |
86 | - Generate complete short videos from text prompts
87 | - Text-to-speech conversion
88 | - Automatic caption generation and styling
89 | - Background video search and selection via Pexels
90 | - Background music with genre/mood selection
91 | - Serve as both REST API and Model Context Protocol (MCP) server
92 |
93 | ## How It Works
94 |
95 | Short Video Maker takes simple text inputs and search terms, then:
96 |
97 | 1. Converts text to speech using Kokoro TTS
98 | 2. Generates accurate captions via Whisper
99 | 3. Finds relevant background videos from Pexels
100 | 4. Composes all elements with Remotion
101 | 5. Renders a professional-looking short video with perfectly timed captions
102 |
103 | ## Dependencies for the video generation
104 |
105 | | Dependency | Version | License | Purpose |
106 | | ------------------------------------------------------ | -------- | --------------------------------------------------------------------------------- | ------------------------------- |
107 | | [Remotion](https://remotion.dev/) | ^4.0.286 | [Remotion License](https://github.com/remotion-dev/remotion/blob/main/LICENSE.md) | Video composition and rendering |
108 | | [Whisper CPP](https://github.com/ggml-org/whisper.cpp) | v1.5.5 | MIT | Speech-to-text for captions |
109 | | [FFmpeg](https://ffmpeg.org/) | ^2.1.3 | LGPL/GPL | Audio/video manipulation |
110 | | [Kokoro.js](https://www.npmjs.com/package/kokoro-js) | ^1.2.0 | MIT | Text-to-speech generation |
111 | | [Pexels API](https://www.pexels.com/api/) | N/A | [Pexels Terms](https://www.pexels.com/license/) | Background videos |
112 |
113 | ## How to contribute?
114 |
115 | PRs are welcome.
116 | See the [CONTRIBUTING.md](CONTRIBUTING.md) file for instructions on setting up a local development environment.
117 |
118 | ## API Usage
119 |
120 | ### REST API
121 |
122 | The following REST endpoints are available:
123 |
124 | 1. `GET /api/short-video/:id` - Get a video by ID. The video can also be downloaded with curl:
125 |
126 | `curl -o output.mp4 http://localhost:3123/api/short-video/<id>`
127 |
128 |
129 | 2. `POST /api/short-video` - Create a new video
130 | ```json
131 | {
132 | "scenes": [
133 | {
134 | "text": "This is the text to be spoken in the video",
135 | "searchTerms": ["nature sunset"]
136 | }
137 | ],
138 | "config": {
139 | "paddingBack": 3000,
140 | "music": "chill"
141 | }
142 | }
143 | ```
144 | 3. `DELETE /api/short-video/:id` - Delete a video by ID
145 | 4. `GET /api/music-tags` - Get available music tags
146 |
147 |
148 |
149 | ### Model Context Protocol (MCP)
150 |
151 | The service also implements the Model Context Protocol:
152 |
153 | 1. `GET /mcp/sse` - Server-sent events for MCP
154 | 2. `POST /mcp/messages` - Send messages to MCP server
155 |
156 | Available MCP tools:
157 |
158 | - `create-short-video` - Create a video from a list of scenes
159 | - `get-video-status` - Check video creation status
160 |
161 | ## License
162 |
163 | This project is licensed under the [MIT License](LICENSE).
164 |
165 | ## Acknowledgments
166 |
167 | - ❤️ [Remotion](https://remotion.dev/) for programmatic video generation
168 | - ❤️ [Whisper](https://github.com/ggml-org/whisper.cpp) for speech-to-text
169 | - ❤️ [Pexels](https://www.pexels.com/) for video content
170 | - ❤️ [FFmpeg](https://ffmpeg.org/) for audio/video processing
171 | - ❤️ [Kokoro](https://github.com/hexgrad/kokoro) for TTS
172 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 | short-creator:
5 | build:
6 | context: .
7 | dockerfile: main.Dockerfile
8 | env_file:
9 | - .env
10 | environment:
11 | - DEV=false
12 | ports:
13 | - "3123:3123"
14 | entrypoint: ["node", "dist/index.js"]
15 |
--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import { config } from "@remotion/eslint-config-flat";
2 |
3 | export default config;
4 |
--------------------------------------------------------------------------------
/main-cuda.Dockerfile:
--------------------------------------------------------------------------------
1 | ARG UBUNTU_VERSION=22.04
2 | ARG CUDA_VERSION=12.3.1
3 | ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
4 | ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
5 |
6 | # Ref: https://github.com/ggml-org/whisper.cpp
7 | FROM ${BASE_CUDA_DEV_CONTAINER} AS install-whisper
8 | ENV DEBIAN_FRONTEND=noninteractive
9 |
10 | RUN apt-get update && \
11 | apt-get install --fix-missing --no-install-recommends -y bash git make vim wget g++ ffmpeg curl
12 |
13 | WORKDIR /app/data/libs/whisper.cpp
14 | RUN git clone https://github.com/ggerganov/whisper.cpp.git -b v1.7.1 --depth 1 .
15 |
16 | RUN make clean
17 | RUN GGML_CUDA=1 make -j
18 |
19 | RUN sh ./models/download-ggml-model.sh medium.en
20 |
21 | FROM ${BASE_CUDA_RUN_CONTAINER} AS base
22 |
23 | # install node
24 | RUN apt-get update && apt-get install -y \
25 | curl \
26 | ca-certificates \
27 | gnupg \
28 | lsb-release \
29 | && rm -rf /var/lib/apt/lists/*
30 | RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
31 | && apt-get update && apt-get install -y nodejs \
32 | && rm -rf /var/lib/apt/lists/*
33 | RUN node -v && npm -v
34 |
35 | # install dependencies
36 | ENV DEBIAN_FRONTEND=noninteractive
37 | WORKDIR /app
38 | RUN apt update
39 | RUN apt install -y \
40 | # whisper dependencies
41 | git \
42 | wget \
43 | cmake \
44 | ffmpeg \
45 | curl \
46 | build-essential \
47 | make \
48 | # remotion dependencies
49 | libnss3 \
50 | libdbus-1-3 \
51 | libatk1.0-0 \
52 | libgbm-dev \
53 | libasound2 \
54 | libxrandr2 \
55 | libxkbcommon-dev \
56 | libxfixes3 \
57 | libxcomposite1 \
58 | libxdamage1 \
59 | libatk-bridge2.0-0 \
60 | libpango-1.0-0 \
61 | libcairo2 \
62 | libcups2 \
63 | && apt-get clean \
64 | && rm -rf /var/lib/apt/lists/*
65 | # setup pnpm
66 | ENV PNPM_HOME="/pnpm"
67 | ENV PATH="$PNPM_HOME:$PATH"
68 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0
69 | RUN corepack enable
70 |
71 | FROM base AS prod-deps
72 | COPY package.json pnpm-lock.yaml* /app/
73 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile
74 | RUN pnpm install --prefer-offline --no-cache --prod
75 |
76 | FROM prod-deps AS build
77 | COPY tsconfig.json /app
78 | COPY src /app/src
79 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
80 | RUN pnpm build
81 |
82 | FROM base
83 | COPY static /app/static
84 | COPY --from=install-whisper /app/data/libs/whisper.cpp /app/data/libs/whisper.cpp
85 | COPY --from=prod-deps /app/node_modules /app/node_modules
86 | COPY --from=build /app/dist /app/dist
87 | COPY package.json /app/
88 |
89 | # app configuration via environment variables
90 | ENV DATA_DIR_PATH=/app/data
91 | ENV DOCKER=true
92 |
93 | # install kokoro, headless chrome and ensure music files are present
94 | RUN node dist/scripts/install.js
95 |
96 | CMD ["pnpm", "start"]
97 |
--------------------------------------------------------------------------------
/main.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:22.04 AS install-whisper
2 | ENV DEBIAN_FRONTEND=noninteractive
3 | RUN apt update
4 | # whisper install dependencies
5 | RUN apt install -y \
6 | git \
7 | build-essential \
8 | wget \
9 | cmake \
10 | && apt-get clean \
11 | && rm -rf /var/lib/apt/lists/*
12 | WORKDIR /whisper
13 | RUN git clone https://github.com/ggml-org/whisper.cpp.git .
14 | RUN git checkout v1.5.5
15 | RUN make
16 | WORKDIR /whisper/models
17 | RUN sh ./download-ggml-model.sh medium.en
18 |
19 | FROM node:22-bookworm-slim AS base
20 | ENV DEBIAN_FRONTEND=noninteractive
21 | WORKDIR /app
22 | RUN apt update
23 | RUN apt install -y \
24 | # whisper dependencies
25 | git \
26 | wget \
27 | cmake \
28 | ffmpeg \
29 | curl \
30 | make \
31 | libsdl2-dev \
32 | # remotion dependencies
33 | libnss3 \
34 | libdbus-1-3 \
35 | libatk1.0-0 \
36 | libgbm-dev \
37 | libasound2 \
38 | libxrandr2 \
39 | libxkbcommon-dev \
40 | libxfixes3 \
41 | libxcomposite1 \
42 | libxdamage1 \
43 | libatk-bridge2.0-0 \
44 | libpango-1.0-0 \
45 | libcairo2 \
46 | libcups2 \
47 | && apt-get clean \
48 | && rm -rf /var/lib/apt/lists/*
49 | # setup pnpm
50 | ENV PNPM_HOME="/pnpm"
51 | ENV PATH="$PNPM_HOME:$PATH"
52 | ENV COREPACK_ENABLE_DOWNLOAD_PROMPT=0
53 | RUN corepack enable
54 |
55 | FROM base AS prod-deps
56 | COPY package.json pnpm-lock.yaml* /app/
57 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-lockfile
58 | RUN pnpm install --prefer-offline --no-cache --prod
59 |
60 | FROM prod-deps AS build
61 | COPY tsconfig.json /app
62 | COPY src /app/src
63 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
64 | RUN pnpm build
65 |
66 | FROM base
67 | COPY static /app/static
68 | COPY --from=install-whisper /whisper /app/data/libs/whisper.cpp
69 | COPY --from=prod-deps /app/node_modules /app/node_modules
70 | COPY --from=build /app/dist /app/dist
71 | COPY package.json /app/
72 |
73 | # app configuration via environment variables
74 | ENV DATA_DIR_PATH=/app/data
75 | ENV DOCKER=true
76 |
77 | # install kokoro, headless chrome and ensure music files are present
78 | RUN node dist/scripts/install.js
79 |
80 | CMD ["pnpm", "start"]
81 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "short-video-maker",
3 | "version": "1.0.12",
4 | "description": "Creates short videos for TikTok, Instagram Reels, and YouTube Shorts using the Model Context Protocol (MCP) and a REST API.",
5 | "main": "index.js",
6 | "bugs": "https://github.com/gyoridavid/short-video-maker/issues",
7 | "homepage": "https://github.com/gyoridavid/short-video-maker",
8 | "scripts": {
9 | "build": "rimraf dist && tsc",
10 | "dev": "node --watch -r ts-node/register src/index.ts",
11 | "start": "node dist/index.js",
12 | "test": "echo \"Error: no test specified\" && exit 1",
13 | "prepublishOnly": "npm run build && echo \"#!/usr/bin/env node\n$(cat dist/index.js)\" > dist/index.js && chmod +x dist/index.js"
14 | },
15 | "bin": {
16 | "short-video-maker": "dist/index.js"
17 | },
18 | "files": [
19 | "dist",
20 | "static"
21 | ],
22 | "keywords": [
23 | "shorts",
24 | "mcp",
25 | "model context protocol",
26 | "reels",
27 | "tiktok",
28 | "youtube shorts",
29 | "youtube",
30 | "short video",
31 | "video creation",
32 | "instagram",
33 | "video",
34 | "generator",
35 | "remotion",
36 | "faceless video"
37 | ],
38 | "author": "David Gyori",
39 | "license": "MIT",
40 | "dependencies": {
41 | "@ffmpeg-installer/ffmpeg": "^1.1.0",
42 | "@modelcontextprotocol/sdk": "^1.9.0",
43 | "@remotion/bundler": "^4.0.286",
44 | "@remotion/cli": "^4.0.286",
45 | "@remotion/google-fonts": "^4.0.286",
46 | "@remotion/install-whisper-cpp": "^4.0.286",
47 | "@remotion/renderer": "^4.0.286",
48 | "@remotion/zod-types": "^4.0.286",
49 | "content-type": "^1.0.5",
50 | "cuid": "^3.0.0",
51 | "dotenv": "^16.4.7",
52 | "express": "^5.1.0",
53 | "fluent-ffmpeg": "^2.1.3",
54 | "fs-extra": "^11.3.0",
55 | "kokoro-js": "^1.2.0",
56 | "nock": "^14.0.3",
57 | "pino": "^9.6.0",
58 | "react": "^19.1.0",
59 | "react-dom": "^19.1.0",
60 | "remotion": "^4.0.286",
61 | "zod": "^3.24.2",
62 | "zod-to-json-schema": "^3.24.5"
63 | },
64 | "devDependencies": {
65 | "@remotion/eslint-config-flat": "^4.0.286",
66 | "@types/content-type": "^1.1.8",
67 | "@types/express": "^5.0.1",
68 | "@types/fluent-ffmpeg": "^2.1.27",
69 | "@types/fs-extra": "^11.0.4",
70 | "@types/nock": "^11.1.0",
71 | "@types/node": "^22.14.0",
72 | "@types/react": "^19.1.0",
73 | "eslint": "^9.24.0",
74 | "prettier": "^3.5.3",
75 | "rimraf": "^6.0.1",
76 | "ts-node": "^10.9.2",
77 | "typescript": "^5.8.3",
78 | "vitest": "^3.1.1"
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | export default {
2 | plugins: {
3 | "@tailwindcss/postcss": {},
4 | },
5 | };
6 |
--------------------------------------------------------------------------------
/remotion.config.ts:
--------------------------------------------------------------------------------
1 | // See all configuration options: https://remotion.dev/docs/config
2 | // Each option also is available as a CLI flag: https://remotion.dev/docs/cli
3 |
4 | // Note: When using the Node.JS APIs, the config file doesn't apply. Instead, pass options directly to the APIs
5 |
6 | import { Config } from "@remotion/cli/config";
7 |
8 | Config.setVideoImageFormat("jpeg");
9 | Config.setOverwriteOutput(true);
10 | Config.setPublicDir("data/music");
11 | Config.setEntryPoint("src/components/root/index.ts");
12 |
--------------------------------------------------------------------------------
/src/components/root/index.css:
--------------------------------------------------------------------------------
1 | @import "tailwindcss";
2 |
--------------------------------------------------------------------------------
/src/components/root/index.ts:
--------------------------------------------------------------------------------
1 | import { registerRoot } from "remotion";
2 | import { RemotionRoot } from "./Root";
3 |
4 | registerRoot(RemotionRoot);
5 |
--------------------------------------------------------------------------------
/src/components/videos/ShortVideo.tsx:
--------------------------------------------------------------------------------
1 | import {
2 | AbsoluteFill,
3 | Sequence,
4 | useCurrentFrame,
5 | useVideoConfig,
6 | Audio,
7 | staticFile,
8 | OffthreadVideo,
9 | } from "remotion";
10 | import { z } from "zod";
11 | import { loadFont } from "@remotion/google-fonts/BarlowCondensed";
12 | import type { Caption, CaptionLine, CaptionPage } from "../../types/shorts";
13 |
14 | const { fontFamily } = loadFont(); // "Barlow Condensed"
15 |
/**
 * Zod schema describing the input props of the `ShortVideo` composition.
 *
 * - `scenes`: one entry per scene, holding its captions, the narration audio
 *   (`dataUri` plus `duration`, which the composition multiplies by `fps`,
 *   i.e. seconds) and the background `video` string.
 * - `config`: optional `paddingBack` (the composition divides it by 1000
 *   before converting to frames, i.e. milliseconds) and `durationMs`.
 * - `music`: the music `file` and its `start` / `end` offsets.
 */
export const shortVideoSchema = z.object({
  scenes: z.array(
    z.object({
      // No runtime validation is attached to the captions; the type argument
      // only tells TypeScript that this field is a `Caption[]`, which is what
      // `createCaptionPages` expects to receive.
      captions: z.custom<Caption[]>(),
      audio: z.object({
        dataUri: z.string(),
        duration: z.number(),
      }),
      video: z.string(),
    }),
  ),
  config: z.object({
    paddingBack: z.number().optional(),
    durationMs: z.number(),
  }),
  music: z.object({
    file: z.string(),
    start: z.number(),
    end: z.number(),
  }),
});
37 |
/**
 * Groups captions into pages of lines for on-screen display.
 *
 * Captions are appended to the current line until adding the next one would
 * make the space-joined line longer than `lineMaxLength`; the line is then
 * closed. Once a page holds `lineCount` closed lines it is closed as well.
 * A new page is also started whenever the gap between the end of the current
 * page and the start of the next caption exceeds `maxDistanceMs`.
 *
 * @param captions - Captions in playback order (`text`, `startMs`, `endMs`).
 * @param lineMaxLength - Maximum number of characters per line.
 * @param lineCount - Maximum number of lines per page.
 * @param maxDistanceMs - Largest allowed pause inside a single page.
 * @returns The pages in order; an empty array when `captions` is empty.
 */
function createCaptionPages({
  captions,
  lineMaxLength,
  lineCount,
  maxDistanceMs,
}: {
  captions: Caption[];
  lineMaxLength: number;
  lineCount: number;
  maxDistanceMs: number;
}): CaptionPage[] {
  const pages: CaptionPage[] = [];
  let currentPage: CaptionPage = { startMs: 0, endMs: 0, lines: [] };
  let currentLine: CaptionLine = { texts: [] };

  captions.forEach((caption, i) => {
    // A long pause closes the current page, even if it is not full yet.
    if (i > 0 && caption.startMs - currentPage.endMs > maxDistanceMs) {
      if (currentLine.texts.length > 0) {
        currentPage.lines.push(currentLine);
      }
      if (currentPage.lines.length > 0) {
        pages.push(currentPage);
      }
      currentPage = {
        startMs: caption.startMs,
        endMs: caption.endMs,
        lines: [],
      };
      currentLine = { texts: [] };
    }

    // Would this caption (plus a separating space) overflow the line?
    const currentLineText = currentLine.texts.map((t) => t.text).join(" ");
    if (
      currentLine.texts.length > 0 &&
      currentLineText.length + 1 + caption.text.length > lineMaxLength
    ) {
      currentPage.lines.push(currentLine);
      currentLine = { texts: [] };

      // Closing that line may have filled the page.
      if (currentPage.lines.length >= lineCount) {
        pages.push(currentPage);
        currentPage = {
          startMs: caption.startMs,
          endMs: caption.endMs,
          lines: [],
        };
      }
    }

    // Decide this before pushing: the page's start time must come from the
    // first caption placed on it. A `startMs === 0` sentinel would be wrong,
    // because 0 is a legitimate start time for the first caption.
    const pageIsEmpty =
      currentPage.lines.length === 0 && currentLine.texts.length === 0;

    currentLine.texts.push({
      text: caption.text,
      startMs: caption.startMs,
      endMs: caption.endMs,
    });

    currentPage.endMs = caption.endMs;
    currentPage.startMs = pageIsEmpty
      ? caption.startMs
      : Math.min(currentPage.startMs, caption.startMs);
  });

  // Flush the trailing line and page.
  if (currentLine.texts.length > 0) {
    currentPage.lines.push(currentLine);
  }
  if (currentPage.lines.length > 0) {
    pages.push(currentPage);
  }

  return pages;
}
132 |
133 | export const ShortVideo: React.FC> = ({
134 | scenes,
135 | music,
136 | config,
137 | }) => {
138 | const frame = useCurrentFrame();
139 | const { fps } = useVideoConfig();
140 | const activeStyle = {
141 | backgroundColor: "blue",
142 | padding: "10px",
143 | marginLeft: "-10px",
144 | marginRight: "-10px",
145 | borderRadius: "10px",
146 | };
147 | return (
148 |
149 |
156 |
157 | {scenes.map((scene, i) => {
158 | const { captions, audio, video } = scene;
159 | const pages = createCaptionPages({
160 | captions,
161 | lineMaxLength: 20,
162 | lineCount: 1,
163 | maxDistanceMs: 1000,
164 | });
165 |
166 | // Calculate the start and end time of the scene
167 | const startFrame =
168 | scenes.slice(0, i).reduce((acc, curr) => {
169 | return acc + curr.audio.duration;
170 | }, 0) * fps;
171 | let durationInFrames =
172 | scenes.slice(0, i + 1).reduce((acc, curr) => {
173 | return acc + curr.audio.duration;
174 | }, 0) * fps;
175 | if (config.paddingBack && i === scenes.length - 1) {
176 | durationInFrames += (config.paddingBack / 1000) * fps;
177 | }
178 |
179 | return (
180 |
185 |
186 |
187 | {pages.map((page, j) => {
188 | return (
189 |
196 |
204 | {page.lines.map((line, k) => {
205 | return (
206 |
222 | {line.texts.map((text, l) => {
223 | const active =
224 | frame >=
225 | startFrame + (text.startMs / 1000) * fps &&
226 | frame <= startFrame + (text.endMs / 1000) * fps;
227 | return (
228 | <>
229 |
236 | {text.text}
237 |
238 | {l < line.texts.length - 1 ? " " : ""}
239 | >
240 | );
241 | })}
242 |
243 | );
244 | })}
245 |
246 |
247 | );
248 | })}
249 |
250 | );
251 | })}
252 |
253 | );
254 | };
255 |
--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
1 | import path from "path";
2 | import "dotenv/config";
3 | import os from "os";
4 | import fs from "fs-extra";
5 | import pino from "pino";
6 |
7 | type whisperModels =
8 | | "tiny"
9 | | "tiny.en"
10 | | "base"
11 | | "base.en"
12 | | "small"
13 | | "small.en"
14 | | "medium"
15 | | "medium.en"
16 | | "large-v1"
17 | | "large-v2"
18 | | "large-v3"
19 | | "large-v3-turbo";
20 |
21 | const defaultLogLevel: pino.Level = "info";
22 | const defaultPort = 3123;
23 | const whisperVersion = "1.7.1";
24 | const whisperModel: whisperModels = "medium.en"; // possible options: "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"
25 |
// Create the global logger.
// `||` is used instead of `??` so that an empty LOG_LEVEL (e.g. `LOG_LEVEL=`
// in a .env file) falls back to the default instead of reaching pino as "".
export const logger = pino({
  level: process.env.LOG_LEVEL || defaultLogLevel,
  timestamp: pino.stdTimeFunctions.isoTime,
  formatters: {
    // Write the level under the `level` key using its label.
    level: (label) => {
      return { level: label };
    },
  },
});
36 |
/**
 * Runtime configuration resolved from environment variables.
 *
 * Constructing an instance has a side effect: it creates the data directory
 * and its `libs`, `videos` and `temp` sub-directories if they do not exist.
 */
export class Config {
  private dataDirPath: string;
  private libsDirPath: string;
  private staticDirPath: string;

  public whisperInstallPath: string;
  public videosDirPath: string;
  public tempDirPath: string;
  public packageDirPath: string;
  public musicDirPath: string;
  public pexelsApiKey: string;
  public logLevel: pino.Level;
  public whisperVerbose: boolean;
  public port: number;
  public runningInDocker: boolean;
  public devMode: boolean;
  public whisperVersion: string = whisperVersion;
  public whisperModel: whisperModels = whisperModel;

  constructor() {
    // `||` rather than `??`: a `DATA_DIR_PATH=` line left empty in .env is
    // loaded as "", which must fall back to the home-directory default
    // instead of turning every derived path into a relative one.
    this.dataDirPath =
      process.env.DATA_DIR_PATH ||
      path.join(os.homedir(), ".ai-agents-az-video-generator");
    this.libsDirPath = path.join(this.dataDirPath, "libs");

    this.whisperInstallPath = path.join(this.libsDirPath, "whisper.cpp");
    this.videosDirPath = path.join(this.dataDirPath, "videos");
    this.tempDirPath = path.join(this.dataDirPath, "temp");

    fs.ensureDirSync(this.dataDirPath);
    fs.ensureDirSync(this.libsDirPath);
    fs.ensureDirSync(this.videosDirPath);
    fs.ensureDirSync(this.tempDirPath);

    // Static assets are resolved relative to this module's parent directory.
    this.packageDirPath = path.join(__dirname, "..");
    this.staticDirPath = path.join(this.packageDirPath, "static");
    this.musicDirPath = path.join(this.staticDirPath, "music");

    // A missing key is stored as "" and rejected later by `ensureConfig`.
    this.pexelsApiKey = process.env.PEXELS_API_KEY ?? "";
    this.logLevel = (process.env.LOG_LEVEL || defaultLogLevel) as pino.Level;
    this.whisperVerbose = process.env.WHISPER_VERBOSE === "true";

    // An unset, empty or non-numeric PORT falls back to the default port
    // rather than producing NaN.
    const parsedPort = Number.parseInt(process.env.PORT ?? "", 10);
    this.port = Number.isNaN(parsedPort) ? defaultPort : parsedPort;

    this.runningInDocker = process.env.DOCKER === "true";
    this.devMode = process.env.DEV === "true";
  }

  /**
   * Verifies that the mandatory settings are present.
   *
   * @throws Error when `PEXELS_API_KEY` is missing or empty.
   */
  public ensureConfig() {
    if (!this.pexelsApiKey) {
      throw new Error(
        "PEXELS_API_KEY environment variable is missing. Get your free API key: https://www.pexels.com/api/key/ - see how to run the project: https://github.com/gyoridavid/short-video-maker",
      );
    }
  }
}
91 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/no-unused-vars */
2 | import { Kokoro } from "./short-creator/libraries/Kokoro";
3 | import { Remotion } from "./short-creator/libraries/Remotion";
4 | import { Whisper } from "./short-creator/libraries/Whisper";
5 | import { FFMpeg } from "./short-creator/libraries/FFmpeg";
6 | import { PexelsAPI } from "./short-creator/libraries/Pexels";
7 | import { Config } from "./config";
8 | import { ShortCreator } from "./short-creator/ShortCreator";
9 | import { logger } from "./logger";
10 | import { Server } from "./server/server";
11 | import { MusicManager } from "./short-creator/music";
12 |
/**
 * Application entry point.
 *
 * Validates the configuration, checks the music files, initialises Remotion,
 * Kokoro, Whisper and FFmpeg, wires them into a `ShortCreator`, and starts
 * the server on the configured port. Invalid configuration or missing music
 * files terminate the process with exit code 1.
 */
async function main() {
  const config = new Config();
  try {
    config.ensureConfig();
  } catch (err: unknown) {
    // pino expects (object, message). Passing two strings treats the second
    // one as an interpolation value, so the "Error in config" label was lost.
    if (err instanceof Error) {
      logger.error(err, "Error in config");
    } else {
      logger.error({ err }, "Error in config");
    }

    process.exit(1);
  }

  const musicManager = new MusicManager(config);
  try {
    logger.debug("checking music files");
    musicManager.ensureMusicFilesExist();
  } catch (err) {
    logger.error(err, "Missing music files");
    process.exit(1);
  }

  logger.debug("initializing remotion");
  const remotion = await Remotion.init(config);
  logger.debug("initializing kokoro");
  const kokoro = await Kokoro.init();
  logger.debug("initializing whisper");
  const whisper = await Whisper.init(config);
  logger.debug("initializing ffmpeg");
  const ffmpeg = await FFMpeg.init();
  const pexelsApi = new PexelsAPI(config.pexelsApiKey);

  logger.debug("initializing the short creator");
  const shortCreator = new ShortCreator(
    config,
    remotion,
    kokoro,
    whisper,
    ffmpeg,
    pexelsApi,
    musicManager,
  );

  logger.debug("initializing the server");
  const server = new Server(config.port, shortCreator);
  server.start();

  // todo add shutdown handler
}

main().catch((err: unknown) => {
  logger.error(err, "Error starting server");
  // A failed start-up must be visible to the caller (shell, Docker,
  // supervisor) as a non-zero exit code, like the other fatal paths above.
  process.exit(1);
});
69 |
--------------------------------------------------------------------------------
/src/logger.ts:
--------------------------------------------------------------------------------
1 | import { logger } from "./config";
2 |
3 | export default logger;
4 | export { logger };
5 |
--------------------------------------------------------------------------------
/src/scripts/install.ts:
--------------------------------------------------------------------------------
1 | import { ensureBrowser } from "@remotion/renderer";
2 |
3 | import { logger } from "../logger";
4 | import { Kokoro } from "../short-creator/libraries/Kokoro";
5 | import { MusicManager } from "../short-creator/music";
6 | import { Config } from "../config";
7 | import { Whisper } from "../short-creator/libraries/Whisper";
8 |
9 | // runs in docker
/**
 * Initializes Kokoro, the Remotion browser and whisper.cpp one after the
 * other, then verifies that the music files exist.
 *
 * Exits the process with status 1 when the music file check throws.
 */
export async function install() {
  const config = new Config();

  // Each step is announced before it runs; the steps execute sequentially.
  const steps: [string, () => Promise<unknown>][] = [
    ["Installing Kokoro...", () => Kokoro.init()],
    ["Installing browser shell...", () => ensureBrowser()],
    ["Installing whisper.cpp", () => Whisper.init(config)],
  ];

  logger.info("Installing dependencies...");
  for (const [announcement, run] of steps) {
    logger.info(announcement);
    await run();
  }
  logger.info("Installing dependencies complete");

  logger.info("Ensuring the music files exist...");
  const musicManager = new MusicManager(config);
  try {
    musicManager.ensureMusicFilesExist();
  } catch (err) {
    logger.error(err, "Missing music files");
    process.exit(1);
  }
}
31 |
// Run the installation when this script is executed.
install()
  .then(() => {
    logger.info("Installation complete");
  })
  .catch((err: unknown) => {
    logger.error(err, "Installation failed");
    // Exit non-zero so the caller of this script can tell that the
    // installation did not succeed.
    process.exit(1);
  });
39 |
--------------------------------------------------------------------------------
/src/server/routers/mcp.ts:
--------------------------------------------------------------------------------
1 | import express from "express";
2 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3 | import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
4 | import z from "zod";
5 |
6 | import { ShortCreator } from "../../short-creator/ShortCreator";
7 | import { logger } from "../../logger";
8 | import { renderConfig, sceneInput } from "../../types/shorts";
9 |
/**
 * Express router that exposes the short creator as MCP tools over the SSE
 * transport.
 *
 * Routes (relative to the mount point):
 * - `GET /sse` opens an SSE stream and connects it to the MCP server.
 * - `POST /messages?sessionId=...` delivers a client message to the transport
 *   registered for that session.
 */
export class MCPRouter {
  router: express.Router;
  shortCreator: ShortCreator;
  /** Open SSE transports, keyed by the transport's session id. */
  transports: { [sessionId: string]: SSEServerTransport } = {};
  mcpServer: McpServer;

  constructor(shortCreator: ShortCreator) {
    this.router = express.Router();
    this.shortCreator = shortCreator;

    this.mcpServer = new McpServer({
      name: "Short Creator",
      version: "0.0.1",
      capabilities: {
        resources: {},
        tools: {},
      },
    });

    this.setupMCPServer();
    this.setupRoutes();
  }

  /** Registers the `get-video-status` and `create-short-video` tools. */
  private setupMCPServer() {
    this.mcpServer.tool(
      "get-video-status",
      "Get the status of a video (ready, processing, failed)",
      {
        videoId: z.string().describe("The ID of the video"),
      },
      async ({ videoId }) => {
        const status = this.shortCreator.status(videoId);
        return {
          content: [
            {
              type: "text",
              text: status,
            },
          ],
        };
      },
    );

    this.mcpServer.tool(
      "create-short-video",
      "Create a short video from a list of scenes",
      {
        scenes: z.array(sceneInput).describe("Each scene to be created"),
        config: renderConfig.describe("Configuration for rendering the video"),
      },
      async ({ scenes, config }) => {
        // addToQueue is synchronous: it returns the id of the queued video.
        const videoId = this.shortCreator.addToQueue(scenes, config);

        return {
          content: [
            {
              type: "text",
              text: videoId,
            },
          ],
        };
      },
    );
  }

  /** Registers the SSE stream and message endpoints on the router. */
  private setupRoutes() {
    this.router.get("/sse", async (req, res) => {
      logger.info("SSE GET request received");

      const transport = new SSEServerTransport("/mcp/messages", res);
      this.transports[transport.sessionId] = transport;
      // Forget the transport as soon as the client disconnects.
      res.on("close", () => {
        delete this.transports[transport.sessionId];
      });
      await this.mcpServer.connect(transport);
    });

    this.router.post("/messages", async (req, res) => {
      logger.info("SSE POST request received");

      const { sessionId } = req.query;
      // The query value is client-controlled and may be missing or an array.
      // Only accept ids registered by the /sse route: a plain object also
      // answers for inherited keys such as "constructor", which are not
      // transports and would make handlePostMessage throw.
      const transport =
        typeof sessionId === "string" &&
        Object.prototype.hasOwnProperty.call(this.transports, sessionId)
          ? this.transports[sessionId]
          : undefined;

      if (transport) {
        await transport.handlePostMessage(req, res);
      } else {
        res.status(400).send("No transport found for sessionId");
      }
    });
  }
}
99 |
--------------------------------------------------------------------------------
/src/server/routers/rest.ts:
--------------------------------------------------------------------------------
1 | import express from "express";
2 | import type {
3 | Request as ExpressRequest,
4 | Response as ExpressResponse,
5 | } from "express";
6 |
7 | import { validateCreateShortInput } from "../validator";
8 | import { ShortCreator } from "../../short-creator/ShortCreator";
9 | import { logger } from "../../logger";
10 |
11 | // todo abstract class
/**
 * Express router for the REST API: queue a short video, query its status,
 * download or delete it, and list the available music tags.
 */
export class APIRouter {
  router: express.Router;
  shortCreator: ShortCreator;

  constructor(shortCreator: ShortCreator) {
    this.router = express.Router();
    this.shortCreator = shortCreator;

    this.router.use(express.json());

    this.setupRoutes();
  }

  /** Binds every endpoint to its handler. */
  private setupRoutes() {
    const { router } = this;
    router.post("/short-video", this.createShortVideo);
    router.get("/short-video/:videoId/status", this.getShortVideoStatus);
    router.get("/music-tags", this.listMusicTags);
    router.delete("/short-video/:videoId", this.deleteShortVideo);
    router.get("/short-video/:videoId", this.downloadShortVideo);
  }

  /**
   * Reads the `videoId` route parameter. When it is missing, answers with
   * HTTP 400 and returns `undefined` so the caller can stop.
   */
  private requireVideoId(
    req: ExpressRequest,
    res: ExpressResponse,
  ): string | undefined {
    const { videoId } = req.params;
    if (videoId) {
      return videoId;
    }
    res.status(400).json({
      error: "videoId is required",
    });
    return undefined;
  }

  /** POST /short-video: validates the body and queues the video. */
  private createShortVideo = async (
    req: ExpressRequest,
    res: ExpressResponse,
  ): Promise<void> => {
    try {
      const { scenes, config } = validateCreateShortInput(req.body);
      const videoId = this.shortCreator.addToQueue(scenes, config);

      res.status(201).json({
        videoId,
      });
    } catch (err: unknown) {
      logger.error(err, "Error validating input");

      // Validation errors carry a JSON document as their message.
      if (err instanceof Error && err.message.startsWith("{")) {
        try {
          const { message, missingFields } = JSON.parse(err.message);
          res.status(400).json({
            error: "Validation failed",
            message,
            missingFields,
          });
          return;
        } catch (parseError) {
          logger.error(parseError, "Error parsing validation error");
        }
      }

      // Everything else is reported as a generic invalid input.
      const message = err instanceof Error ? err.message : "Unknown error";
      res.status(400).json({
        error: "Invalid input",
        message,
      });
    }
  };

  /** GET /short-video/:videoId/status: reports the status of a video. */
  private getShortVideoStatus = async (
    req: ExpressRequest,
    res: ExpressResponse,
  ): Promise<void> => {
    const videoId = this.requireVideoId(req, res);
    if (!videoId) {
      return;
    }
    res.status(200).json({
      status: this.shortCreator.status(videoId),
    });
  };

  /** GET /music-tags: lists the available music tags. */
  private listMusicTags = (req: ExpressRequest, res: ExpressResponse): void => {
    const tags = this.shortCreator.ListAvailableMusicTags();
    res.status(200).json(tags);
  };

  /** DELETE /short-video/:videoId: removes the video. */
  private deleteShortVideo = (
    req: ExpressRequest,
    res: ExpressResponse,
  ): void => {
    const videoId = this.requireVideoId(req, res);
    if (!videoId) {
      return;
    }
    this.shortCreator.deleteVideo(videoId);
    res.status(200).json({
      success: true,
    });
  };

  /**
   * GET /short-video/:videoId: sends the video as `video/mp4`, or HTTP 404
   * when anything fails while loading or sending it.
   */
  private downloadShortVideo = (
    req: ExpressRequest,
    res: ExpressResponse,
  ): void => {
    try {
      const videoId = this.requireVideoId(req, res);
      if (!videoId) {
        return;
      }
      const video = this.shortCreator.getVideo(videoId);
      res.setHeader("Content-Type", "video/mp4");
      res.setHeader("Content-Disposition", `inline; filename=${videoId}.mp4`);
      res.send(video);
    } catch (error: unknown) {
      logger.error(error, "Error getting video");
      res.status(404).json({
        error: "Video not found",
      });
    }
  };
}
135 |
--------------------------------------------------------------------------------
/src/server/server.ts:
--------------------------------------------------------------------------------
1 | import http from "http";
2 | import express from "express";
3 | import type {
4 | Request as ExpressRequest,
5 | Response as ExpressResponse,
6 | } from "express";
7 | import { ShortCreator } from "../short-creator/ShortCreator";
8 | import { APIRouter } from "./routers/rest";
9 | import { MCPRouter } from "./routers/mcp";
10 | import { logger } from "../logger";
11 |
/**
 * HTTP server hosting the health check, the REST API (`/api`) and the MCP
 * endpoints (`/mcp`).
 */
export class Server {
  private app: express.Application = express();

  constructor(
    private port: number,
    private shortCreator: ShortCreator,
  ) {
    // add healthcheck endpoint
    this.app.get("/health", (req: ExpressRequest, res: ExpressResponse) => {
      res.status(200).json({ status: "ok" });
    });

    const { router: apiRoutes } = new APIRouter(shortCreator);
    const { router: mcpRoutes } = new MCPRouter(shortCreator);
    this.app.use("/api", apiRoutes);
    this.app.use("/mcp", mcpRoutes);
  }

  /**
   * Starts listening on the configured port.
   *
   * @returns the underlying Node HTTP server.
   */
  public start(): http.Server {
    const onListen = (error: unknown) => {
      if (!error) {
        logger.info(
          { port: this.port, mcp: "/mcp", api: "/api" },
          "MCP and API server is running",
        );
        // todo log instructions
        return;
      }
      logger.error(error, "Error starting server");
    };

    return this.app.listen(this.port, onListen);
  }

  /** @returns the Express application backing this server. */
  public getApp() {
    return this.app;
  }
}
51 |
--------------------------------------------------------------------------------
/src/server/validator.ts:
--------------------------------------------------------------------------------
1 | import { createShortInput, CreateShortInput } from "../types/shorts";
2 | import { logger } from "../logger";
3 | import { ZodError } from "zod";
4 |
/** Human-readable summary of a failed input validation. */
export interface ValidationErrorResult {
  /** One-line description listing every failing field. */
  message: string;
  /** Maps each failing field path (dot-separated) to its error message. */
  missingFields: Record<string, string>;
}
9 |
/**
 * Parses `input` against the `createShortInput` schema.
 *
 * @param input - the raw value to validate.
 * @returns the parsed data when validation succeeds.
 * @throws Error whose message is a JSON document with `message` and
 *   `missingFields` when validation fails.
 */
export function validateCreateShortInput(input: object): CreateShortInput {
  const validated = createShortInput.safeParse(input);
  logger.info({ validated }, "Validated input");

  if (!validated.success) {
    // Flatten the schema issues into a serializable summary.
    const { message, missingFields } = formatZodError(validated.error);
    throw new Error(JSON.stringify({ message, missingFields }));
  }

  return validated.data;
}
28 |
/**
 * Flattens a Zod error into a field-to-message map and a one-line summary.
 *
 * Each issue path is joined with "." to form the field key; when several
 * issues share a path, the last message wins.
 *
 * @param error - the Zod error to format.
 * @returns the summary message and the per-field messages.
 */
function formatZodError(error: ZodError): ValidationErrorResult {
  const missingFields: Record<string, string> = {};

  // Extract all the errors into a human-readable format
  for (const issue of error.errors) {
    missingFields[issue.path.join(".")] = issue.message;
  }

  // Create a human-readable message
  const errorPaths = Object.keys(missingFields);
  const message = `Validation failed for ${errorPaths.length} field(s): ${errorPaths.join(", ")}`;

  return {
    message,
    missingFields,
  };
}
48 |
--------------------------------------------------------------------------------
/src/short-creator/ShortCreator.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @remotion/deterministic-randomness */
2 | import fs from "fs-extra";
3 | import cuid from "cuid";
4 | import path from "path";
5 |
6 | import { Kokoro } from "./libraries/Kokoro";
7 | import { Remotion } from "./libraries/Remotion";
8 | import { Whisper } from "./libraries/Whisper";
9 | import { FFMpeg } from "./libraries/FFmpeg";
10 | import { PexelsAPI } from "./libraries/Pexels";
11 | import { Config } from "../config";
12 | import { logger } from "../logger";
13 | import { MusicManager } from "./music";
14 | import { type Music } from "../types/shorts";
15 | import type {
16 | SceneInput,
17 | RenderConfig,
18 | Scene,
19 | VideoStatus,
20 | MusicMoodEnum,
21 | MusicTag,
22 | } from "../types/shorts";
23 |
/**
 * Creates short videos from scene descriptions.
 *
 * Requests are queued and rendered one at a time. For every scene the text is
 * turned into speech, captions are generated from the audio and a matching
 * Pexels video is looked up; the scenes are then rendered with Remotion.
 */
export class ShortCreator {
  /** Pending requests; the first item is the one currently being processed. */
  private queue: {
    sceneInput: SceneInput[];
    config: RenderConfig;
    id: string;
  }[] = [];

  constructor(
    private config: Config,
    private remotion: Remotion,
    private kokoro: Kokoro,
    private whisper: Whisper,
    private ffmpeg: FFMpeg,
    private pexelsApi: PexelsAPI,
    private musicManager: MusicManager,
  ) {}

  /**
   * Reports the state of a video.
   *
   * @returns "processing" while the id is in the queue, "ready" when the
   *   rendered file exists, and "failed" otherwise (including unknown ids).
   */
  public status(id: string): VideoStatus {
    if (this.queue.some((item) => item.id === id)) {
      return "processing";
    }
    if (fs.existsSync(this.getVideoPath(id))) {
      return "ready";
    }
    return "failed";
  }

  /**
   * Queues a video for creation and starts processing when the queue was idle.
   *
   * @returns the id assigned to the new video.
   */
  public addToQueue(sceneInput: SceneInput[], config: RenderConfig): string {
    // todo add mutex lock
    const id = cuid();
    this.queue.push({
      sceneInput,
      config,
      id,
    });
    if (this.queue.length === 1) {
      // Fire and forget: processQueue handles its own errors.
      void this.processQueue();
    }
    return id;
  }

  /**
   * Renders the first queued item, removes it from the queue and continues
   * with the next one. Failures are logged and do not stop the queue.
   */
  private async processQueue(): Promise<void> {
    // todo add a semaphore
    if (this.queue.length === 0) {
      return;
    }
    const { sceneInput, config, id } = this.queue[0];
    logger.debug(
      { sceneInput, config, id },
      "Processing video item in the queue",
    );
    try {
      await this.createShort(id, sceneInput, config);
      logger.debug({ id }, "Video created successfully");
    } catch (error: unknown) {
      // Log under the `err` key so the logger's error serializer keeps the
      // message and stack; a plain `{ error }` serializes to an empty object.
      logger.error({ err: error, id }, "Error creating video");
    } finally {
      this.queue.shift();
      void this.processQueue();
    }
  }

  /**
   * Builds every scene (speech, captions, background video), picks the music
   * and renders the final video.
   *
   * @returns the id of the rendered video.
   */
  private async createShort(
    videoId: string,
    inputScenes: SceneInput[],
    config: RenderConfig,
  ): Promise<string> {
    logger.debug(
      {
        inputScenes,
      },
      "Creating short video",
    );
    const scenes: Scene[] = [];
    let totalDuration = 0;
    const excludeVideoIds: string[] = [];

    for (const [index, scene] of inputScenes.entries()) {
      const audio = await this.kokoro.generate(scene.text, "af_heart");
      let { audioLength } = audio;
      const { audio: audioStream } = audio;

      // add the paddingBack in seconds to the last scene
      if (index + 1 === inputScenes.length && config.paddingBack) {
        audioLength += config.paddingBack / 1000;
      }

      const captions = await this.createCaptions(audioStream);

      const audioDataUri = await this.ffmpeg.createMp3DataUri(audioStream);
      const video = await this.pexelsApi.findVideo(
        scene.searchTerms,
        audioLength,
        excludeVideoIds,
      );
      // Never reuse the same background video within one short.
      excludeVideoIds.push(video.id);

      scenes.push({
        captions,
        video: video.url,
        audio: {
          dataUri: audioDataUri,
          duration: audioLength,
        },
      });

      totalDuration += audioLength;
    }
    // NOTE(review): paddingBack is already part of the last scene's duration
    // above, so it is counted twice in totalDuration - confirm against the
    // Remotion composition before changing it.
    if (config.paddingBack) {
      totalDuration += config.paddingBack / 1000;
    }

    const selectedMusic = this.findMusic(totalDuration, config.music);
    logger.debug({ selectedMusic }, "Selected music for the video");

    await this.remotion.render(
      {
        music: selectedMusic,
        scenes,
        config: {
          durationMs: totalDuration * 1000,
          paddingBack: config.paddingBack,
        },
      },
      videoId,
    );

    return videoId;
  }

  /**
   * Generates captions for the audio via a temporary wav file. The file is
   * removed even when normalization or captioning fails.
   */
  private async createCaptions(audio: ArrayBuffer) {
    const tempAudioPath = path.join(this.config.tempDirPath, `${cuid()}.wav`);
    try {
      await this.ffmpeg.normalizeAudioForWhisper(audio, tempAudioPath);
      return await this.whisper.CreateCaption(tempAudioPath);
    } finally {
      fs.removeSync(tempAudioPath);
    }
  }

  /**
   * Resolves the location of a rendered video inside the videos directory.
   *
   * Only the base name of `videoId` is used: ids can come straight from
   * request parameters, and path segments such as "../" must not be able to
   * point outside the videos directory.
   */
  public getVideoPath(videoId: string): string {
    return path.join(
      this.config.videosDirPath,
      `${path.basename(videoId)}.mp4`,
    );
  }

  /** Removes the rendered video file of `videoId`. */
  public deleteVideo(videoId: string): void {
    const videoPath = this.getVideoPath(videoId);
    fs.removeSync(videoPath);
    logger.debug({ videoId }, "Deleted video file");
  }

  /**
   * Reads the rendered video into memory.
   *
   * @throws Error when the video file does not exist.
   */
  public getVideo(videoId: string): Buffer {
    const videoPath = this.getVideoPath(videoId);
    if (!fs.existsSync(videoPath)) {
      throw new Error(`Video ${videoId} not found`);
    }
    return fs.readFileSync(videoPath);
  }

  /**
   * Picks a random music track, restricted to `tag` when one is given. When
   * no track carries the requested mood, any available track is used.
   *
   * @param videoDuration - duration of the video in seconds (currently unused).
   * @param tag - optional mood the track should have.
   * @throws Error when there are no music tracks at all.
   */
  private findMusic(videoDuration: number, tag?: MusicMoodEnum): Music {
    const allMusic = this.musicManager.musicList();
    const matching = tag
      ? allMusic.filter((music) => music.mood === tag)
      : allMusic;

    if (matching.length === 0 && allMusic.length > 0) {
      logger.warn({ tag }, "No music found for the mood, using any track");
    }
    const candidates = matching.length > 0 ? matching : allMusic;
    if (candidates.length === 0) {
      throw new Error("No music files available");
    }
    return candidates[Math.floor(Math.random() * candidates.length)];
  }

  /** Lists the distinct moods of the available music tracks. */
  public ListAvailableMusicTags(): MusicTag[] {
    const tags = new Set<MusicTag>();
    for (const music of this.musicManager.musicList()) {
      tags.add(music.mood as MusicTag);
    }
    return Array.from(tags);
  }
}
195 |
--------------------------------------------------------------------------------
/src/short-creator/libraries/FFmpeg.ts:
--------------------------------------------------------------------------------
1 | import ffmpeg from "fluent-ffmpeg";
2 | import { Readable } from "node:stream";
3 | import { logger } from "../../logger";
4 |
/** Thin wrapper around fluent-ffmpeg for the audio conversions of the app. */
export class FFMpeg {
  /**
   * Points fluent-ffmpeg at the binary shipped by `@ffmpeg-installer/ffmpeg`.
   *
   * @returns a ready-to-use instance.
   */
  static async init(): Promise<FFMpeg> {
    const ffmpegInstaller = await import("@ffmpeg-installer/ffmpeg");
    ffmpeg.setFfmpegPath(ffmpegInstaller.path);
    // The placeholder is required for the path to appear in the log line.
    logger.info("FFmpeg path set to: %s", ffmpegInstaller.path);
    return new FFMpeg();
  }

  /**
   * Converts the audio to a mono, 16 kHz, 16-bit PCM wav file.
   *
   * @param audio - the source audio data.
   * @param outputPath - where the wav file is written.
   * @returns `outputPath` once the conversion has finished.
   */
  async normalizeAudioForWhisper(
    audio: ArrayBuffer,
    outputPath: string,
  ): Promise<string> {
    logger.debug("Normalizing audio for Whisper");
    const inputStream = new Readable();
    inputStream.push(Buffer.from(audio));
    inputStream.push(null); // end of stream

    return new Promise<string>((resolve, reject) => {
      ffmpeg()
        .input(inputStream)
        .audioCodec("pcm_s16le")
        .audioChannels(1)
        .audioFrequency(16000)
        .toFormat("wav")
        .on("end", () => {
          logger.debug("Audio normalization complete");
          resolve(outputPath);
        })
        .on("error", (err) => {
          logger.error(err, "Error normalizing audio:");
          reject(err);
        })
        .save(outputPath);
    });
  }

  /**
   * Encodes the audio as a 128 kbit/s stereo mp3.
   *
   * @param audio - the source audio data.
   * @returns the mp3 as a base64 `data:audio/mp3` URI.
   */
  async createMp3DataUri(audio: ArrayBuffer): Promise<string> {
    const inputStream = new Readable();
    inputStream.push(Buffer.from(audio));
    inputStream.push(null); // end of stream

    return new Promise<string>((resolve, reject) => {
      const chunks: Buffer[] = [];

      ffmpeg()
        .input(inputStream)
        .audioCodec("libmp3lame")
        .audioBitrate(128)
        .audioChannels(2)
        .toFormat("mp3")
        .on("error", (err) => {
          reject(err);
        })
        .pipe()
        .on("data", (data: Buffer) => {
          chunks.push(data);
        })
        .on("end", () => {
          const buffer = Buffer.concat(chunks);
          resolve(`data:audio/mp3;base64,${buffer.toString("base64")}`);
        })
        .on("error", (err) => {
          reject(err);
        });
    });
  }
}
72 |
--------------------------------------------------------------------------------
/src/short-creator/libraries/Kokoro.ts:
--------------------------------------------------------------------------------
1 | import { KokoroTTS } from "kokoro-js";
2 | import type { Voices } from "../../types/shorts";
3 |
const MODEL = "onnx-community/Kokoro-82M-v1.0-ONNX";
const D_TYPE = "fp32"; // Options: "fp32", "fp16", "q8", "q4", "q4f16"

/** Text-to-speech generator backed by the Kokoro model. */
export class Kokoro {
  constructor(private tts: KokoroTTS) {}

  /**
   * Synthesizes speech for `text`.
   *
   * @param text - the text to speak.
   * @param voice - the voice to use.
   * @returns the audio as wav data together with its length in seconds
   *   (sample count divided by the sampling rate).
   */
  async generate(
    text: string,
    voice: Voices,
  ): Promise<{
    audio: ArrayBuffer;
    audioLength: number;
  }> {
    const audio = await this.tts.generate(text, {
      voice: voice,
    });

    return {
      audio: audio.toWav(),
      audioLength: audio.audio.length / audio.sampling_rate,
    };
  }

  /**
   * Loads the pretrained model and wraps it.
   *
   * @returns a ready-to-use instance.
   */
  static async init(): Promise<Kokoro> {
    const tts = await KokoroTTS.from_pretrained(MODEL, {
      dtype: D_TYPE,
      device: "cpu", // only "cpu" is supported in node
    });

    return new Kokoro(tts);
  }
}
36 |
--------------------------------------------------------------------------------
/src/short-creator/libraries/Pexels.test.ts:
--------------------------------------------------------------------------------
1 | process.env.LOG_LEVEL = "debug";
2 |
3 | import nock from "nock";
4 | import { PexelsAPI } from "./Pexels";
5 | import { test, assert } from "vitest";
6 | import fs from "fs-extra";
7 | import path from "path";
8 |
// Serves the recorded Pexels search response and checks that a video is found.
test("test pexels", async () => {
  const fixturePath = path.resolve("__mocks__/pexels-response.json");
  const mockResponse = fs.readFileSync(fixturePath, "utf-8");

  const pexelsHost = nock("https://api.pexels.com");
  pexelsHost.get(/videos\/search/).reply(200, mockResponse);

  const pexels = new PexelsAPI("asdf");
  const video = await pexels.findVideo(["dog"], 2.4, []);
  console.log(video);
  assert.isObject(video, "Video should be an object");
});
22 |
--------------------------------------------------------------------------------
/src/short-creator/libraries/Pexels.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @remotion/deterministic-randomness */
2 | import { logger } from "../../logger";
3 | import type { Video } from "../../types/shorts";
4 |
5 | const jokerTerms: string[] = ["nature", "globe", "space", "ocean"];
6 | const durationBufferSeconds = 3;
7 |
8 | export class PexelsAPI {
9 | constructor(private API_KEY: string) {}
10 |
11 | private async _findVideo(
12 | searchTerm: string,
13 | minDurationSeconds: number,
14 | excludeIds: string[],
15 | ): Promise