├── .dockerignore
├── .env
├── .gitignore
├── .nvmrc
├── Dockerfile
├── LICENSE.txt
├── README.md
├── TODO.md
├── package-lock.json
├── package.json
├── samples
│   └── un-deux.wav
├── src
│   ├── config.mts
│   ├── index.mts
│   ├── main.mts
│   ├── preproduction
│   │   ├── mocks.mts
│   │   └── prompts.mts
│   ├── production
│   │   ├── assembleShots.mts
│   │   ├── normalizePendingVideoToTmpFilePath.mts
│   │   ├── postInterpolation.mts
│   │   ├── renderAnalysis.mts
│   │   ├── renderContent.mts
│   │   ├── renderImage.mts
│   │   ├── renderImageAnalysis.mts
│   │   ├── renderImageSegmentation.mts
│   │   ├── renderImageUpscaling.mts
│   │   ├── renderPipeline.mts
│   │   ├── renderScene.mts
│   │   ├── renderSegmentation.mts
│   │   ├── renderUpscaling.mts
│   │   ├── renderVideo.mts
│   │   ├── renderVideoSegmentation.mts
│   │   └── renderVideoUpscaling.mts
│   ├── providers
│   │   ├── audio-generation
│   │   │   ├── generateAudio.mts
│   │   │   └── generateAudioLegacy.mts
│   │   ├── character-model
│   │   │   └── generateActor.mts
│   │   ├── image-caption
│   │   │   ├── analyzeImageWithIDEFICS.mts
│   │   │   └── analyzeImageWithIDEFICSAndNastyHack.mts
│   │   ├── image-generation
│   │   │   ├── generateImage.mts
│   │   │   ├── generateImageLCMFetch.mts
│   │   │   ├── generateImageLCMGradio.mts
│   │   │   ├── generateImagePulib.mts
│   │   │   ├── generateImageSDXL360.mts
│   │   │   ├── generateImageSDXLFetch.mts
│   │   │   ├── generateImageSDXLGradio.mts
│   │   │   └── generateImageSDXLTurbo.mts
│   │   ├── image-segmentation
│   │   │   ├── segmentImage.mts
│   │   │   └── segmentImageFromURL.mts
│   │   ├── image-upscaling
│   │   │   └── upscaleImage.mts
│   │   ├── language-model
│   │   │   ├── enrichVideoSpecsUsingLLM.mts
│   │   │   ├── openai
│   │   │   │   ├── createChatCompletion.mts
│   │   │   │   ├── createChatCompletionStream.mts
│   │   │   │   ├── generateYAML.mts
│   │   │   │   ├── getTextPrompt.mts
│   │   │   │   ├── getUserContent.mts
│   │   │   │   ├── openai.mts
│   │   │   │   ├── runModerationCheck.mts
│   │   │   │   └── stream.mts
│   │   │   └── types.mts
│   │   ├── lip-syncing
│   │   │   └── generateLipSyncVideo.mts
│   │   ├── music-generation
│   │   │   └── generateMusicWithReplicate.mts
│   │   ├── music-to-caption
│   │   │   └── musicToCaption.mts
│   │   ├── speech-to-text
│   │   │   ├── speechToTextWithWhisperLib.txt
│   │   │   └── speechToTextWithWhisperSpace.mts
│   │   ├── video-generation
│   │   │   ├── addBase64HeaderToMp4.mts
│   │   │   ├── defaultPrompts.mts
│   │   │   ├── generateVideoWithAnimateDiffLightning.mts
│   │   │   ├── generateVideoWithHotshotGradioAPI.mts
│   │   │   ├── generateVideoWithHotshotReplicate.mts
│   │   │   ├── generateVideoWithShow.mts
│   │   │   ├── generateVideoWithZeroscope.mts
│   │   │   └── types.mts
│   │   ├── video-interpolation
│   │   │   ├── interpolateVideo.mts
│   │   │   ├── interpolateVideoLegacy.mts
│   │   │   └── interpolateVideoWithReplicate.mts
│   │   ├── video-transformation
│   │   │   ├── transformVideoWithHotshotReplicate.mts
│   │   │   └── transformVideoWithLatentImageAnimator.txt
│   │   ├── video-upscaling
│   │   │   ├── upscaleVideo.mts
│   │   │   └── upscaleVideoToBase64URL.mts
│   │   └── voice-generation
│   │       ├── generateVoice.mts
│   │       ├── generateVoiceWithCoqui.txt
│   │       └── generateVoiceWithOpenVoice.mts
│   ├── scheduler
│   │   ├── copyVideoFromPendingToCompleted.mts
│   │   ├── copyVideoFromTmpToCompleted.mts
│   │   ├── copyVideoFromTmpToPending.mts
│   │   ├── deleteVideo.mts
│   │   ├── getAllVideosForOwner.mts
│   │   ├── getCompletedVideos.mts
│   │   ├── getFirstVideoFrame.mts
│   │   ├── getFirstVideoFrameAsBase64.mts
│   │   ├── getPendingVideos.mts
│   │   ├── getVideo.mts
│   │   ├── getVideoStatus.mts
│   │   ├── markVideoAsPending.mts
│   │   ├── markVideoAsToAbort.mts
│   │   ├── markVideoAsToDelete.mts
│   │   ├── markVideoAsToPause.mts
│   │   ├── moveVideoFromPendingToCompleted.mts
│   │   ├── moveVideoFromTmpToCompleted.mts
│   │   ├── processVideo.mts
│   │   ├── readVideoMetadataFile.mts
│   │   ├── readVideoMetadataFiles.mts
│   │   ├── saveAndCheckIfNeedToStop.mts
│   │   ├── saveCompletedVideo.mts
│   │   ├── savePendingVideo.mts
│   │   ├── sortPendingVideosByLeastCompletedFirst.mts
│   │   ├── sortVideosByYoungestFirst.mts
│   │   ├── updatePendingVideo.mts
│   │   └── updateShotPreview.mts
│   ├── types.mts
│   └── utils
│       ├── audio
│       │   ├── convertMp3ToWavBase64.mts
│       │   ├── convertMp3ToWavFilePath.mts
│       │   └── mergeAudio.mts
│       ├── data
│       │   ├── all_words.json
│       │   └── good_words.json
│       ├── download
│       │   ├── downloadFileAsBase64.mts
│       │   ├── downloadFileAsBase64URL.mts
│       │   └── downloadFileToTmp.mts
│       ├── filesystem
│       │   ├── createDirIfNeeded.mts
│       │   ├── deleteAllFilesWith.mts
│       │   ├── deleteFileIfExists.mts
│       │   ├── initFolders.mts
│       │   ├── moveFile.mts
│       │   ├── moveFileFromTmpToPending.mts
│       │   ├── saveRenderedSceneToCache.mts
│       │   └── writeBase64ToFile.mts
│       ├── image
│       │   ├── addBase64HeaderToPng.mts
│       │   ├── convertToWebp.mts
│       │   └── resizeBase64Image.mts
│       ├── misc
│       │   ├── debouncePromise.mts
│       │   ├── debounceSync.mts
│       │   ├── generateSeed.mts
│       │   ├── getHuggingFaceSpaceStatus.mts
│       │   ├── makeSureSpaceIsRunning.mts
│       │   ├── randomShuffle.mts
│       │   ├── sleep.mts
│       │   └── tryApiCall.mts
│       ├── requests
│       │   ├── hasValidAuthorization.mts
│       │   ├── hashRequest.mts
│       │   ├── loadRenderedSceneFromCache.mts
│       │   ├── parseRenderRequest.mts
│       │   ├── parseShotRequest.mts
│       │   └── parseVideoRequest.mts
│       ├── streams
│       │   └── streamToBuffer.mts
│       ├── validators
│       │   ├── computeSecretFingerprint.mts
│       │   ├── computeSha256.mts
│       │   ├── getValidBoolean.mts
│       │   ├── getValidNumber.mts
│       │   └── getValidResolution.mts
│       └── video
│           ├── addAudioToVideo.mts
│           └── concatNoGL.mts
└── tsconfig.json

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
node_modules
npm-debug.log
models
sandbox
audio.pipe
video.pipe

--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------

#--------------------- GENERATION CONFIGURATION -------------------
# if deployed to Hugging Face (with persistent storage enabled)
VC_STORAGE_PATH="/data/"

# for local usage on your dev machine
#VC_STORAGE_PATH="./sandbox"

#--------------------- AUTH SECRETS AND ACCESS TOKENS -------------
# The access token required to send some queries to VideoChain
#VC_SECRET_ACCESS_TOKEN=""

# The access token required to send queries to some sub-servers
#VC_MICROSERVICE_SECRET_TOKEN=""

# OpenAI API key used to call OpenAI API services
#VC_OPENAI_API_KEY=""

# Hugging Face API key used to call Hugging Face spaces
#VC_HF_API_TOKEN=""

# Replicate API token
#VC_REPLICATE_API_TOKEN=""

#--------------------- LLM INFERENCE SERVERS ----------------------
#VC_INFERENCE_ENDPOINT_URL=""

#--------------------- SDXL INFERENCE SERVERS ---------------------
VC_SDXL_SPACE_API_URL="https://jbilcke-hf-image-server.hf.space"

#--------------------- LCM INFERENCE SERVERS ---------------------
VC_LCM_SPACE_API_URL="https://jbilcke-hf-fast-image-server.hf.space"

#--------------------- SDXL TURBO INFERENCE SERVERS ---------------------
VC_SDXL_TURBO_SPACE_API_URL="https://jbilcke-hf-faster-image-server.hf.space"

#----------------- ZEROSCOPE INFERENCE SERVERS -------------------
VC_ZEROSCOPE_SPACE_API_URL_1="https://jbilcke-hf-zeroscope-server-1.hf.space"
VC_ZEROSCOPE_SPACE_API_URL_2="https://jbilcke-hf-zeroscope-server-2.hf.space"
VC_ZEROSCOPE_SPACE_API_URL_3="https://jbilcke-hf-zeroscope-server-3.hf.space"
VC_ZEROSCOPE_SPACE_API_URL_4="https://jbilcke-hf-zeroscope-server-4.hf.space"

#----------------- HOTSHOT-XL INFERENCE SERVERS -------------------
VC_HOTSHOT_XL_GRADIO_SPACE_API_URL="https://jbilcke-hf-hotshot-xl-server-1.hf.space"

#----------------- HOTSHOT-XL REPLICATE CONFIG --------------------
VC_HOTSHOT_XL_REPLICATE_MODEL="cloneofsimo/hotshot-xl-lora-controlnet"
VC_HOTSHOT_XL_REPLICATE_MODEL_VERSION="c447ef9fc621af091e2c06d08fd2a22d9f5906389a2f8103c851a2f7cf9c4e63"

#----------------- FRAME SEGMENTATION SERVERS ---------------------
VC_SEGMENTATION_MODULE_SPACE_API_URL_1="https://jbilcke-hf-segmentation-server-1.hf.space"
VC_SEGMENTATION_MODULE_SPACE_API_URL_2="https://jbilcke-hf-segmentation-server-2.hf.space"
VC_SEGMENTATION_MODULE_SPACE_API_URL_3="https://jbilcke-hf-segmentation-server-3.hf.space"

# obsolete:
VC_SEGMENTATION_MODULE_SPACE_API_URL="https://jbilcke-hf-image-segmentation.hf.space"

#----------------- PANORAMA GENERATION SERVERS -------------------
VC_SDXL_360_SPACE_API_URL_1="https://jbilcke-hf-360-server-1.hf.space"

#----------------- IMAGE UPSCALING SERVERS -----------------------
VC_UPSCALING_SPACE_API_URL="https://jbilcke-hf-upscaling-server.hf.space"

#----------------- VIDEO UPSCALING SERVERS -----------------------
VC_VIDEO_UPSCALE_SPACE_API_URL_1="https://jbilcke-hf-video-upscaling-server-1.hf.space"

#----------------- VIDEO INTERPOLATION (FILM) SERVERS -------------------
VC_VIDEO_INTERPOLATION_SPACE_API_URL="https://jbilcke-hf-video-interpolation-server.hf.space"

#----------------- VIDEO INTERPOLATION (ST-MFNET) CONFIG -------------------
VC_VIDEO_INTERPOLATION_STMFNET_REPLICATE_MODEL="hzsxkib/st-mfnet"
VC_VIDEO_INTERPOLATION_STMFNET_REPLICATE_MODEL_VERSION="faa7693430b0a4ac95d1b8e25165673c1d7a7263537a7c4bb9be82a3e2d130fb"

#----------------- AUDIO GENERATION SERVERS ----------------------
VC_AUDIO_GENERATION_SPACE_API_URL="https://jbilcke-hf-audioldm-text-to-audio-generation.hf.space"

#----------------- IMAGE ANALYSIS SERVERS ----------------------
VC_ANALYSIS_SPACE_API_URL="https://jbilcke-hf-idefics-server.hf.space"

#----------------- SPEECH TO TEXT SERVERS ----------------------
VC_SPEECH_TO_TEXT_SPACE_API_URL_1="https://jbilcke-hf-speech-recognition-server-1.hf.space"

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules
*.log
*.bin
.DS_Store
.venv
*.mp4
sandbox
scripts
.env.local

--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
v20.17.0

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM node:18

ARG DEBIAN_FRONTEND=noninteractive

RUN apt update

# For FFMPEG and gl concat
RUN apt --yes install ffmpeg curl build-essential python3 python3-dev libx11-dev libxext-dev libxext6 libglu1-mesa-dev xvfb libxi-dev libglew-dev pkg-config

# For Puppeteer
RUN apt --yes install libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgbm1 libasound2 libpangocairo-1.0-0 libxss1 libgtk-3-0

# Set up a new user named "user" with user ID 1000
RUN useradd -o -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Install app dependencies
# A wildcard is used to ensure both package.json AND package-lock.json are copied
# where available (npm@5+)
COPY --chown=user package*.json $HOME/app

# make sure the .env is copied as well
COPY --chown=user .env $HOME/app

RUN npm install


# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
COPY --chown=user . $HOME/app

EXPOSE 7860

# we can't use this (it times out)
# CMD [ "xvfb-run", "-s", "-ac -screen 0 1920x1080x24", "npm", "run", "start" ]
CMD [ "npm", "run", "start" ]

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
---
title: VideoChain API
emoji: 🎥 🔗
colorFrom: black
colorTo: white
sdk: docker
pinned: false
app_port: 7860
---

A microservice to generate videos

# Installation

1. `npm i`
2. copy `.env` to `.env.local`
3. edit `.env.local` to define the secrets / API access keys
4. `npm run start`

--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------

To allow multiple videos to be processed at the same time:

[ ] yield from the loop at each step
[ ] random processing of videos

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "videochain-api",
  "version": "1.0.0",
  "description": "A service which wraps and chains video and audio spaces together",
  "main": "src/index.mts",
  "scripts": {
    "start": "tsx src/index.mts",
    "test:submitVideo": "tsx src/tests/submitVideo.mts",
    "test:checkStatus": "tsx src/tests/checkStatus.mts",
    "test:downloadFileToTmp": "tsx src/tests/downloadFileToTmp.mts",
    "test:stuff": "tsx src/utils/segmentImage.mts",
    "docker": "npm run docker:build && npm run docker:run",
    "docker:build": "docker build -t videochain-api .",
    "docker:run": "docker run -it -p 7860:7860 videochain-api"
  },
  "author": "Julian Bilcke",
  "license": "Apache License",
  "dependencies": {
    "@gorgonjs/file-provider": "^1.4.1",
    "@gorgonjs/gorgon": "^1.4.1",
    "@gradio/client": "1.5.2",
    "@huggingface/inference": "2.8.0",
    "@types/express": "^4.17.17",
    "@types/node": "^20.12.7",
    "@types/uuid": "^9.0.2",
    "dotenv": "^16.3.1",
    "eventsource-parser": "^1.0.0",
    "express": "^4.18.2",
    "fluent-ffmpeg": "^2.1.2",
    "fs-extra": "^11.1.1",
    "gpt-tokens": "^1.1.1",
    "node-fetch": "^3.3.1",
    "nodejs-whisper": "^0.1.4",
    "openai": "^4.38.2",
    "puppeteer": "^22.6.5",
    "replicate": "^0.29.1",
    "resize-base64": "^1.0.12",
    "sharp": "^0.32.4",
    "temp-dir": "^3.0.0",
    "ts-node": "^10.9.2",
    "tsx": "^4.7.0",
    "tts-react": "^3.0.1",
    "uuid": "^9.0.0",
    "yaml": "^2.3.1"
  }
}
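The `test:submitVideo` script above points at `src/tests/submitVideo.mts`, which is not included in this excerpt. As a rough sketch of what such a client-side call could look like — the `/render` endpoint path and the exact payload fields are assumptions based on the `RenderRequest` usage and the auth variables in `.env`, not a documented contract:

```ts
// hypothetical client sketch: the endpoint path and payload shape are assumptions
const server = "http://localhost:7860"

async function submitRender() {
  const res = await fetch(`${server}/render`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // the service gates some queries behind a secret token (see .env)
      Authorization: `Bearer ${process.env.VC_SECRET_ACCESS_TOKEN || ""}`,
    },
    body: JSON.stringify({
      prompt: "a duck walking in Central Park",
      nbFrames: 1, // 1 = still image, > 1 = video (see renderContent.mts)
      wait: true,  // block until the pipeline completes
    }),
  })
  if (!res.ok) { throw new Error(`request failed: ${res.status}`) }
  console.log(await res.json())
}

submitRender()
```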
--------------------------------------------------------------------------------
/samples/un-deux.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbilcke-hf/VideoChain-API/7e4bd1c0eab74b0e6fea9c1ca2e226c85fd43c03/samples/un-deux.wav

--------------------------------------------------------------------------------
/src/config.mts:
--------------------------------------------------------------------------------
import path from "node:path"
import fs from "node:fs"

import dotenv from "dotenv"

dotenv.config()

try {
  if (fs.existsSync(".env.local")) {
    const result = dotenv.config({ path: ".env.local" })
    console.log("using .env.local")
    process.env = {
      ...process.env,
      ...result.parsed,
    }
  }
} catch (err) {
  // do nothing
  console.log("using .env")
}

export const storagePath = `${process.env.VC_STORAGE_PATH || './sandbox'}`

// those are persistent storage (we want to keep the data for months/years)
export const metadataDirPath = path.join(storagePath, "metadata")
export const pendingMetadataDirFilePath = path.join(metadataDirPath, "pending")
export const completedMetadataDirFilePath = path.join(metadataDirPath, "completed")

export const filesDirPath = path.join(storagePath, "files")
export const pendingFilesDirFilePath = path.join(filesDirPath, "pending")
export const completedFilesDirFilePath = path.join(filesDirPath, "completed")

// this is a semi-persistent storage (we will want to renew it from time to time)
export const cacheDirPath = path.join(storagePath, "cache")
export const renderedDirFilePath = path.join(cacheDirPath, "rendered")

export const shotFormatVersion = 1
export const sequenceFormatVersion = 1
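The storage directories exported above must exist before the scheduler can write to them; that is the job of `src/utils/filesystem/initFolders.mts`, whose source is not shown in this excerpt. A minimal sketch of what such a helper could look like, assuming it simply creates each configured directory recursively:

```ts
import { promises as fs } from "node:fs"

import {
  metadataDirPath, pendingMetadataDirFilePath, completedMetadataDirFilePath,
  filesDirPath, pendingFilesDirFilePath, completedFilesDirFilePath,
  cacheDirPath, renderedDirFilePath,
} from "../config.mts"

// sketch: create every storage directory if it doesn't exist yet
export const initFolders = async () => {
  const dirs = [
    metadataDirPath, pendingMetadataDirFilePath, completedMetadataDirFilePath,
    filesDirPath, pendingFilesDirFilePath, completedFilesDirFilePath,
    cacheDirPath, renderedDirFilePath,
  ]
  for (const dir of dirs) {
    // { recursive: true } makes mkdir a no-op when the directory already exists
    await fs.mkdir(dir, { recursive: true })
  }
}
```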
--------------------------------------------------------------------------------
/src/main.mts:
--------------------------------------------------------------------------------

import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
import { processVideo } from "./scheduler/processVideo.mts"
import { sortPendingVideosByLeastCompletedFirst } from "./scheduler/sortPendingVideosByLeastCompletedFirst.mts"

export const main = async () => {

  const videos = await getPendingVideos()
  if (!videos.length) {
    // console.log(`no job to process.. going to try in 200 ms`)
    setTimeout(() => {
      main()
    }, 200)
    return
  }

  console.log(`there are ${videos.length} pending videos`)

  sortPendingVideosByLeastCompletedFirst(videos)

  let somethingFailed = ""
  await Promise.all(videos.map(async video => {
    try {
      const result = await processVideo(video)
      return result
    } catch (err) {
      somethingFailed = `${err}`
      // a video failed.. no big deal
      return Promise.resolve(somethingFailed)
    }
  }))

  if (somethingFailed) {
    console.error(`one of the jobs failed: ${somethingFailed}, let's wait 5 seconds`)
    setTimeout(() => { main() }, 5000)
  } else {
    console.log(`successfully worked on the jobs, let's immediately loop`)
    setTimeout(() => { main() }, 50)
  }

}
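TODO.md lists "random processing of videos", and the utils tree already contains `misc/randomShuffle.mts`. Assuming that helper returns a shuffled copy of its input (an assumption — its source is not shown here), one way to wire it into the scheduler could be:

```ts
import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
import { processVideo } from "./scheduler/processVideo.mts"
import { randomShuffle } from "./utils/misc/randomShuffle.mts"

// sketch: one pass over the queue in random order, so a single stuck video
// doesn't always block the same followers
export const processOnePassRandomly = async () => {
  const videos = await getPendingVideos()
  for (const video of randomShuffle(videos)) {
    try {
      await processVideo(video)
    } catch (err) {
      console.error(`a video failed, continuing with the next one: ${err}`)
    }
  }
}
```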
--------------------------------------------------------------------------------
/src/preproduction/mocks.mts:
--------------------------------------------------------------------------------
import { Video, VideoShot } from "../types.mts"

export const mockShots: VideoShot[] = [
  {
    "shotPrompt": "In the extreme wide shot, a flock of ducks is converging on the Central Park, coming from multiple directions. Their feathers are glossy and clean, casting off varying degrees of green, brown and white",
    "environmentPrompt": "Central Park at sunrise, the park looks slightly misty, the sky is tinged with shades of pink and orange as the day breaks. There's dew on the grass, and the leaves on trees are rustling in the light breeze",
    "photographyPrompt": "Eye-level shot with a slight tilt in the camera, capturing the panorama of the park. There's natural lighting, sun just rising. The camera zooms out to capture the ducks entering the park. Shutter speed is slow to capture the movement of ducks",
    "actionPrompt": "Large groups of ducks waddle into the park from various directions, some fly in groups, landing on the pond with small splashes. Movement is slow, slightly sped up to depict the invasion",
    "foregroundAudioPrompt": "A symphony of soft quacking and rustling feathers",
  },
  {
    "shotPrompt": "In the medium shot, a group of ducks are by the pond, pecking at the ground and frolicking in the water. One male mallard is particularly captivating with its emerald green head and healthy body",
    "environmentPrompt": "It's a sunny spring day in Central Park. The pond is surrounded by lush, green vegetation and dappled with sunlight filtering through the leaves",
    "photographyPrompt": "Low angle shot near the water level, the camera moves in a crane shot to capture ducks in action, and the camera's aperture is partially open. Natural sunlight creates playful shadows",
    "actionPrompt": "Ducks are pecking at the ground, dabbling at the water's edge and frolicking in the pond. The camera tracks a particularly majestic mallard navigating through the pond",
    "foregroundAudioPrompt": "Sounds of ducks quacking and splashing in the water"
  },
  {
    "shotPrompt": "Close-up shot of a mother duck with ducklings following her in a line on the grass and into the water",
    "environmentPrompt": "Central Park, by one of the smaller ponds, surrounded by green trees. Sun is high up giving off warm, radiant light",
    "photographyPrompt": "High angle shot, focusing on the line of ducklings following their mother. The camera follows the ducklings. The setting is bright and clear with sun illuminating the ducklings",
    "actionPrompt": "Mother duck is leading her ducklings from the grass into the water, the ducklings obediently follow, creating a neat line. The whole scene feels peaceful",
    "foregroundAudioPrompt": "Ducklings' high pitched chirping, soft lapping of water at the edge of the pond"
  }
] as any

export const mock: Video = {
  "backgroundAudioPrompt": "City ambience mixed with the rustling leaves and the chirping birds in the park",
  "foregroundAudioPrompt": "Rustling feathers, soft quacking, flapping wings, occasional splash in the pond",
  "actorPrompt": "Main actors are ducks - a variety of breeds, mostly mallards: males with glossy green heads and females in mottled brown; all plump, medium-sized waterfowl",
  "actorVoicePrompt": "Soft, low pitched quacking of adult ducks and higher pitched chirping of ducklings",
  "noise": true,
  "noiseAmount": 2,
  "outroDurationMs": 1500,
  "shots": mockShots
} as any

--------------------------------------------------------------------------------
/src/production/assembleShots.mts:
--------------------------------------------------------------------------------
import path from "node:path"

// due to Docker issues, we disable OpenGL transitions for now
// import concat from 'ffmpeg-concat'
import concat from '../utils/video/concatNoGL.mts'

import { VideoShot } from '../types.mts'
import { pendingFilesDirFilePath } from "../config.mts"
import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"

export const assembleShots = async (shots: VideoShot[], fileName: string) => {

  if (!Array.isArray(shots) || shots.length < 2) {
    throw new Error(`need at least 2 shots`)
  }

  const transitions = [
    {
      name: 'circleOpen',
      duration: 1000,
    },
    {
      name: 'crossWarp',
      duration: 800,
    },
    {
      name: 'directionalWarp',
      duration: 800,
      // pass custom params to a transition
      params: { direction: [1, -1] },
    },

    /*
    {
      name: 'squaresWire',
      duration: 2000,
    },
    */
  ]

  const videoFilePath = path.join(pendingFilesDirFilePath, fileName)

  // before performing assembly, we must normalize images
  const shotFilesPaths: string[] = []
  for (let shot of shots) {
    const normalizedShotFilePath = await normalizePendingVideoToTmpFilePath(shot.fileName)
    shotFilesPaths.push(normalizedShotFilePath)
  }

  await concat({
    output: videoFilePath,
    videos: shotFilesPaths,
    transitions: shotFilesPaths
      .slice(0, shotFilesPaths.length - 1)
      .map(
        (vid) => transitions[Math.floor(Math.random() * transitions.length)]
      ),
  })
}
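A usage sketch for `assembleShots`, assuming each shot has already been rendered into the pending files directory (the shot objects are abridged and cast; see `VideoShot` in src/types.mts for the full shape):

```ts
import { assembleShots } from "./assembleShots.mts"

// sketch: concatenate two already-rendered shots into one pending video
const shots = [
  { fileName: "shot-0001.mp4" },
  { fileName: "shot-0002.mp4" },
] as any

await assembleShots(shots, "sequence-0001.mp4")
```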
--------------------------------------------------------------------------------
/src/production/normalizePendingVideoToTmpFilePath.mts:
--------------------------------------------------------------------------------
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"

import { pendingFilesDirFilePath } from "../config.mts"

export const normalizePendingVideoToTmpFilePath = async (fileName: string): Promise<string> => {
  return new Promise((resolve, reject) => {

    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(pendingFilesDirFilePath, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)

    ffmpeg.ffprobe(filePath, function(err) {
      if (err) { reject(err); return; }

      ffmpeg(filePath)

        .size("1280x720")

        .save(tmpFilePath)
        .on("end", async () => {
          resolve(tmpFilePath)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}

--------------------------------------------------------------------------------
/src/production/postInterpolation.mts:
--------------------------------------------------------------------------------
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"

import { moveFileFromTmpToPending } from "../utils/filesystem/moveFileFromTmpToPending.mts"
import { pendingFilesDirFilePath } from "../config.mts"

export const postInterpolation = async (fileName: string, durationMs: number, nbFrames: number, noiseAmount: number): Promise<string> => {
  return new Promise((resolve, reject) => {

    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(pendingFilesDirFilePath, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)

    ffmpeg.ffprobe(filePath, function(err, metadata) {
      if (err) { reject(err); return; }

      const durationInSec = durationMs / 1000

      const currentVideoDurationInSec = metadata.format.duration

      console.log(`currentVideoDurationInSec in sec: ${currentVideoDurationInSec}s`)

      console.log(`target duration in sec: ${durationInSec}s (${durationMs}ms)`)

      // compute a ratio, e.g. 0.3 = 30% of the total length
      const durationRatio = currentVideoDurationInSec / durationInSec
      console.log(`durationRatio: ${durationRatio}`)

      ffmpeg(filePath)

        // convert to HD
        .size("1280x720")

        .videoFilters([
          `setpts=0.5*PTS`, // we make the video faster
          //'scale=-1:576:lanczos',
          // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this makes the video more "pixely"
          `noise=c0s=${noiseAmount}:c0f=t+u` // add a movie grain noise
        ])
        .outputOptions([
          `-r ${nbFrames}`,
        ])

        .save(tmpFilePath)
        .on("end", async () => {
          await moveFileFromTmpToPending(tmpFileName, fileName)

          resolve(fileName)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}
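Note that `durationRatio` above is computed and logged but never used: the `setpts` factor is hard-coded to 0.5 (a constant 2x speed-up). If the goal were to land exactly on `durationMs`, the factor would have to be derived from the measured duration — a sketch, as an illustration rather than the current behavior of `postInterpolation`:

```ts
// sketch: derive the setpts factor instead of hard-coding 0.5
function computeSetptsFilter(currentDurationInSec: number, targetDurationInSec: number): string {
  // setpts=k*PTS produces a clip of duration k * currentDurationInSec,
  // so hitting the target exactly means k = target / current
  const k = targetDurationInSec / currentDurationInSec
  return `setpts=${k.toFixed(4)}*PTS`
}

// e.g. a 4s clip with a 2000ms target => "setpts=0.5000*PTS" (a 2x speed-up)
console.log(computeSetptsFilter(4, 2))
```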
--------------------------------------------------------------------------------
/src/production/renderAnalysis.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImageAnalysis } from "./renderImageAnalysis.mts"

export async function renderAnalysis(request: RenderRequest, response: RenderedScene) {

  if (request.analyze) {
    const isVideo = request?.nbFrames > 1

    // note: this only works on images for now,
    // but we could also analyze the first video frame to get ourselves an idea
    const optionalAnalysisFn = !isVideo
      ? renderImageAnalysis(request, response)
      : Promise.resolve()

    await optionalAnalysisFn
  }
}

--------------------------------------------------------------------------------
/src/production/renderContent.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImage } from "./renderImage.mts"
import { renderVideo } from "./renderVideo.mts"

export async function renderContent(request: RenderRequest, response: RenderedScene) {
  const isVideo = request?.nbFrames > 1

  const renderContentFn = isVideo
    ? renderVideo
    : renderImage

  try {
    await renderContentFn(request, response)
  } catch (err) {
    // console.log(`renderContent() failed, trying a 2nd time..`)
    try {
      await renderContentFn(request, response)
    } catch (err2) {
      // console.log(`renderContent() failed, trying a 3rd time..`)
      await renderContentFn(request, response)
    }
  }
}
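The nested try/catch above is "retry up to 3 times" written out by hand. The same intent can be expressed once with a small generic helper — a sketch; the repository has its own `utils/misc/tryApiCall.mts`, whose exact signature is not shown in this excerpt:

```ts
// sketch: run an async operation up to maxAttempts times before giving up
async function withRetries<T>(fn: () => Promise<T>, maxAttempts = 3): Promise<T> {
  let lastError: unknown
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await fn()
    } catch (err) {
      lastError = err // keep the most recent failure and try again
    }
  }
  throw lastError
}

// usage: await withRetries(() => renderContentFn(request, response))
```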
--------------------------------------------------------------------------------
/src/production/renderImage.mts:
--------------------------------------------------------------------------------
import { generateImageSDXLAsBase64 } from "../providers/image-generation/generateImageSDXLGradio.mts"
import { generateImageSDXL360AsBase64 } from "../providers/image-generation/generateImageSDXL360.mts"
import { RenderedScene, RenderRequest } from "../types.mts"
import { generateImagePulibAsBase64 } from "../providers/image-generation/generateImagePulib.mts"

export async function renderImage(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {

  const isSpherical = request.projection === 'spherical'

  // we don't want to switch too much between model types in VideoChain,
  // because for speed we need to pre-load the servers,
  // but there is no point in pre-loading many servers for many models
  const generateImageAsBase64 = isSpherical
    ? generateImageSDXL360AsBase64
    : request.turbo

      // turbo models are models that are slightly less beautiful
      // but much, much faster to run
      // for the moment we use SDXL + LCM, as it offers better scene coherence,
      // but we might switch to SDXL Turbo in the future if its quality improves
      ? generateImagePulibAsBase64 // generateImageSDXLTurboAsBase64

      : generateImageSDXLAsBase64

  // console.log(`going to generate an image using ${request.projection || "default (cartesian)"} projection`)

  const params = {
    positivePrompt: request.prompt,
    negativePrompt: request.negativePrompt,
    identityImage: request.identityImage,
    seed: request.seed,
    nbSteps: request.nbSteps,
    width: request.width,
    height: request.height
  }

  // console.log(`calling generateImageAsBase64 with: `, JSON.stringify(params, null, 2))

  // we try at least 3 different servers
  try {
    response.assetUrl = await generateImageAsBase64(params)
    // console.log("successful generation!", response.assetUrl.slice(0, 30))
    if (!response.assetUrl?.length) {
      throw new Error(`the generated image is empty`)
    }
  } catch (err) {
    // console.error(`failed to render.. but let's try again!`)
    try {
      response.assetUrl = await generateImageAsBase64(params)
      // console.log("successful generation!", response.assetUrl.slice(0, 30))
      if (!response.assetUrl?.length) {
        throw new Error(`the generated image is empty`)
      }
    } catch (err) {
      try {
        response.assetUrl = await generateImageAsBase64(params)
        // console.log("successful generation!", response.assetUrl.slice(0, 30))
        if (!response.assetUrl?.length) {
          throw new Error(`the generated image is empty`)
        }
      } catch (err) {
        // console.error(`failed to generate the image, due to`, err)
        response.error = `failed to render scene: ${err}`
        response.status = "error"
        response.assetUrl = ""
      }
    }
  }

  return response
}

--------------------------------------------------------------------------------
/src/production/renderImageAnalysis.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"
import { analyzeImage } from "../providers/image-caption/analyzeImageWithIDEFICSAndNastyHack.mts"

export async function renderImageAnalysis(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {
  response.alt = request.prompt

  try {
    // note: this converts a base64 PNG to a base64 JPG (which is good, actually!)
    response.alt = await analyzeImage(response.assetUrl, request.prompt)
    console.log(`analysis worked on the first try!`)
  } catch (err) {
    console.error(`analysis failed the first time.. let's try again..`)
    try {
      response.alt = await analyzeImage(response.assetUrl, request.prompt)
      console.log(`analysis worked on the second try!`)
    } catch (err) {
      console.error(`analysis failed on the second attempt.. let's keep the prompt as a fallback, then :|`)
      // no need to log a catastrophic failure here, since we can still use
      // the original prompt as the alt text
      response.alt = request.prompt
    }
  }

  return response
}
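Stepping back to renderImage.mts above, its routing can be restated as a standalone decision table: spherical panoramas go to the SDXL 360 server, `turbo` requests go to the fast LCM-style server, and everything else goes to the regular SDXL server:

```ts
// sketch: the model-routing decision from renderImage.mts, isolated for clarity
type ImageModel = "sdxl-360" | "turbo" | "sdxl"

function pickImageModel(projection?: string, turbo?: boolean): ImageModel {
  if (projection === "spherical") return "sdxl-360" // panorama generation server
  if (turbo) return "turbo" // much faster, slightly less beautiful
  return "sdxl" // default: best quality, slower
}
```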
segmenting..") 34 | try { 35 | const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height) 36 | response.maskUrl = result.maskUrl 37 | response.segments = result.segments 38 | 39 | // console.log(`it worked the first time! got ${response.segments.length} segments`) 40 | } catch (err) { 41 | // console.log("this takes too long :/ trying another server..") 42 | try { 43 | const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height) 44 | response.maskUrl = result.maskUrl 45 | response.segments = result.segments 46 | 47 | // console.log(`it worked the second time! got ${response.segments.length} segments`) 48 | } catch (err) { 49 | // console.log("trying one last time, on a 3rd server..") 50 | try { 51 | const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height) 52 | response.maskUrl = result.maskUrl 53 | response.segments = result.segments 54 | 55 | // console.log(`it worked the third time! got ${response.segments.length} segments`) 56 | } catch (err) { 57 | console.log("yeah, all servers are busy it seems.. aborting") 58 | response.error = "all servers are busy" 59 | response.status = "error" 60 | } 61 | } 62 | } 63 | } 64 | } else { 65 | // console.log("no actionnables: just returning the image, then") 66 | } 67 | 68 | return response 69 | } 70 | -------------------------------------------------------------------------------- /src/production/renderImageUpscaling.mts: -------------------------------------------------------------------------------- 1 | import { RenderedScene, RenderRequest } from "../types.mts" 2 | import { upscaleImage } from "../providers/image-upscaling/upscaleImage.mts" 3 | 4 | export async function renderImageUpscaling( 5 | request: RenderRequest, 6 | response: RenderedScene, 7 | ): Promise { 8 | 9 | try { 10 | // note: this converts a base64 PNG to a base64 JPG (which is good, actually!) 11 | response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor) 12 | // console.log(`upscaling worked on the first try!`) 13 | } catch (err) { 14 | // console.error(`upscaling failed the first time.. let's try again..`) 15 | try { 16 | response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor) 17 | // console.log(`upscaling worked on the second try!`) 18 | } catch (err) { 19 | console.error(`upscaling failed on the second attempt.. 
--------------------------------------------------------------------------------
/src/production/renderImageUpscaling.mts:
--------------------------------------------------------------------------------
import { RenderedScene, RenderRequest } from "../types.mts"
import { upscaleImage } from "../providers/image-upscaling/upscaleImage.mts"

export async function renderImageUpscaling(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {

  try {
    // note: this converts a base64 PNG to a base64 JPG (which is good, actually!)
    response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor)
    // console.log(`upscaling worked on the first try!`)
  } catch (err) {
    // console.error(`upscaling failed the first time.. let's try again..`)
    try {
      response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor)
      // console.log(`upscaling worked on the second try!`)
    } catch (err) {
      console.error(`upscaling failed on the second attempt.. let's keep the low-res image then :|`)
      // no need to log a catastrophic failure here, since we still have the original (low-res image)
      // to work with
    }
  }

  return response
}

--------------------------------------------------------------------------------
/src/production/renderPipeline.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { saveRenderedSceneToCache } from "../utils/filesystem/saveRenderedSceneToCache.mts"
import { renderSegmentation } from "./renderSegmentation.mts"
import { renderUpscaling } from "./renderUpscaling.mts"
import { renderContent } from "./renderContent.mts"
import { renderAnalysis } from "./renderAnalysis.mts"

export async function renderPipeline(request: RenderRequest, response: RenderedScene) {
  await renderContent(request, response)

  await Promise.all([
    renderSegmentation(request, response),
    renderAnalysis(request, response),
    renderUpscaling(request, response)
  ])

  /*
  this is the optimized pipeline.
  However, right now it doesn't work because for some reason,
  asking to generate the same seed + prompt with a different nb of steps
  doesn't generate the same image!

  // first we need to wait for the low quality pre-render
  await renderContent({
    ...request,

    // we are a bit more aggressive with the quality of the video preview
    nbSteps: isVideo ? 8 : 16
  }, response)

  // then we can run both the segmentation and the high-res render at the same time
  await Promise.all([
    renderSegmentation(request, response),
    renderContent(request, response)
  ])
  */

  response.status = "completed"
  response.error = ""

  if (!request.cache || request.cache === "ignore") {
    // console.log("client asked to not use the cache in the rendering pipeline")
    return
  }

  // console.log("client asked this for cache: " + request.cache)

  try {
    // since the request is now completed, we cache it
    await saveRenderedSceneToCache(request, response)
    // console.log("successfully saved to cache")

    // we don't really need to remove it from the in-memory cache
    // (the cache queue in src/production/renderScene.mts)
    // since this cache queue already has automatic pruning
  } catch (err) {
    console.error(`failed to save to cache, but no big deal: ${err}`)
  }
}
--------------------------------------------------------------------------------
/src/production/renderScene.mts:
--------------------------------------------------------------------------------
import { v4 as uuidv4 } from "uuid"

import { RenderedScene, RenderRequest } from "../types.mts"
import { renderPipeline } from "./renderPipeline.mts"

const cache: Record<string, RenderedScene> = {}
const cacheQueue: string[] = []
const maxCacheSize = 2000

export async function renderScene(request: RenderRequest): Promise<RenderedScene> {
  // const key = getCacheKey(scene)

  const renderId = uuidv4()

  const response: RenderedScene = {
    renderId,
    status: "pending",
    assetUrl: "",
    alt: request.prompt || "",
    error: "",
    maskUrl: "",
    segments: []
  }

  cache[renderId] = response
  cacheQueue.push(renderId)
  if (cacheQueue.length > maxCacheSize) {
    const toRemove = cacheQueue.shift()
    delete cache[toRemove]
  }

  if (request.wait) {
    await renderPipeline(request, response)
  } else {
    // this is a fire-and-forget asynchronous pipeline:
    // we start it, but we do not await the response
    renderPipeline(request, response)
  }

  // console.log("renderScene: yielding the scene", response)
  return response
}

export async function getRenderedScene(renderId: string): Promise<RenderedScene> {
  const rendered = cache[renderId]
  if (!rendered) {
    throw new Error(`couldn't find any rendered scene with renderId ${renderId}`)
  }
  return cache[renderId]
}
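Putting the two entry points together: a caller can block on the whole pipeline with `wait: true`, or fire-and-forget and then poll with the returned `renderId`. A sketch (the request literal is abridged and cast, since the full `RenderRequest` type lives in src/types.mts):

```ts
import { renderScene, getRenderedScene } from "./renderScene.mts"

// sketch: fire-and-forget render, then poll until it completes or errors
const request = { prompt: "a castle at dusk", nbFrames: 1, wait: false } as any

const { renderId } = await renderScene(request)

let scene = await getRenderedScene(renderId)
while (scene.status === "pending") {
  await new Promise(r => setTimeout(r, 1000)) // poll every second
  scene = await getRenderedScene(renderId)
}
console.log(scene.status === "completed" ? "done!" : scene.error)
```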
--------------------------------------------------------------------------------
/src/production/renderSegmentation.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImageSegmentation } from "./renderImageSegmentation.mts"
import { renderVideoSegmentation } from "./renderVideoSegmentation.mts"

export async function renderSegmentation(request: RenderRequest, response: RenderedScene) {

  if (request.segmentation === "firstframe" || request.segmentation === "allframes") {
    const isVideo = request?.nbFrames > 1

    const renderSegmentationFn = isVideo
      ? renderVideoSegmentation
      : renderImageSegmentation

    await renderSegmentationFn(request, response)
  }
}

--------------------------------------------------------------------------------
/src/production/renderUpscaling.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImageUpscaling } from "./renderImageUpscaling.mts"
import { renderVideoUpscaling } from "./renderVideoUpscaling.mts"

export async function renderUpscaling(request: RenderRequest, response: RenderedScene) {

  if (request.upscalingFactor > 1) {

    const isVideo = request?.nbFrames > 1

    // we upscale images with ESRGAN, and videos with Zeroscope XL
    const renderFn = isVideo
      ? renderVideoUpscaling
      : renderImageUpscaling

    await renderFn(request, response)
  }
}

--------------------------------------------------------------------------------
/src/production/renderVideo.mts:
--------------------------------------------------------------------------------
import { RenderedScene, RenderRequest, VideoGenerationParams } from "../types.mts"

// import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
// import { generateVideo } from "../providers/video-generation/generateVideoWithHotshotGradioAPI.mts"
// import { generateVideoWithAnimateLCM } from "../providers/video-generation/generateVideoWithAnimateLCM.mts"
import { generateVideoWithAnimateDiffLightning } from "../providers/video-generation/generateVideoWithAnimateDiffLightning.mts"

export async function renderVideo(
  request: RenderRequest,
  response: RenderedScene
): Promise<RenderedScene> {

  await generateVideoWithAnimateDiffLightning(request, response)

  return response
}
segmenting..") 30 | const result = await segmentImage(firstFrameFilePath, actionnables, request.width, request.height) 31 | response.maskUrl = result.maskUrl 32 | response.segments = result.segments 33 | 34 | // console.log("success!", { segments }) 35 | } 36 | /* 37 | const jpgBase64 = await getFirstVideoFrame(tmpVideoFileName) 38 | if (!jpgBase64) { 39 | console.error("failed to get the image") 40 | error = "failed to segment the image" 41 | } else { 42 | console.log(`got the first frame (${jpgBase64.length})`) 43 | 44 | console.log("TODO: call segmentImage with the base64 image") 45 | await segmentImage() 46 | } 47 | */ 48 | } 49 | } 50 | 51 | return response 52 | } -------------------------------------------------------------------------------- /src/production/renderVideoUpscaling.mts: -------------------------------------------------------------------------------- 1 | import { upscaleVideoToBase64URL } from "../providers/video-upscaling/upscaleVideoToBase64URL.mts" 2 | import { RenderedScene, RenderRequest } from "../types.mts" 3 | 4 | export async function renderVideoUpscaling( 5 | request: RenderRequest, 6 | response: RenderedScene, 7 | ): Promise { 8 | 9 | try { 10 | // note: this converts a base64 PNG to a base64 JPG (which is good, actually!) 11 | response.assetUrl = await upscaleVideoToBase64URL(response.assetUrl, request.prompt) 12 | // console.log(`upscaling worked on the first try!`) 13 | } catch (err) { 14 | // console.error(`upscaling failed the first time.. let's try again..`) 15 | try { 16 | response.assetUrl = await upscaleVideoToBase64URL(response.assetUrl, request.prompt) 17 | // console.log(`upscaling worked on the second try!`) 18 | } catch (err) { 19 | console.error(`upscaling failed on the second attempt.. let's keep the low-res image then :|`) 20 | // no need to log a catastrophic failure here, since we still have the original (low-res image) 21 | // to work with 22 | } 23 | } 24 | 25 | return response 26 | } 27 | -------------------------------------------------------------------------------- /src/providers/audio-generation/generateAudio.mts: -------------------------------------------------------------------------------- 1 | import { v4 as uuidv4 } from "uuid" 2 | import puppeteer from "puppeteer" 3 | 4 | import { downloadFileToTmp } from "../../utils/download/downloadFileToTmp.mts" 5 | import { moveFileFromTmpToPending } from "../../utils/filesystem/moveFileFromTmpToPending.mts" 6 | 7 | export const state = { 8 | load: 0, 9 | } 10 | 11 | const instances: string[] = [ 12 | `${process.env.VC_AUDIO_GENERATION_SPACE_API_URL_1 || ""}` 13 | ].filter(instance => instance?.length > 0) 14 | 15 | // TODO we should use an inference endpoint instead 16 | export async function generateAudio(prompt: string, audioFileName: string) { 17 | 18 | if (state.load === instances.length) { 19 | throw new Error(`all audio generation servers are busy, try again later..`) 20 | } 21 | 22 | state.load += 1 23 | 24 | try { 25 | const instance = instances.shift() 26 | instances.push(instance) 27 | 28 | const browser = await puppeteer.launch({ 29 | headless: true, 30 | protocolTimeout: 120000, 31 | }) 32 | 33 | try { 34 | const page = await browser.newPage() 35 | 36 | await page.goto(instance, { 37 | waitUntil: "networkidle2", 38 | }) 39 | 40 | // await new Promise(r => setTimeout(r, 1000)) 41 | 42 | const firstTextboxInput = await page.$('input[data-testid="textbox"]') 43 | 44 | await firstTextboxInput.type(prompt) 45 | 46 | // console.log("looking for the button to submit") 47 | const 
--------------------------------------------------------------------------------
/src/providers/audio-generation/generateAudio.mts:
--------------------------------------------------------------------------------
import { v4 as uuidv4 } from "uuid"
import puppeteer from "puppeteer"

import { downloadFileToTmp } from "../../utils/download/downloadFileToTmp.mts"
import { moveFileFromTmpToPending } from "../../utils/filesystem/moveFileFromTmpToPending.mts"

export const state = {
  load: 0,
}

const instances: string[] = [
  `${process.env.VC_AUDIO_GENERATION_SPACE_API_URL_1 || ""}`
].filter(instance => instance?.length > 0)

// TODO we should use an inference endpoint instead
export async function generateAudio(prompt: string, audioFileName: string) {

  if (state.load === instances.length) {
    throw new Error(`all audio generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const instance = instances.shift()
    instances.push(instance)

    const browser = await puppeteer.launch({
      headless: true,
      protocolTimeout: 120000,
    })

    try {
      const page = await browser.newPage()

      await page.goto(instance, {
        waitUntil: "networkidle2",
      })

      // await new Promise(r => setTimeout(r, 1000))

      const firstTextboxInput = await page.$('input[data-testid="textbox"]')

      await firstTextboxInput.type(prompt)

      // console.log("looking for the button to submit")
      const submitButton = await page.$("button.lg")

      // console.log("clicking on the button")
      await submitButton.click()

      await page.waitForSelector("a[download]", {
        timeout: 120000, // no need to wait for too long, generation is quick
      })

      const audioRemoteUrl = await page.$$eval("a[download]", el => el.map(x => x.getAttribute("href"))[0])


      // it is always a good idea to download to a tmp dir before saving to the pending dir
      // because there is always a risk that the download will fail

      const tmpFileName = `${uuidv4()}.mp4`

      await downloadFileToTmp(audioRemoteUrl, tmpFileName)
      await moveFileFromTmpToPending(tmpFileName, audioFileName)
    } catch (err) {
      throw err
    } finally {
      await browser.close()
    }
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
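Usage sketch: the generated audio ends up in the pending files directory under the requested file name (note that the internal temp file keeps the `.mp4` extension used elsewhere in the pipeline):

```ts
import { generateAudio } from "./generateAudio.mts"

// sketch: generate an ambiance track into the pending files directory
await generateAudio(
  "gentle rain falling on a tent, distant thunder",
  "ambiance-0001.mp4" // destination file name in the pending dir
)
```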
--------------------------------------------------------------------------------
/src/providers/audio-generation/generateAudioLegacy.mts:
--------------------------------------------------------------------------------
import { client } from '@gradio/client'

import { generateSeed } from "../../utils/misc/generateSeed.mts"

export const state = {
  load: 0
}

const instances: string[] = [
  process.env.VC_AUDIO_GENERATION_SPACE_API_URL
]

export const generateAudio = async (prompt: string, options?: {
  seed: number;
  nbFrames: number;
  nbSteps: number;
}) => {

  if (state.load === instances.length) {
    throw new Error(`all audio generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const seed = options?.seed || generateSeed()
    const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling will require too much memory!
    const nbSteps = options?.nbSteps || 35

    const instance = instances.shift()
    instances.push(instance)

    const api = await client(instance, {
      hf_token: `${process.env.VC_HF_API_TOKEN}` as any
    })

    const rawResponse = await api.predict('/run', [
      prompt, // string in 'Prompt' Textbox component
      seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
      nbFrames, // 24 // it is the nb of frames per second, I think?
      nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
    ]) as any

    const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }

    return `${instance}/file=${name}`
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}

--------------------------------------------------------------------------------
/src/providers/character-model/generateActor.mts:
--------------------------------------------------------------------------------
import { promises as fs } from "node:fs"
import path from "node:path"

import tmpDir from "temp-dir"

import { HfInference } from "@huggingface/inference"

const hf = new HfInference(process.env.VC_HF_API_TOKEN)

export const generateActor = async (prompt: string, fileName: string, seed: number) => {
  const positivePrompt = [
    `profile photo of ${prompt || ""}`,
    "id picture",
    "photoshoot",
    "portrait photography",
    "neutral expression",
    "neutral background",
    "studio photo",
    "award winning",
    "high resolution",
    "photo realistic",
    "intricate details",
    "beautiful",
  ]
  const negativePrompt = [
    "anime",
    "drawing",
    "painting",
    "lowres",
    "blurry",
    "artificial"
  ]

  console.log(`generating actor: ${positivePrompt.join(", ")}`)

  const blob = await hf.textToImage({
    inputs: positivePrompt.join(", "),
    model: "stabilityai/stable-diffusion-2-1",
    parameters: {
      negative_prompt: negativePrompt.join(", "),
      // seed, no seed?
    }
  })

  const filePath = path.resolve(tmpDir, fileName)

  const buffer = Buffer.from(await blob.arrayBuffer())
  await fs.writeFile(filePath, buffer, "utf8")

  return filePath
}
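Usage sketch (note that the `seed` parameter is accepted but, as the commented-out line above shows, not currently forwarded to the model):

```ts
import { generateActor } from "./generateActor.mts"

// sketch: generate a neutral ID-style portrait, and get back the temp file path
const filePath = await generateActor(
  "a middle-aged astronaut with short gray hair",
  "actor-0001.png",
  42 // seed (currently unused by the underlying call)
)
console.log(`actor portrait written to ${filePath}`)
```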
--------------------------------------------------------------------------------
/src/providers/image-caption/analyzeImageWithIDEFICS.mts:
--------------------------------------------------------------------------------

import { client } from "@gradio/client"

const instances: string[] = [
  `${process.env.VC_ANALYSIS_SPACE_API_URL || ""}`,
].filter(instance => instance?.length > 0)

export async function analyzeImage(src: string, prompt: string): Promise<string> {

  const instance = instances.shift()
  instances.push(instance)

  const api = await client(instance, {
    hf_token: `${process.env.VC_HF_API_TOKEN}` as any
  })

  // console.log("/analyzeImage: calling api.predict(6, ...)")

  /*
  the chat history has this format:
  [
    [
      '![](/file=/tmp/gradio/2ee0577f810cba5c50d0a7f047a9e6557f4e269f/image.png)What do you see in the following image?',
      'I'
    ]
  ]
  */
  const chat_history = [
    // ['', '']
  ]

  // unfortunately the Gradio client doesn't support streaming, and will crash here with a nasty error
  /*
  node_modules/@gradio/client/dist/index.js:705
        return data.map((d, i) => {
                    ^
  TypeError: Cannot read properties of null (reading 'is_file')
      at node_modules/@gradio/client/dist/index.js:713:43
      at Array.map (<anonymous>)
      at transform_output (node_modules/@gradio/client/dist/index.js:705:15)
  */

  const result = await api.predict(6, [
    "HuggingFaceM4/idefics-80b-instruct", // string (Option from: ['HuggingFaceM4/idefics-80b-instruct']) in 'Model' Dropdown component
    prompt, // string in 'Text input' Textbox component
    chat_history, // any (any valid json) in 'IDEFICS' Chatbot component
    src, // blob in 'Image input' Image component

    // the following values come from the source code at:
    // https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/blob/main/app_dialogue.py#L416-L472

    "Greedy", // string in 'Decoding strategy' Radio component
    0.4, // number (numeric value between 0.0 and 5.0) in 'Sampling temperature' Slider component
    512, // number (numeric value between 8 and 1024) in 'Maximum number of new tokens to generate' Slider component
    1, // number (numeric value between 0.0 and 5.0) in 'Repetition penalty' Slider component
    0.8, // number (numeric value between 0.01 and 0.99) in 'Top P' Slider component
  ])

  const rawResponse = result as any

  console.log("got a response!:", rawResponse)

  return rawResponse?.data?.[0] as string
}

--------------------------------------------------------------------------------
/src/providers/image-caption/analyzeImageWithIDEFICSAndNastyHack.mts:
--------------------------------------------------------------------------------

/*
unfortunately the Gradio client doesn't support streaming:
it will crash here with a nasty error

node_modules/@gradio/client/dist/index.js:705
      return data.map((d, i) => {
                  ^
TypeError: Cannot read properties of null (reading 'is_file')
    at node_modules/@gradio/client/dist/index.js:713:43
    at Array.map (<anonymous>)
    at transform_output (node_modules/@gradio/client/dist/index.js:705:15)


This prevents us from using IDEFICS through the Gradio API,
so the only solution is to hack our way in using Puppeteer.
*/


import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import puppeteer from "puppeteer"

import { writeBase64ToFile } from "../../utils/filesystem/writeBase64ToFile.mts"
import { sleep } from "../../utils/misc/sleep.mts"
import { deleteFileIfExists } from "../../utils/filesystem/deleteFileIfExists.mts"

const instances: string[] = [
  `${process.env.VC_ANALYSIS_SPACE_API_URL || ""}`,
].filter(instance => instance?.length > 0)

// There is no easy to use public API for IDEFICS
// (something where we can just push text + file and get a response without handling history, upload etc)
// So let's hack our way in 🐕
export async function analyzeImage(image: string, prompt: string) {
  const instance = instances.shift()
  instances.push(instance)

  // wait.. is that really a jpg we have?
  // well, let's hope so.
  const tmpImageFilePath = path.join(tmpDir, `${uuidv4()}.jpg`)

  await writeBase64ToFile(image, tmpImageFilePath)
  // console.log("wrote the image to ", tmpImageFilePath)

  const browser = await puppeteer.launch({
    headless: true,
    protocolTimeout: 30000,
  })

  try {
    const page = await browser.newPage()

    await page.goto(instance, {
      waitUntil: 'networkidle2',
    })

    // console.log("filling in the prompt..")
    const promptField = await page.$('textarea')
    await promptField.type(prompt)

    // console.log("beginning:", imageBase64.slice(0, 100))

    // await new Promise(r => setTimeout(r, 1000))

    const fileField = await page.$('input[type=file]')

    console.log(`uploading file..`)
    await fileField.uploadFile(tmpImageFilePath)
    // console.log(`did it work? did it do something?`)
    // await sleep(2000)

    // console.log('looking for the button to submit')
    const submitButton = await page.$('button.lg')

    // console.log('clicking on the submit')
    await submitButton.click()

    console.log("waiting for bot response..")
    await page.$('.message.bot')

    // note: we are going to receive the response in streaming

    // TODO we should use a different approach here, like perhaps something to detect
    // when the element has stopped receiving updates
    await sleep(12000)

    const message = await page.$$eval(".message.bot p", el => el.map(x => x.innerText)[0])
    console.log("response:", message)

    return message || ""
  } catch (err) {
    throw err
  } finally {
    await browser.close()
    await deleteFileIfExists(tmpImageFilePath)
  }
}
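Usage sketch: the first argument is expected to be a base64 JPEG payload, which the function writes to a temporary .jpg before uploading it through the page:

```ts
import { analyzeImage } from "./analyzeImageWithIDEFICSAndNastyHack.mts"

// sketch: caption an image already held in memory as base64
const imageBase64 = "..." // placeholder for a base64 JPEG payload
const caption = await analyzeImage(imageBase64, "What do you see in this image?")
console.log("IDEFICS says:", caption)
```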
24 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
25 | 
26 |   const width = getValidNumber(options?.width, 256, 1024, 512)
27 |   const height = getValidNumber(options?.height, 256, 1024, 512)
28 |   const nbSteps = getValidNumber(options?.nbSteps, 5, 50, 25)
29 | 
30 |   const blob = await hf.textToImage({
31 |     inputs: [
32 |       positivePrompt,
33 |       "beautiful",
34 |       "award winning",
35 |       // "intricate details",
36 |       "high resolution"
37 |     ].filter(word => word)
38 |     .join(", "),
39 |     model: "stabilityai/stable-diffusion-2-1",
40 |     parameters: {
41 |       negative_prompt: [
42 |         negativePrompt,
43 |         "blurry",
44 |         // "artificial",
45 |         // "cropped",
46 |         "low quality",
47 |         "ugly"
48 |       ].filter(word => word)
49 |       .join(", "),
50 |       // forward the validated dimensions and step count
51 |       // (note: the `seed` computed above is not forwarded, since the hosted text-to-image
52 |       // Inference API doesn't expose a seed parameter at the time of writing)
53 |       width,
54 |       height,
55 |       num_inference_steps: nbSteps
56 |     }
57 |   })
58 |   const buffer = Buffer.from(await blob.arrayBuffer())
59 | 
60 |   return buffer
61 | }
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageLCMFetch.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | 
6 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes (see the sketch in generateImageSDXL360.mts)
7 | // console.log("process.env:", process.env)
8 | 
9 | // note: to reduce costs I use the small A10s (not the large)
10 | // anyway, we will soon not need to use this cloud anymore
11 | // since we will be able to leverage the Inference API
12 | const instance = `${process.env.VC_LCM_SPACE_API_URL || ""}`
13 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
14 | 
15 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2))
16 | 
17 | export async function generateImageLCMAsBase64(options: {
18 |   positivePrompt: string;
19 |   negativePrompt?: string;
20 |   seed?: number;
21 |   width?: number;
22 |   height?: number;
23 |   nbSteps?: number;
24 | }): Promise<string> {
25 | 
26 |   // console.log("querying " + instance)
27 |   const positivePrompt = options?.positivePrompt || ""
28 |   if (!positivePrompt) {
29 |     throw new Error("missing prompt")
30 |   }
31 | 
32 |   // the negative prompt CAN be missing, since we use a trick
33 |   // where we make the interface mandatory in the TS doc,
34 |   // but browsers might send something partial
35 |   const negativePrompt = options?.negativePrompt || ""
36 | 
37 |   // we treat 0 as meaning "random seed"
38 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
39 | 
40 |   const width = getValidNumber(options?.width, 256, 1024, 512)
41 |   const height = getValidNumber(options?.height, 256, 1024, 512)
42 |   const nbSteps = getValidNumber(options?.nbSteps, 1, 8, 4)
43 |   // console.log("SEED:", seed)
44 | 
45 |   const positive = [
46 | 
47 |     // oh well.. is it too late to move this to the bottom?
48 |     "beautiful",
49 | 
50 |     // too opinionated, so let's remove it
51 |     // "intricate details",
52 | 
53 |     positivePrompt,
54 | 
55 |     "award winning",
56 |     "high resolution"
57 |   ].filter(word => word)
58 |   .join(", ")
59 | 
60 |   const negative = [
61 |     negativePrompt,
62 |     "watermark",
63 |     "copyright",
64 |     "blurry",
65 |     // "artificial",
66 |     // "cropped",
67 |     "low quality",
68 |     "ugly"
69 |   ].filter(word => word)
70 |   .join(", ")
71 | 
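  // (`instance` may or may not end with a trailing slash, so we normalize before appending the route;
  //  e.g. if VC_LCM_SPACE_API_URL were https://someuser-lcm.hf.space (a hypothetical URL),
  //  this would POST to https://someuser-lcm.hf.space/api/predict)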
"" : "/") + "api/predict", { 73 | method: "POST", 74 | headers: { 75 | "Content-Type": "application/json", 76 | // Authorization: `Bearer ${token}`, 77 | }, 78 | body: JSON.stringify({ 79 | fn_index: 1, // <- important! 80 | data: [ 81 | positive, // string in 'Prompt' Textbox component 82 | negative, // string in 'Negative prompt' Textbox component 83 | seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component 84 | width, // number (numeric value between 256 and 1024) in 'Width' Slider component 85 | height, // number (numeric value between 256 and 1024) in 'Height' Slider component 86 | 0.0, // can be disabled for LCM-LORA-SSD-1B 87 | nbSteps, // number (numeric value between 2 and 8) in 'Number of inference steps for base' Slider component 88 | secretToken 89 | ], 90 | }), 91 | cache: "no-store", 92 | }) 93 | 94 | const { data } = await res.json() 95 | 96 | 97 | // Recommendation: handle errors 98 | if (res.status !== 200 || !Array.isArray(data)) { 99 | // This will activate the closest `error.js` Error Boundary 100 | throw new Error(`Failed to fetch data (status: ${res.status})`) 101 | } 102 | // console.log("data:", data.slice(0, 50)) 103 | 104 | if (!data[0]) { 105 | throw new Error(`the returned image was empty`) 106 | } 107 | 108 | return data[0] as string 109 | } 110 | -------------------------------------------------------------------------------- /src/providers/image-generation/generateImageLCMGradio.mts: -------------------------------------------------------------------------------- 1 | 2 | import { client } from "@gradio/client" 3 | 4 | import { generateSeed } from "../../utils/misc/generateSeed.mts" 5 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts" 6 | import { convertToWebp } from "../../utils/image/convertToWebp.mts" 7 | 8 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes 9 | // console.log("process.env:", process.env) 10 | 11 | // note: to reduce costs I use the small A10s (not the large) 12 | // anyway, we will soon not need to use this cloud anymore 13 | // since we will be able to leverage the Inference API 14 | const instance = `${process.env.VC_LCM_SPACE_API_URL || ""}` 15 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}` 16 | 17 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2)) 18 | 19 | export async function generateImageLCMAsBase64(options: { 20 | positivePrompt: string; 21 | negativePrompt?: string; 22 | seed?: number; 23 | width?: number; 24 | height?: number; 25 | nbSteps?: number; 26 | }): Promise { 27 | 28 | // console.log("querying " + instance) 29 | const positivePrompt = options?.positivePrompt || "" 30 | if (!positivePrompt) { 31 | throw new Error("missing prompt") 32 | } 33 | 34 | // the negative prompt CAN be missing, since we use a trick 35 | // where we make the interface mandatory in the TS doc, 36 | // but browsers might send something partial 37 | const negativePrompt = options?.negativePrompt || "" 38 | 39 | // we treat 0 as meaning "random seed" 40 | const seed = (options?.seed ? options.seed : 0) || generateSeed() 41 | 42 | const width = getValidNumber(options?.width, 256, 1024, 512) 43 | const height = getValidNumber(options?.height, 256, 1024, 512) 44 | const nbSteps = getValidNumber(options?.nbSteps, 1, 8, 4) 45 | // console.log("SEED:", seed) 46 | 47 | const positive = [ 48 | 49 | // oh well.. is it too late to move this to the bottom? 
50 | "beautiful", 51 | 52 | // too opinionated, so let's remove it 53 | // "intricate details", 54 | 55 | positivePrompt, 56 | 57 | "award winning", 58 | "high resolution" 59 | ].filter(word => word) 60 | .join(", ") 61 | 62 | const negative = [ 63 | negativePrompt, 64 | "watermark", 65 | "copyright", 66 | "blurry", 67 | // "artificial", 68 | // "cropped", 69 | "low quality", 70 | "ugly" 71 | ].filter(word => word) 72 | .join(", ") 73 | 74 | const api = await client(instance, { 75 | hf_token: `${process.env.VC_HF_API_TOKEN}` as any 76 | }) 77 | 78 | const rawResponse = (await api.predict("/run", [ 79 | positive, // string in 'Prompt' Textbox component 80 | negative, // string in 'Negative prompt' Textbox component 81 | seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component 82 | width, // number (numeric value between 256 and 1024) in 'Width' Slider component 83 | height, // number (numeric value between 256 and 1024) in 'Height' Slider component 84 | 0.0, // can be disabled for LCM SDXL 85 | nbSteps, // number (numeric value between 2 and 8) in 'Number of inference steps for base' Slider component 86 | secretToken 87 | ])) as any 88 | 89 | const result = rawResponse?.data?.[0] as string 90 | if (!result?.length) { 91 | throw new Error(`the returned image was empty`) 92 | } 93 | 94 | try { 95 | const finalImage = await convertToWebp(result) 96 | return finalImage 97 | } catch (err) { 98 | // console.log("err:", err) 99 | throw new Error(err) 100 | } 101 | } -------------------------------------------------------------------------------- /src/providers/image-generation/generateImagePulib.mts: -------------------------------------------------------------------------------- 1 | 2 | import { client } from "@gradio/client" 3 | 4 | import { generateSeed } from "../../utils/misc/generateSeed.mts" 5 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts" 6 | import { convertToWebp } from "../../utils/image/convertToWebp.mts" 7 | import { addBase64HeaderToPng } from "../../utils/image/addBase64HeaderToPng.mts" 8 | 9 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes 10 | // console.log("process.env:", process.env) 11 | 12 | // note: to reduce costs I use the small A10s (not the large) 13 | // anyway, we will soon not need to use this cloud anymore 14 | // since we will be able to leverage the Inference API 15 | const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-pulid.hf.space` 16 | const gradioSpace = `jbilcke-hf/ai-tube-model-pulid` 17 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}` 18 | 19 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2)) 20 | 21 | export async function generateImagePulibAsBase64(options: { 22 | positivePrompt: string; 23 | negativePrompt?: string; 24 | identityImage?: string; 25 | seed?: number; 26 | width?: number; 27 | height?: number; 28 | nbSteps?: number; 29 | }): Promise { 30 | 31 | const positivePrompt = options?.positivePrompt || "" 32 | if (!positivePrompt) { 33 | throw new Error("missing prompt") 34 | } 35 | 36 | // the negative prompt CAN be missing, since we use a trick 37 | // where we make the interface mandatory in the TS doc, 38 | // but browsers might send something partial 39 | const negativePrompt = options?.negativePrompt || "" 40 | 41 | // we treat 0 as meaning "random seed" 42 | const seed = (options?.seed ? 
42 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
43 | 
44 |   const width = getValidNumber(options?.width, 256, 1024, 512)
45 |   const height = getValidNumber(options?.height, 256, 1024, 512)
46 |   const nbSteps = getValidNumber(options?.nbSteps, 1, 8, 4)
47 |   // console.log("SEED:", seed)
48 | 
49 |   const identityImage = `${options.identityImage || ""}`
50 | 
51 |   const positive = [
52 |     positivePrompt,
53 |   ].filter(word => word)
54 |   .join(", ")
55 | 
56 |   const negative = [
57 |     negativePrompt,
58 |     "watermark",
59 |     "copyright",
60 |     "blurry",
61 |     // "artificial",
62 |     // "cropped",
63 |     "low quality",
64 |     "ugly",
65 |     'flaws in the eyes',
66 |     'flaws in the face',
67 |     'flaws',
68 |     'lowres',
69 |     'non-HDRi',
70 |     'low quality',
71 |     'worst quality',
72 |     'artifacts noise',
73 |     'text',
74 |     'glitch',
75 |     'deformed',
76 |     'mutated',
77 |     'disfigured hands',
78 |     'low resolution',
79 |     'partially rendered objects',
80 |     'deformed or partially rendered eyes',
81 |     'deformed eyeballs',
82 |     'cross-eyed',
83 |   ].filter(word => word)
84 |   .join(", ")
85 | 
86 |   const api = await client(gradioSpaceApiUrl, {
87 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
88 |   })
89 | 
90 |   // we hardcode the number of steps to 4 (so the validated nbSteps above is deliberately ignored)
91 |   const steps = 4
92 | 
93 |   // console.log("querying " + gradioSpaceApiUrl + " with tons of params")
94 | 
95 |   const rawResponse = (await api.predict("/run", [
96 |     secretToken, // # str in 'parameter_4' Textbox component
97 |     identityImage || "", // 'ID image (main)' Image component
98 |     "", // 'Additional ID image (auxiliary)' Image component
99 |     "", // 'Additional ID image (auxiliary)' Image component
100 |     "", // 'Additional ID image (auxiliary)' Image component
101 |     positive, // # str in 'Prompt' Textbox component
102 |     negative, // # str in 'Negative Prompt' Textbox component
103 |     1.2, // # int | float (numeric value between 1 and 1.5) in 'CFG, recommend value range [1, 1.5], 1 will be faster ' Slider component
104 |     seed, // # int | float (numeric value between 0 and 4294967295) in 'Seed' Slider component (we pass the validated seed, instead of discarding it and generating a fresh one)
105 |     steps, // # int | float (numeric value between 1 and 100) in 'Steps' Slider component
106 |     height, // # int | float (numeric value between 512 and 1280) in 'Height' Slider component
107 |     width, // # int | float (numeric value between 512 and 1280) in 'Width' Slider component
108 |     0.8, // # int | float (numeric value between 0 and 5) in 'ID scale' Slider component
109 |     "fidelity", // # str (Option from: ['fidelity', 'extremely style']) in 'mode' Dropdown component
110 |     false, // 'ID Mix (if you want to mix two ID image, please turn this on, otherwise, turn this off)' Checkbox component
111 |   ])) as any
112 | 
113 |   const result = rawResponse?.data?.[0] as string
114 |   if (!result?.length) {
115 |     throw new Error(`the returned image was empty`)
116 |   }
117 | 
118 |   try {
119 |     const finalImage = await convertToWebp(addBase64HeaderToPng(result))
120 |     return finalImage
121 |   } catch (err) {
122 |     // console.log("err:", err)
123 |     throw new Error(`failed to convert the image to webp: ${err}`)
124 |   }
125 | }
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXL360.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | 
6 | // we don't use replicas yet, because it ain't easy to get their hostname
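// (sketch of the "mark failed instances as unavailable for a couple of minutes" TODO from
//  the other providers; hypothetical helpers, not wired in anywhere yet:
//
//    const cooldownUntil = new Map<string, number>()
//    const pickInstance = (): string => {
//      const available = instances.filter(url => (cooldownUntil.get(url) ?? 0) < Date.now())
//      return (available.length ? available : instances)[0]
//    }
//    const markAsFailed = (url: string) => cooldownUntil.set(url, Date.now() + 2 * 60 * 1000)
// )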
7 | const instances: string[] = [
8 |   `${process.env.VC_SDXL_360_SPACE_API_URL_1 || ""}`,
9 |   // `${process.env.VC_SDXL_SPACE_API_URL_2 || ""}`,
10 | ].filter(instance => instance?.length > 0)
11 | 
12 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
13 | 
14 | export async function generateImageSDXL360AsBase64(options: {
15 |   positivePrompt: string;
16 |   negativePrompt?: string;
17 |   seed?: number;
18 |   width?: number;
19 |   height?: number;
20 |   nbSteps?: number;
21 | }): Promise<string> {
22 | 
23 |   const positivePrompt = options?.positivePrompt || ""
24 |   if (!positivePrompt) {
25 |     throw new Error("missing prompt")
26 |   }
27 |   const negativePrompt = options?.negativePrompt || ""
28 | 
29 |   // we treat 0 as meaning "random seed"
30 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
31 | 
32 |   const width = getValidNumber(options?.width, 256, 1024, 512)
33 |   const height = getValidNumber(options?.height, 256, 1024, 512)
34 |   const nbSteps = getValidNumber(options?.nbSteps, 5, 100, 20)
35 |   // console.log("SEED FOR 360:", seed)
36 | 
37 |   const instance = instances.shift()
38 |   instances.push(instance)
39 | 
40 |   const positive = [
41 |     "360 view",
42 |     positivePrompt,
43 |     "beautiful",
44 |     // "intricate details",
45 |     "award winning",
46 |     "high resolution"
47 |   ].filter(word => word)
48 |   .join(", ")
49 | 
50 |   const negative = [
51 |     negativePrompt,
52 |     "watermark",
53 |     "copyright",
54 |     "blurry",
55 |     // "artificial",
56 |     // "cropped",
57 |     "low quality",
58 |     "ugly"
59 |   ].filter(word => word)
60 |   .join(", ")
61 | 
62 |   const api = await client(instance, {
63 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
64 |   })
65 | 
66 | 
67 |   const rawResponse = (await api.predict("/run", [
68 |     positive, // string in 'Prompt' Textbox component
69 |     negative, // string in 'Negative prompt' Textbox component
70 |     positive, // string in 'Prompt 2' Textbox component
71 |     negative, // string in 'Negative prompt 2' Textbox component
72 |     true, // boolean in 'Use negative prompt' Checkbox component
73 |     false, // boolean in 'Use prompt 2' Checkbox component
74 |     false, // boolean in 'Use negative prompt 2' Checkbox component
75 |     seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
76 |     width, // number (numeric value between 256 and 1024) in 'Width' Slider component
77 |     height, // number (numeric value between 256 and 1024) in 'Height' Slider component
78 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for base' Slider component
79 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for refiner' Slider component
80 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for base' Slider component
81 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for refiner' Slider component
82 |     true, // boolean in 'Apply refiner' Checkbox component
83 |     secretToken,
84 |   ])) as any
85 | 
86 |   const result = rawResponse?.data?.[0] as string
87 |   if (!result?.length) {
88 |     throw new Error(`the returned image was empty`)
89 |   }
90 |   return result
91 | }
92 | 
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXLFetch.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
"../../utils/validators/getValidNumber.mts" 5 | 6 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes 7 | // console.log("process.env:", process.env) 8 | 9 | // note: to reduce costs I use the small A10s (not the large) 10 | // anyway, we will soon not need to use this cloud anymore 11 | // since we will be able to leverage the Inference API 12 | const instance = `${process.env.VC_SDXL_SPACE_API_URL || ""}` 13 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}` 14 | 15 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2)) 16 | 17 | export async function generateImageSDXLAsBase64(options: { 18 | positivePrompt: string; 19 | negativePrompt?: string; 20 | seed?: number; 21 | width?: number; 22 | height?: number; 23 | nbSteps?: number; 24 | }): Promise { 25 | 26 | const positivePrompt = options?.positivePrompt || "" 27 | if (!positivePrompt) { 28 | throw new Error("missing prompt") 29 | } 30 | 31 | // the negative prompt CAN be missing, since we use a trick 32 | // where we make the interface mandatory in the TS doc, 33 | // but browsers might send something partial 34 | const negativePrompt = options?.negativePrompt || "" 35 | 36 | // we treat 0 as meaning "random seed" 37 | const seed = (options?.seed ? options.seed : 0) || generateSeed() 38 | 39 | const width = getValidNumber(options?.width, 256, 1024, 512) 40 | const height = getValidNumber(options?.height, 256, 1024, 512) 41 | const nbSteps = getValidNumber(options?.nbSteps, 5, 100, 20) 42 | // console.log("SEED:", seed) 43 | 44 | const positive = [ 45 | 46 | // oh well.. is it too late to move this to the bottom? 47 | "beautiful", 48 | // "intricate details", 49 | positivePrompt, 50 | 51 | "award winning", 52 | "high resolution" 53 | ].filter(word => word) 54 | .join(", ") 55 | 56 | const negative = [ 57 | negativePrompt, 58 | "watermark", 59 | "copyright", 60 | "blurry", 61 | // "artificial", 62 | // "cropped", 63 | "low quality", 64 | "ugly" 65 | ].filter(word => word) 66 | .join(", ") 67 | 68 | const res = await fetch(instance + (instance.endsWith("/") ? "" : "/") + "api/predict", { 69 | method: "POST", 70 | headers: { 71 | "Content-Type": "application/json", 72 | // Authorization: `Bearer ${token}`, 73 | }, 74 | body: JSON.stringify({ 75 | fn_index: 1, // <- important! 
76 |       data: [
77 |         positive, // string in 'Prompt' Textbox component
78 |         negative, // string in 'Negative prompt' Textbox component
79 |         positive, // string in 'Prompt 2' Textbox component
80 |         negative, // string in 'Negative prompt 2' Textbox component
81 |         true, // boolean in 'Use negative prompt' Checkbox component
82 |         false, // boolean in 'Use prompt 2' Checkbox component
83 |         false, // boolean in 'Use negative prompt 2' Checkbox component
84 |         seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
85 |         width, // number (numeric value between 256 and 1024) in 'Width' Slider component
86 |         height, // number (numeric value between 256 and 1024) in 'Height' Slider component
87 |         8, // number (numeric value between 1 and 20) in 'Guidance scale for base' Slider component
88 |         8, // number (numeric value between 1 and 20) in 'Guidance scale for refiner' Slider component
89 |         nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for base' Slider component
90 |         nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for refiner' Slider component
91 |         true, // boolean in 'Apply refiner' Checkbox component,
92 |         secretToken
93 |       ],
94 |     }),
95 |     cache: "no-store",
96 |   })
97 | 
98 |   const { data } = await res.json()
99 | 
100 |   // console.log("data:", data)
101 |   // handle HTTP errors and unexpected payloads from the space
102 |   if (res.status !== 200 || !Array.isArray(data)) {
103 |     // (note: `res.json()` above will already have thrown if the body wasn't JSON at all)
104 |     throw new Error(`Failed to fetch data (status: ${res.status})`)
105 |   }
106 |   // console.log("data:", data.slice(0, 50))
107 | 
108 |   if (!data[0]) {
109 |     throw new Error(`the returned image was empty`)
110 |   }
111 | 
112 |   return data[0] as string
113 | }
114 | 
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXLGradio.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | import { convertToWebp } from "../../utils/image/convertToWebp.mts"
6 | 
7 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes
8 | // console.log("process.env:", process.env)
9 | 
10 | // note: to reduce costs I use the small A10s (not the large)
11 | // anyway, we will soon not need to use this cloud anymore
12 | // since we will be able to leverage the Inference API
13 | const instance = `${process.env.VC_SDXL_SPACE_API_URL || ""}`
14 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
15 | 
16 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2))
17 | 
18 | export async function generateImageSDXLAsBase64(options: {
19 |   positivePrompt: string;
20 |   negativePrompt?: string;
21 |   seed?: number;
22 |   width?: number;
23 |   height?: number;
24 |   nbSteps?: number;
25 | }): Promise<string> {
26 | 
27 |   const positivePrompt = options?.positivePrompt || ""
28 |   if (!positivePrompt) {
29 |     throw new Error("missing prompt")
30 |   }
31 | 
32 |   // the negative prompt CAN be missing, since we use a trick
33 |   // where we make the interface mandatory in the TS doc,
34 |   // but browsers might send something partial
35 |   const negativePrompt = options?.negativePrompt || ""
36 | 
37 |   // we treat 0 as meaning "random seed"
38 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
39 | 
40 |   const width = getValidNumber(options?.width, 256, 1024, 512)
41 |   const height = getValidNumber(options?.height, 256, 1024, 512)
42 |   const nbSteps = getValidNumber(options?.nbSteps, 5, 100, 20)
43 |   // console.log("SEED:", seed)
44 | 
45 |   const positive = [
46 | 
47 |     // oh well.. is it too late to move this to the bottom?
48 |     "beautiful",
49 |     // "intricate details",
50 |     positivePrompt,
51 | 
52 |     "award winning",
53 |     "high resolution"
54 |   ].filter(word => word)
55 |   .join(", ")
56 | 
57 |   const negative = [
58 |     negativePrompt,
59 |     "watermark",
60 |     "copyright",
61 |     "blurry",
62 |     // "artificial",
63 |     // "cropped",
64 |     "low quality",
65 |     "ugly"
66 |   ].filter(word => word)
67 |   .join(", ")
68 | 
69 |   const api = await client(instance, {
70 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
71 |   })
72 | 
73 | 
74 |   const rawResponse = (await api.predict("/run", [
75 |     positive, // string in 'Prompt' Textbox component
76 |     negative, // string in 'Negative prompt' Textbox component
77 |     positive, // string in 'Prompt 2' Textbox component
78 |     negative, // string in 'Negative prompt 2' Textbox component
79 |     true, // boolean in 'Use negative prompt' Checkbox component
80 |     false, // boolean in 'Use prompt 2' Checkbox component
81 |     false, // boolean in 'Use negative prompt 2' Checkbox component
82 |     seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
83 |     width, // number (numeric value between 256 and 1024) in 'Width' Slider component
84 |     height, // number (numeric value between 256 and 1024) in 'Height' Slider component
85 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for base' Slider component
86 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for refiner' Slider component
87 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for base' Slider component
88 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for refiner' Slider component
89 |     true, // boolean in 'Apply refiner' Checkbox component,
90 |     secretToken
91 |   ])) as any
92 | 
93 |   const result = rawResponse?.data?.[0] as string
94 |   if (!result?.length) {
95 |     throw new Error(`the returned image was empty`)
96 |   }
97 | 
98 |   try {
99 |     const finalImage = await convertToWebp(result)
100 |     return finalImage
101 |   } catch (err) {
102 |     // console.log("err:", err)
103 |     throw new Error(`failed to convert the image to webp: ${err}`)
104 |   }
105 | }
106 | 
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXLTurbo.mts:
--------------------------------------------------------------------------------
1 | 
2 | import { client } from "@gradio/client"
3 | 
4 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
5 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
6 | import { convertToWebp } from "../../utils/image/convertToWebp.mts"
7 | 
8 | 
9 | const instance = `${process.env.VC_SDXL_TURBO_SPACE_API_URL || ""}`
10 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
11 | 
12 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2))
13 | 
14 | export async function generateImageSDXLTurboAsBase64(options: {
15 |   positivePrompt: string;
16 |   negativePrompt?: string;
17 |   seed?: number;
18 |   width?: number;
19 |   height?: number;
20 |   nbSteps?: number;
21 | }): Promise<string> {
22 | 
23 |   // console.log("querying " + instance)
options?.positivePrompt || "" 25 | if (!positivePrompt) { 26 | throw new Error("missing prompt") 27 | } 28 | 29 | // the negative prompt CAN be missing, since we use a trick 30 | // where we make the interface mandatory in the TS doc, 31 | // but browsers might send something partial 32 | const negativePrompt = options?.negativePrompt || "" 33 | 34 | // we treat 0 as meaning "random seed" 35 | const seed = (options?.seed ? options.seed : 0) || generateSeed() 36 | 37 | const maxRequestedResolution = 1024 38 | const maxModelResolution = 512 39 | 40 | const requestedWidth = getValidNumber(options?.width, 256, maxRequestedResolution, maxModelResolution) 41 | const requestedHeight = getValidNumber(options?.height, 256, maxRequestedResolution, maxModelResolution) 42 | 43 | // we try to preserve the original image ratio 44 | const ratioH = requestedHeight / requestedWidth 45 | const ratioW = requestedWidth / requestedHeight 46 | 47 | // we always try to ccrank the resolution to the max 48 | let width = ratioW < 1 ? Math.round(ratioW * maxModelResolution) : maxModelResolution 49 | let height = ratioH < 1 ? Math.round(ratioH * maxModelResolution) : maxModelResolution 50 | 51 | const positive = [ 52 | 53 | // oh well.. is it too late to move this to the bottom? 54 | "beautiful", 55 | 56 | // too opinionated, so let's remove it 57 | // "intricate details", 58 | 59 | positivePrompt, 60 | 61 | "award winning", 62 | "high resolution" 63 | ].filter(word => word) 64 | .join(", ") 65 | 66 | const negative = [ 67 | negativePrompt, 68 | "watermark", 69 | "copyright", 70 | "blurry", 71 | // "artificial", 72 | // "cropped", 73 | "low quality", 74 | "ugly" 75 | ].filter(word => word) 76 | .join(", ") 77 | 78 | const api = await client(instance, { 79 | hf_token: `${process.env.VC_HF_API_TOKEN}` as any 80 | }) 81 | 82 | const rawResponse = (await api.predict("/run", [ 83 | positive, // string in 'Prompt' Textbox component 84 | negative, // string in 'Negative prompt' Textbox component 85 | seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component 86 | width, // number (numeric value between 256 and 1024) in 'Width' Slider component 87 | height, // number (numeric value between 256 and 1024) in 'Height' Slider component 88 | secretToken 89 | ])) as any 90 | 91 | const result = rawResponse?.data?.[0] as string 92 | if (!result?.length) { 93 | throw new Error(`the returned image was empty`) 94 | } 95 | 96 | try { 97 | const finalImage = await convertToWebp(result) 98 | return finalImage 99 | } catch (err) { 100 | // console.log("err:", err) 101 | throw new Error(err) 102 | } 103 | } -------------------------------------------------------------------------------- /src/providers/image-segmentation/segmentImage.mts: -------------------------------------------------------------------------------- 1 | import puppeteer from "puppeteer" 2 | 3 | import { sleep } from "../../utils/misc/sleep.mts" 4 | import { ImageSegment } from "../../types.mts" 5 | import { downloadFileAsBase64 } from "../../utils/download/downloadFileAsBase64.mts" 6 | import { resizeBase64Image } from "../../utils/image/resizeBase64Image.mts" 7 | 8 | // we don't use replicas yet, because it ain't easy to get their hostname 9 | const instances: string[] = [ 10 | `${process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_1 || ""}`, 11 | `${process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_2 || ""}`, 12 | // `${process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_3 || ""}`, 13 | ] 14 | 15 | // TODO we should use an inference endpoint instead 16 
17 | // note: on a large T4 (8 vCPU)
18 | // it takes about 30 seconds to compute
19 | export async function segmentImage(
20 |   inputImageFilePath: string,
21 |   actionnables: string[],
22 |   width: number,
23 |   height: number,
24 | ): Promise<{
25 |   maskUrl: string
26 |   segments: ImageSegment[]
27 | }> {
28 | 
29 |   console.log(`segmenting image..`)
30 | 
31 |   const instance = instances.shift()
32 |   instances.push(instance)
33 | 
34 |   const browser = await puppeteer.launch({
35 |     headless: true,
36 |     protocolTimeout: 40000,
37 |   })
38 | 
39 |   try {
40 |     const page = await browser.newPage()
41 |     await page.goto(instance, { waitUntil: 'networkidle2' })
42 | 
43 |     // await new Promise(r => setTimeout(r, 1000))
44 | 
45 |     const fileField = await page.$('input[type="file"]')
46 | 
47 |     // console.log(`uploading file..`)
48 |     await fileField.uploadFile(inputImageFilePath)
49 | 
50 |     const firstTextarea = await page.$('textarea[data-testid="textbox"]')
51 | 
52 |     const conceptsToDetect = actionnables.join(" . ")
53 |     await firstTextarea.type(conceptsToDetect)
54 | 
55 |     // console.log('looking for the button to submit')
56 |     const submitButton = await page.$('button.lg')
57 | 
58 |     await sleep(300)
59 | 
60 |     // console.log('clicking on the button')
61 |     await submitButton.click()
62 | 
63 |     await page.waitForSelector('img[data-testid="detailed-image"]', {
64 |       timeout: 40000, // we keep it tight, to fail early
65 |     })
66 | 
67 |     const tmpMaskDownloadUrl = await page.$$eval('img[data-testid="detailed-image"]', el => el.map(x => x.getAttribute("src"))[0])
68 | 
69 |     let segments: ImageSegment[] = []
70 | 
71 |     try {
72 |       segments = JSON.parse(await page.$$eval('textarea', el => el.map(x => x.value)[1]))
73 |     } catch (err) {
74 |       console.log(`failed to parse JSON: ${err}`)
75 |       segments = []
76 |     }
77 | 
78 |     // const tmpMaskFileName = `${uuidv4()}.png`
79 |     // await downloadFileToTmp(maskUrl, tmpMaskFileName)
80 | 
81 |     const rawPngInBase64 = await downloadFileAsBase64(tmpMaskDownloadUrl)
82 | 
83 |     const maskUrl = await resizeBase64Image(rawPngInBase64, width, height)
84 | 
85 |     return {
86 |       maskUrl,
87 |       segments,
88 |     }
89 |   } catch (err) {
90 |     throw err
91 |   } finally {
92 |     await browser.close()
93 |   }
94 | }
95 | 
96 | /*
97 | 
98 | If you want to try:
99 | 
100 | // note: must be a jpg and not jpeg it seems
101 | // (probably a puppeteer bug)
102 | const results = await segmentImage("./barn.jpg", [
103 |   "roof",
104 |   "door",
105 |   "window"
106 | ], 512, 512) // width and height of the returned mask
107 | 
108 | console.log("results:", results)
109 | */
--------------------------------------------------------------------------------
/src/providers/image-segmentation/segmentImageFromURL.mts:
--------------------------------------------------------------------------------
1 | import { v4 as uuidv4 } from "uuid"
2 | 
3 | import { downloadFileToTmp } from "../../utils/download/downloadFileToTmp.mts"
4 | import { segmentImage } from "./segmentImage.mts"
5 | 
6 | // TODO we should use an inference endpoint instead
7 | 
8 | // WARNING: this function is currently unused
9 | // if you do attempt to use it, please check the hardcoded 1024x1024 thing line 21, and refactor it to your needs
10 | export async function segmentImageFromURL(
11 |   inputUrl: string,
12 |   actionnables: string[]
13 | ) {
14 |   if (!actionnables?.length) {
15 |     throw new Error("cannot segment image without actionnables!")
16 |   }
17 |   console.log(`segmenting image from URL: "${inputUrl}"`)
18 |   const tmpFileName = `${uuidv4()}`
19 |   const tmpFilePath = await downloadFileToTmp(inputUrl, tmpFileName)
20 | 
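  // (note: tmpFileName is a bare uuid with no file extension; given the "must be a jpg"
  //  remark in segmentImage.mts, it might be safer to name it `${uuidv4()}.jpg` here;
  //  this is an assumption, untested)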
21 |   const results = await segmentImage(tmpFilePath, actionnables, 1024, 1024)
22 | 
23 |   console.log("image has been segmented!", results)
24 |   return results
25 | }
--------------------------------------------------------------------------------
/src/providers/image-upscaling/upscaleImage.mts:
--------------------------------------------------------------------------------
1 | 
2 | import { client } from "@gradio/client"
3 | 
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | 
6 | // we don't use replicas yet, because it ain't easy to get their hostname
7 | const instance = `${process.env.VC_UPSCALING_SPACE_API_URL || ""}`
8 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
9 | 
10 | // this doesn't work because of this error.. I think the version of Gradio is too old (or too new)?
11 | // ReferenceError: addEventListener is not defined
12 | //   at file:///Users/jbilcke/Projects/VideoChain-API/node_modules/@gradio/client/dist/index.js:551:15
13 | //   at processTicksAndRejections (node:internal/process/task_queues:95:5)
14 | export async function upscaleImage(src: string, factor?: number) {
15 | 
16 |   // by default we do a 2X scale
17 |   // VideoQuest will use 4X
18 |   // 4 is really the max/limit, as this can generate PNGs of 50 MB..
19 |   const scaleFactor = getValidNumber(factor, 0, 4, 2)
20 | 
21 |   if (scaleFactor < 2) {
22 |     return src
23 |   }
24 | 
25 |   const api = await client(instance, {
26 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
27 |   })
28 | 
29 |   const result = await api.predict("/upscale", [
30 |     secretToken,
31 |     src, // blob in 'Source Image' Image component
32 |     "realesr-general-x4v3", // string (Option from: ['RealESRGAN_x4plus', 'RealESRNet_x4plus', 'RealESRGAN_x4plus_anime_6B', 'RealESRGAN_x2plus', 'realesr-general-x4v3']) in 'Real-ESRGAN inference model to be used' Dropdown component
33 |     0.5, // number (numeric value between 0 and 1) in 'Denoise Strength (Used only with the realesr-general-x4v3 model)' Slider component
34 |     false, // boolean in 'Face Enhancement using GFPGAN (Doesn't work for anime images)' Checkbox component
35 |     scaleFactor, // number (numeric value between 1 and 10) in 'Image Upscaling Factor' Slider component
36 |   ]);
37 | 
38 | 
39 |   const rawResponse = result as any
40 | 
41 |   // console.log("rawResponse:", rawResponse)
42 | 
43 |   return rawResponse?.data?.[0] as string
44 | }
45 | 
--------------------------------------------------------------------------------
/src/providers/language-model/enrichVideoSpecsUsingLLM.mts:
--------------------------------------------------------------------------------
1 | import { ChatCompletionRequestMessage } from "openai"
2 | 
3 | import { Video, VideoAPIRequest } from "../../types.mts"
4 | import { generateYAML } from "./openai/generateYAML.mts"
5 | import { HallucinatedVideoRequest, OpenAIErrorResponse } from "./types.mts"
6 | import { getQueryChatMessages } from "../../preproduction/prompts.mts"
7 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
8 | import { parseShotRequest } from "../../utils/requests/parseShotRequest.mts"
9 | 
10 | 
11 | export const enrichVideoSpecsUsingLLM = async (video: Video): Promise