├── .dockerignore
├── .env
├── .gitignore
├── .nvmrc
├── Dockerfile
├── LICENSE.txt
├── README.md
├── TODO.md
├── package-lock.json
├── package.json
├── samples
│   └── un-deux.wav
├── src
│   ├── config.mts
│   ├── index.mts
│   ├── main.mts
│   ├── preproduction
│   │   ├── mocks.mts
│   │   └── prompts.mts
│   ├── production
│   │   ├── assembleShots.mts
│   │   ├── normalizePendingVideoToTmpFilePath.mts
│   │   ├── postInterpolation.mts
│   │   ├── renderAnalysis.mts
│   │   ├── renderContent.mts
│   │   ├── renderImage.mts
│   │   ├── renderImageAnalysis.mts
│   │   ├── renderImageSegmentation.mts
│   │   ├── renderImageUpscaling.mts
│   │   ├── renderPipeline.mts
│   │   ├── renderScene.mts
│   │   ├── renderSegmentation.mts
│   │   ├── renderUpscaling.mts
│   │   ├── renderVideo.mts
│   │   ├── renderVideoSegmentation.mts
│   │   └── renderVideoUpscaling.mts
│   ├── providers
│   │   ├── audio-generation
│   │   │   ├── generateAudio.mts
│   │   │   └── generateAudioLegacy.mts
│   │   ├── character-model
│   │   │   └── generateActor.mts
│   │   ├── image-caption
│   │   │   ├── analyzeImageWithIDEFICS.mts
│   │   │   └── analyzeImageWithIDEFICSAndNastyHack.mts
│   │   ├── image-generation
│   │   │   ├── generateImage.mts
│   │   │   ├── generateImageLCMFetch.mts
│   │   │   ├── generateImageLCMGradio.mts
│   │   │   ├── generateImagePulib.mts
│   │   │   ├── generateImageSDXL360.mts
│   │   │   ├── generateImageSDXLFetch.mts
│   │   │   ├── generateImageSDXLGradio.mts
│   │   │   └── generateImageSDXLTurbo.mts
│   │   ├── image-segmentation
│   │   │   ├── segmentImage.mts
│   │   │   └── segmentImageFromURL.mts
│   │   ├── image-upscaling
│   │   │   └── upscaleImage.mts
│   │   ├── language-model
│   │   │   ├── enrichVideoSpecsUsingLLM.mts
│   │   │   ├── openai
│   │   │   │   ├── createChatCompletion.mts
│   │   │   │   ├── createChatCompletionStream.mts
│   │   │   │   ├── generateYAML.mts
│   │   │   │   ├── getTextPrompt.mts
│   │   │   │   ├── getUserContent.mts
│   │   │   │   ├── openai.mts
│   │   │   │   ├── runModerationCheck.mts
│   │   │   │   └── stream.mts
│   │   │   └── types.mts
│   │   ├── lip-syncing
│   │   │   └── generateLipSyncVideo.mts
│   │   ├── music-generation
│   │   │   └── generateMusicWithReplicate.mts
│   │   ├── music-to-caption
│   │   │   └── musicToCaption.mts
│   │   ├── speech-to-text
│   │   │   ├── speechToTextWithWhisperLib.txt
│   │   │   └── speechToTextWithWhisperSpace.mts
│   │   ├── video-generation
│   │   │   ├── addBase64HeaderToMp4.mts
│   │   │   ├── defaultPrompts.mts
│   │   │   ├── generateVideoWithAnimateDiffLightning.mts
│   │   │   ├── generateVideoWithHotshotGradioAPI.mts
│   │   │   ├── generateVideoWithHotshotReplicate.mts
│   │   │   ├── generateVideoWithShow.mts
│   │   │   ├── generateVideoWithZeroscope.mts
│   │   │   └── types.mts
│   │   ├── video-interpolation
│   │   │   ├── interpolateVideo.mts
│   │   │   ├── interpolateVideoLegacy.mts
│   │   │   └── interpolateVideoWithReplicate.mts
│   │   ├── video-transformation
│   │   │   ├── transformVideoWithHotshotReplicate.mts
│   │   │   └── transformVideoWithLatentImageAnimator.txt
│   │   ├── video-upscaling
│   │   │   ├── upscaleVideo.mts
│   │   │   └── upscaleVideoToBase64URL.mts
│   │   └── voice-generation
│   │       ├── generateVoice.mts
│   │       ├── generateVoiceWithCoqui.txt
│   │       └── generateVoiceWithOpenVoice.mts
│   ├── scheduler
│   │   ├── copyVideoFromPendingToCompleted.mts
│   │   ├── copyVideoFromTmpToCompleted.mts
│   │   ├── copyVideoFromTmpToPending.mts
│   │   ├── deleteVideo.mts
│   │   ├── getAllVideosForOwner.mts
│   │   ├── getCompletedVideos.mts
│   │   ├── getFirstVideoFrame.mts
│   │   ├── getFirstVideoFrameAsBase64.mts
│   │   ├── getPendingVideos.mts
│   │   ├── getVideo.mts
│   │   ├── getVideoStatus.mts
│   │   ├── markVideoAsPending.mts
│   │   ├── markVideoAsToAbort.mts
│   │   ├── markVideoAsToDelete.mts
│   │   ├── markVideoAsToPause.mts
│   │   ├── moveVideoFromPendingToCompleted.mts
│   │   ├── moveVideoFromTmpToCompleted.mts
│   │   ├── processVideo.mts
│   │   ├── readVideoMetadataFile.mts
│   │   ├── readVideoMetadataFiles.mts
│   │   ├── saveAndCheckIfNeedToStop.mts
│   │   ├── saveCompletedVideo.mts
│   │   ├── savePendingVideo.mts
│   │   ├── sortPendingVideosByLeastCompletedFirst.mts
│   │   ├── sortVideosByYoungestFirst.mts
│   │   ├── updatePendingVideo.mts
│   │   └── updateShotPreview.mts
│   ├── types.mts
│   └── utils
│       ├── audio
│       │   ├── convertMp3ToWavBase64.mts
│       │   ├── convertMp3ToWavFilePath.mts
│       │   └── mergeAudio.mts
│       ├── data
│       │   ├── all_words.json
│       │   └── good_words.json
│       ├── download
│       │   ├── downloadFileAsBase64.mts
│       │   ├── downloadFileAsBase64URL.mts
│       │   └── downloadFileToTmp.mts
│       ├── filesystem
│       │   ├── createDirIfNeeded.mts
│       │   ├── deleteAllFilesWith.mts
│       │   ├── deleteFileIfExists.mts
│       │   ├── initFolders.mts
│       │   ├── moveFile.mts
│       │   ├── moveFileFromTmpToPending.mts
│       │   ├── saveRenderedSceneToCache.mts
│       │   └── writeBase64ToFile.mts
│       ├── image
│       │   ├── addBase64HeaderToPng.mts
│       │   ├── convertToWebp.mts
│       │   └── resizeBase64Image.mts
│       ├── misc
│       │   ├── debouncePromise.mts
│       │   ├── debounceSync.mts
│       │   ├── generateSeed.mts
│       │   ├── getHuggingFaceSpaceStatus.mts
│       │   ├── makeSureSpaceIsRunning.mts
│       │   ├── randomShuffle.mts
│       │   ├── sleep.mts
│       │   └── tryApiCall.mts
│       ├── requests
│       │   ├── hasValidAuthorization.mts
│       │   ├── hashRequest.mts
│       │   ├── loadRenderedSceneFromCache.mts
│       │   ├── parseRenderRequest.mts
│       │   ├── parseShotRequest.mts
│       │   └── parseVideoRequest.mts
│       ├── streams
│       │   └── streamToBuffer.mts
│       ├── validators
│       │   ├── computeSecretFingerprint.mts
│       │   ├── computeSha256.mts
│       │   ├── getValidBoolean.mts
│       │   ├── getValidNumber.mts
│       │   └── getValidResolution.mts
│       └── video
│           ├── addAudioToVideo.mts
│           └── concatNoGL.mts
└── tsconfig.json

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
node_modules
npm-debug.log
models
sandbox
audio.pipe
video.pipe

--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------

#--------------------- GENERATION CONFIGURATION -------------------
# if deployed to Hugging Face (with persistent storage enabled)
VC_STORAGE_PATH="/data/"

# for local usage on your dev machine
#VC_STORAGE_PATH="./sandbox"

#--------------------- AUTH SECRETS AND ACCESS TOKENS -------------
# The access token required to send some queries to VideoChain
#VC_SECRET_ACCESS_TOKEN=""

# The access token required to send queries to some sub-servers
#VC_MICROSERVICE_SECRET_TOKEN=""

# OpenAI API key used to call OpenAI API services
#VC_OPENAI_API_KEY=""

# Hugging Face API key used to call Hugging Face spaces
#VC_HF_API_TOKEN=""

# Replicate API token
#VC_REPLICATE_API_TOKEN=""

#--------------------- LLM INFERENCE SERVERS ----------------------
#VC_INFERENCE_ENDPOINT_URL=""

#--------------------- SDXL INFERENCE SERVERS ---------------------
VC_SDXL_SPACE_API_URL="https://jbilcke-hf-image-server.hf.space"

#--------------------- LCM INFERENCE SERVERS ---------------------
VC_LCM_SPACE_API_URL="https://jbilcke-hf-fast-image-server.hf.space"

#--------------------- SDXL TURBO INFERENCE SERVERS ---------------------
VC_SDXL_TURBO_SPACE_API_URL="https://jbilcke-hf-faster-image-server.hf.space"

#----------------- ZEROSCOPE INFERENCE SERVERS -------------------
VC_ZEROSCOPE_SPACE_API_URL_1="https://jbilcke-hf-zeroscope-server-1.hf.space"
VC_ZEROSCOPE_SPACE_API_URL_2="https://jbilcke-hf-zeroscope-server-2.hf.space"
VC_ZEROSCOPE_SPACE_API_URL_3="https://jbilcke-hf-zeroscope-server-3.hf.space"
VC_ZEROSCOPE_SPACE_API_URL_4="https://jbilcke-hf-zeroscope-server-4.hf.space"

#----------------- HOTSHOT-XL INFERENCE SERVERS -------------------
VC_HOTSHOT_XL_GRADIO_SPACE_API_URL="https://jbilcke-hf-hotshot-xl-server-1.hf.space"

#----------------- HOTSHOT-XL REPLICATE CONFIG --------------------
VC_HOTSHOT_XL_REPLICATE_MODEL="cloneofsimo/hotshot-xl-lora-controlnet"
VC_HOTSHOT_XL_REPLICATE_MODEL_VERSION="c447ef9fc621af091e2c06d08fd2a22d9f5906389a2f8103c851a2f7cf9c4e63"

#----------------- FRAME SEGMENTATION SERVERS ---------------------
VC_SEGMENTATION_MODULE_SPACE_API_URL_1="https://jbilcke-hf-segmentation-server-1.hf.space"
VC_SEGMENTATION_MODULE_SPACE_API_URL_2="https://jbilcke-hf-segmentation-server-2.hf.space"
VC_SEGMENTATION_MODULE_SPACE_API_URL_3="https://jbilcke-hf-segmentation-server-3.hf.space"

# obsolete:
VC_SEGMENTATION_MODULE_SPACE_API_URL="https://jbilcke-hf-image-segmentation.hf.space"

#----------------- PANORAMA GENERATION SERVERS -------------------
VC_SDXL_360_SPACE_API_URL_1="https://jbilcke-hf-360-server-1.hf.space"

#----------------- IMAGE UPSCALING SERVERS -----------------------
VC_UPSCALING_SPACE_API_URL="https://jbilcke-hf-upscaling-server.hf.space"

#----------------- VIDEO UPSCALING SERVERS -----------------------
VC_VIDEO_UPSCALE_SPACE_API_URL_1="https://jbilcke-hf-video-upscaling-server-1.hf.space"

#----------------- VIDEO INTERPOLATION (FILM) SERVERS -------------------
VC_VIDEO_INTERPOLATION_SPACE_API_URL="https://jbilcke-hf-video-interpolation-server.hf.space"

#----------------- VIDEO INTERPOLATION (ST-MFNET) CONFIG -------------------
VC_VIDEO_INTERPOLATION_STMFNET_REPLICATE_MODEL="hzsxkib/st-mfnet"
VC_VIDEO_INTERPOLATION_STMFNET_REPLICATE_MODEL_VERSION="faa7693430b0a4ac95d1b8e25165673c1d7a7263537a7c4bb9be82a3e2d130fb"

#----------------- AUDIO GENERATION SERVERS ----------------------
VC_AUDIO_GENERATION_SPACE_API_URL="https://jbilcke-hf-audioldm-text-to-audio-generation.hf.space"

#----------------- IMAGE ANALYSIS SERVERS ----------------------
VC_ANALYSIS_SPACE_API_URL="https://jbilcke-hf-idefics-server.hf.space"

#----------------- SPEECH TO TEXT SERVERS ----------------------
VC_SPEECH_TO_TEXT_SPACE_API_URL_1="https://jbilcke-hf-speech-recognition-server-1.hf.space"

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules
*.log
*.bin
.DS_Store
.venv
*.mp4
sandbox
scripts
.env.local

--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
v20.17.0

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM node:18

ARG DEBIAN_FRONTEND=noninteractive

RUN apt update

# For FFMPEG and gl concat
RUN apt --yes install ffmpeg curl build-essential python3 python3-dev libx11-dev libxext-dev libxext6 libglu1-mesa-dev xvfb libxi-dev libglew-dev pkg-config

# For Puppeteer
RUN apt --yes install libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgbm1 libasound2 libpangocairo-1.0-0 libxss1 libgtk-3-0

# Set up a new user named "user" with user ID 1000
RUN useradd -o -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Install app dependencies
# A wildcard is used to ensure both package.json AND package-lock.json are copied
# where available (npm@5+)
COPY --chown=user package*.json $HOME/app

# make sure the .env is copied as well
COPY --chown=user .env $HOME/app

RUN npm install


# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
COPY --chown=user . $HOME/app

EXPOSE 7860

# we can't use this (it times out)
# CMD [ "xvfb-run", "-s", "-ac -screen 0 1920x1080x24", "npm", "run", "start" ]
CMD [ "npm", "run", "start" ]

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
---
title: VideoChain API
emoji: 🎥 🔗
colorFrom: black
colorTo: white
sdk: docker
pinned: false
app_port: 7860
---

A microservice to generate videos

# Installation

1. `npm i`
2. copy `.env` to `.env.local`
3. edit `.env.local` to define the secrets / API access keys
4. `npm run start`

--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------

To allow multiple videos to be processed at the same time:

[ ] yield from the loop at each step
[ ] random processing of videos

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "videochain-api",
  "version": "1.0.0",
  "description": "A service which wraps and chains video and audio spaces together",
  "main": "src/index.mts",
  "scripts": {
    "start": "tsx src/index.mts",
    "test:submitVideo": "tsx src/tests/submitVideo.mts",
    "test:checkStatus": "tsx src/tests/checkStatus.mts",
    "test:downloadFileToTmp": "tsx src/tests/downloadFileToTmp.mts",
    "test:stuff": "tsx src/utils/segmentImage.mts",
    "docker": "npm run docker:build && npm run docker:run",
    "docker:build": "docker build -t videochain-api .",
    "docker:run": "docker run -it -p 7860:7860 videochain-api"
  },
  "author": "Julian Bilcke",
  "license": "Apache License",
  "dependencies": {
    "@gorgonjs/file-provider": "^1.4.1",
    "@gorgonjs/gorgon": "^1.4.1",
    "@gradio/client": "1.5.2",
    "@huggingface/inference": "2.8.0",
    "@types/express": "^4.17.17",
    "@types/node": "^20.12.7",
    "@types/uuid": "^9.0.2",
    "dotenv": "^16.3.1",
    "eventsource-parser": "^1.0.0",
    "express": "^4.18.2",
    "fluent-ffmpeg": "^2.1.2",
    "fs-extra": "^11.1.1",
    "gpt-tokens": "^1.1.1",
    "node-fetch": "^3.3.1",
    "nodejs-whisper": "^0.1.4",
    "openai": "^4.38.2",
    "puppeteer": "^22.6.5",
    "replicate": "^0.29.1",
    "resize-base64": "^1.0.12",
    "sharp": "^0.32.4",
    "temp-dir": "^3.0.0",
    "ts-node": "^10.9.2",
    "tsx": "^4.7.0",
    "tts-react": "^3.0.1",
    "uuid": "^9.0.0",
    "yaml": "^2.3.1"
  }
}
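The `test:submitVideo` script above points at `src/tests/submitVideo.mts`, which is not included in this excerpt. As a rough sketch of what such a client-side call could look like — the `/render` endpoint path and the exact payload fields are assumptions based on the `RenderRequest` usage and the auth variables in `.env`, not a documented contract:

```ts
// hypothetical client sketch: the endpoint path and payload shape are assumptions
const server = "http://localhost:7860"

async function submitRender() {
  const res = await fetch(`${server}/render`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // the service gates some queries behind a secret token (see .env)
      Authorization: `Bearer ${process.env.VC_SECRET_ACCESS_TOKEN || ""}`,
    },
    body: JSON.stringify({
      prompt: "a duck walking in Central Park",
      nbFrames: 1, // 1 = still image, > 1 = video (see renderContent.mts)
      wait: true,  // block until the pipeline completes
    }),
  })
  if (!res.ok) { throw new Error(`request failed: ${res.status}`) }
  console.log(await res.json())
}

submitRender()
```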
--------------------------------------------------------------------------------
/samples/un-deux.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jbilcke-hf/VideoChain-API/7e4bd1c0eab74b0e6fea9c1ca2e226c85fd43c03/samples/un-deux.wav

--------------------------------------------------------------------------------
/src/config.mts:
--------------------------------------------------------------------------------
import path from "node:path"
import fs from "node:fs"

import dotenv from "dotenv"

dotenv.config()

try {
  if (fs.existsSync(".env.local")) {
    const result = dotenv.config({ path: ".env.local" })
    console.log("using .env.local")
    process.env = {
      ...process.env,
      ...result.parsed,
    }
  }
} catch (err) {
  // do nothing
  console.log("using .env")
}

export const storagePath = `${process.env.VC_STORAGE_PATH || './sandbox'}`

// those are persistent storage (we want to keep the data for months/years)
export const metadataDirPath = path.join(storagePath, "metadata")
export const pendingMetadataDirFilePath = path.join(metadataDirPath, "pending")
export const completedMetadataDirFilePath = path.join(metadataDirPath, "completed")

export const filesDirPath = path.join(storagePath, "files")
export const pendingFilesDirFilePath = path.join(filesDirPath, "pending")
export const completedFilesDirFilePath = path.join(filesDirPath, "completed")

// this is a semi-persistent storage (we will want to renew it from time to time)
export const cacheDirPath = path.join(storagePath, "cache")
export const renderedDirFilePath = path.join(cacheDirPath, "rendered")

export const shotFormatVersion = 1
export const sequenceFormatVersion = 1
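The storage directories exported above must exist before the scheduler can write to them; that is the job of `src/utils/filesystem/initFolders.mts`, whose source is not shown in this excerpt. A minimal sketch of what such a helper could look like, assuming it simply creates each configured directory recursively:

```ts
import { promises as fs } from "node:fs"

import {
  metadataDirPath, pendingMetadataDirFilePath, completedMetadataDirFilePath,
  filesDirPath, pendingFilesDirFilePath, completedFilesDirFilePath,
  cacheDirPath, renderedDirFilePath,
} from "../config.mts"

// sketch: create every storage directory if it doesn't exist yet
export const initFolders = async () => {
  const dirs = [
    metadataDirPath, pendingMetadataDirFilePath, completedMetadataDirFilePath,
    filesDirPath, pendingFilesDirFilePath, completedFilesDirFilePath,
    cacheDirPath, renderedDirFilePath,
  ]
  for (const dir of dirs) {
    // { recursive: true } makes mkdir a no-op when the directory already exists
    await fs.mkdir(dir, { recursive: true })
  }
}
```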
--------------------------------------------------------------------------------
/src/main.mts:
--------------------------------------------------------------------------------

import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
import { processVideo } from "./scheduler/processVideo.mts"
import { sortPendingVideosByLeastCompletedFirst } from "./scheduler/sortPendingVideosByLeastCompletedFirst.mts"

export const main = async () => {

  const videos = await getPendingVideos()
  if (!videos.length) {
    // console.log(`no job to process.. going to try in 200 ms`)
    setTimeout(() => {
      main()
    }, 200)
    return
  }

  console.log(`there are ${videos.length} pending videos`)

  sortPendingVideosByLeastCompletedFirst(videos)

  let somethingFailed = ""
  await Promise.all(videos.map(async video => {
    try {
      const result = await processVideo(video)
      return result
    } catch (err) {
      somethingFailed = `${err}`
      // a video failed.. no big deal
      return Promise.resolve(somethingFailed)
    }
  }))

  if (somethingFailed) {
    console.error(`one of the jobs failed: ${somethingFailed}, let's wait 5 seconds`)
    setTimeout(() => { main() }, 5000)
  } else {
    console.log(`successfully worked on the jobs, let's immediately loop`)
    setTimeout(() => { main() }, 50)
  }

}
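TODO.md lists "random processing of videos", and the utils tree already contains `misc/randomShuffle.mts`. Assuming that helper returns a shuffled copy of its input (an assumption — its source is not shown here), one way to wire it into the scheduler could be:

```ts
import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
import { processVideo } from "./scheduler/processVideo.mts"
import { randomShuffle } from "./utils/misc/randomShuffle.mts"

// sketch: one pass over the queue in random order, so a single stuck video
// doesn't always block the same followers
export const processOnePassRandomly = async () => {
  const videos = await getPendingVideos()
  for (const video of randomShuffle(videos)) {
    try {
      await processVideo(video)
    } catch (err) {
      console.error(`a video failed, continuing with the next one: ${err}`)
    }
  }
}
```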
--------------------------------------------------------------------------------
/src/preproduction/mocks.mts:
--------------------------------------------------------------------------------
import { Video, VideoShot } from "../types.mts"

export const mockShots: VideoShot[] = [
  {
    "shotPrompt": "In the extreme wide shot, a flock of ducks is converging on the Central Park, coming from multiple directions. Their feathers are glossy and clean, casting off varying degrees of green, brown and white",
    "environmentPrompt": "Central Park at sunrise, the park looks slightly misty, the sky is tinged with shades of pink and orange as the day breaks. There's dew on the grass, and the leaves on trees are rustling in the light breeze",
    "photographyPrompt": "Eye-level shot with a slight tilt in the camera, capturing the panorama of the park. There's natural lighting, sun just rising. The camera zooms out to capture the ducks entering the park. Shutter speed is slow to capture the movement of ducks",
    "actionPrompt": "Large groups of ducks waddle into the park from various directions, some fly in groups, landing on the pond with small splashes. Movement is slow, slightly sped up to depict the invasion",
    "foregroundAudioPrompt": "A symphony of soft quacking and rustling feathers",
  },
  {
    "shotPrompt": "In the medium shot, a group of ducks are by the pond, pecking at the ground and frolicking in the water. One male mallard is particularly captivating with its emerald green head and healthy body",
    "environmentPrompt": "It's a sunny spring day in Central Park. The pond is surrounded by lush, green vegetation and dappled with sunlight filtering through the leaves",
    "photographyPrompt": "Low angle shot near the water level, the camera moves in a crane shot to capture ducks in action, and the camera's aperture is partially open. Natural sunlight creates playful shadows",
    "actionPrompt": "Ducks are pecking at the ground, dabbling at the water's edge and frolicking in the pond. The camera tracks a particularly majestic mallard navigating through the pond",
    "foregroundAudioPrompt": "Sounds of ducks quacking and splashing in the water"
  },
  {
    "shotPrompt": "Close-up shot of a mother duck with ducklings following her in a line on the grass and into the water",
    "environmentPrompt": "Central Park, by one of the smaller ponds, surrounded by green trees. Sun is high up giving off warm, radiant light",
    "photographyPrompt": "High angle shot, focusing on the line of ducklings following their mother. The camera follows the ducklings. The setting is bright and clear with sun illuminating the ducklings",
    "actionPrompt": "Mother duck is leading her ducklings from the grass into the water, the ducklings obediently follow, creating a neat line. The whole scene feels peaceful",
    "foregroundAudioPrompt": "Ducklings' high pitched chirping, soft lapping of water at the edge of the pond"
  }
] as any

export const mock: Video = {
  "backgroundAudioPrompt": "City ambience mixed with the rustling leaves and the chirping birds in the park",
  "foregroundAudioPrompt": "Rustling feathers, soft quacking, flapping wings, occasional splash in the pond",
  "actorPrompt": "Main actors are ducks - a variety of breeds, mostly mallards: males with glossy green heads and females in mottled brown; all plump, medium-sized waterfowl",
  "actorVoicePrompt": "Soft, low pitched quacking of adult ducks and higher pitched chirping of ducklings",
  "noise": true,
  "noiseAmount": 2,
  "outroDurationMs": 1500,
  "shots": mockShots
} as any

--------------------------------------------------------------------------------
/src/production/assembleShots.mts:
--------------------------------------------------------------------------------
import path from "node:path"

// due to Docker issues, we disable OpenGL transitions for now
// import concat from 'ffmpeg-concat'
import concat from '../utils/video/concatNoGL.mts'

import { VideoShot } from '../types.mts'
import { pendingFilesDirFilePath } from "../config.mts"
import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"

export const assembleShots = async (shots: VideoShot[], fileName: string) => {

  if (!Array.isArray(shots) || shots.length < 2) {
    throw new Error(`need at least 2 shots`)
  }

  const transitions = [
    {
      name: 'circleOpen',
      duration: 1000,
    },
    {
      name: 'crossWarp',
      duration: 800,
    },
    {
      name: 'directionalWarp',
      duration: 800,
      // pass custom params to a transition
      params: { direction: [1, -1] },
    },

    /*
    {
      name: 'squaresWire',
      duration: 2000,
    },
    */
  ]

  const videoFilePath = path.join(pendingFilesDirFilePath, fileName)

  // before performing assembly, we must normalize images
  const shotFilesPaths: string[] = []
  for (let shot of shots) {
    const normalizedShotFilePath = await normalizePendingVideoToTmpFilePath(shot.fileName)
    shotFilesPaths.push(normalizedShotFilePath)
  }

  await concat({
    output: videoFilePath,
    videos: shotFilesPaths,
    transitions: shotFilesPaths
      .slice(0, shotFilesPaths.length - 1)
      .map(
        (vid) => transitions[Math.floor(Math.random() * transitions.length)]
      ),
  })
}
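A usage sketch for `assembleShots`, assuming each shot has already been rendered into the pending files directory (the shot objects are abridged and cast; see `VideoShot` in src/types.mts for the full shape):

```ts
import { assembleShots } from "./assembleShots.mts"

// sketch: concatenate two already-rendered shots into one pending video
const shots = [
  { fileName: "shot-0001.mp4" },
  { fileName: "shot-0002.mp4" },
] as any

await assembleShots(shots, "sequence-0001.mp4")
```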
--------------------------------------------------------------------------------
/src/production/normalizePendingVideoToTmpFilePath.mts:
--------------------------------------------------------------------------------
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"

import { pendingFilesDirFilePath } from "../config.mts"

export const normalizePendingVideoToTmpFilePath = async (fileName: string): Promise<string> => {
  return new Promise((resolve, reject) => {

    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(pendingFilesDirFilePath, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)

    ffmpeg.ffprobe(filePath, function(err) {
      if (err) { reject(err); return; }

      ffmpeg(filePath)

        .size("1280x720")

        .save(tmpFilePath)
        .on("end", async () => {
          resolve(tmpFilePath)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}

--------------------------------------------------------------------------------
/src/production/postInterpolation.mts:
--------------------------------------------------------------------------------
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"

import { moveFileFromTmpToPending } from "../utils/filesystem/moveFileFromTmpToPending.mts"
import { pendingFilesDirFilePath } from "../config.mts"

export const postInterpolation = async (fileName: string, durationMs: number, nbFrames: number, noiseAmount: number): Promise<string> => {
  return new Promise((resolve, reject) => {

    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(pendingFilesDirFilePath, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)

    ffmpeg.ffprobe(filePath, function(err, metadata) {
      if (err) { reject(err); return; }

      const durationInSec = durationMs / 1000

      const currentVideoDurationInSec = metadata.format.duration

      console.log(`currentVideoDurationInSec in sec: ${currentVideoDurationInSec}s`)

      console.log(`target duration in sec: ${durationInSec}s (${durationMs}ms)`)

      // compute a ratio, e.g. 0.3 = 30% of the total length
      const durationRatio = currentVideoDurationInSec / durationInSec
      console.log(`durationRatio: ${durationRatio}`)

      ffmpeg(filePath)

        // convert to HD
        .size("1280x720")

        .videoFilters([
          `setpts=0.5*PTS`, // we make the video faster
          //'scale=-1:576:lanczos',
          // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this makes the video more "pixely"
          `noise=c0s=${noiseAmount}:c0f=t+u` // add a movie grain noise
        ])
        .outputOptions([
          `-r ${nbFrames}`,
        ])

        .save(tmpFilePath)
        .on("end", async () => {
          await moveFileFromTmpToPending(tmpFileName, fileName)

          resolve(fileName)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}
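Note that `durationRatio` above is computed and logged but never used: the `setpts` factor is hard-coded to 0.5 (a constant 2x speed-up). If the goal were to land exactly on `durationMs`, the factor would have to be derived from the measured duration — a sketch, as an illustration rather than the current behavior of `postInterpolation`:

```ts
// sketch: derive the setpts factor instead of hard-coding 0.5
function computeSetptsFilter(currentDurationInSec: number, targetDurationInSec: number): string {
  // setpts=k*PTS produces a clip of duration k * currentDurationInSec,
  // so hitting the target exactly means k = target / current
  const k = targetDurationInSec / currentDurationInSec
  return `setpts=${k.toFixed(4)}*PTS`
}

// e.g. a 4s clip with a 2000ms target => "setpts=0.5000*PTS" (a 2x speed-up)
console.log(computeSetptsFilter(4, 2))
```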
--------------------------------------------------------------------------------
/src/production/renderAnalysis.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImageAnalysis } from "./renderImageAnalysis.mts"

export async function renderAnalysis(request: RenderRequest, response: RenderedScene) {

  if (request.analyze) {
    const isVideo = request?.nbFrames > 1

    // note: this only works on images for now,
    // but we could also analyze the first video frame to get ourselves an idea
    const optionalAnalysisFn = !isVideo
      ? renderImageAnalysis(request, response)
      : Promise.resolve()

    await optionalAnalysisFn
  }
}

--------------------------------------------------------------------------------
/src/production/renderContent.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImage } from "./renderImage.mts"
import { renderVideo } from "./renderVideo.mts"

export async function renderContent(request: RenderRequest, response: RenderedScene) {
  const isVideo = request?.nbFrames > 1

  const renderContentFn = isVideo
    ? renderVideo
    : renderImage

  try {
    await renderContentFn(request, response)
  } catch (err) {
    // console.log(`renderContent() failed, trying a 2nd time..`)
    try {
      await renderContentFn(request, response)
    } catch (err2) {
      // console.log(`renderContent() failed, trying a 3rd time..`)
      await renderContentFn(request, response)
    }
  }
}
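The nested try/catch above is "retry up to 3 times" written out by hand. The same intent can be expressed once with a small generic helper — a sketch; the repository has its own `utils/misc/tryApiCall.mts`, whose exact signature is not shown in this excerpt:

```ts
// sketch: run an async operation up to maxAttempts times before giving up
async function withRetries<T>(fn: () => Promise<T>, maxAttempts = 3): Promise<T> {
  let lastError: unknown
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await fn()
    } catch (err) {
      lastError = err // keep the most recent failure and try again
    }
  }
  throw lastError
}

// usage: await withRetries(() => renderContentFn(request, response))
```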
--------------------------------------------------------------------------------
/src/production/renderImage.mts:
--------------------------------------------------------------------------------
import { generateImageSDXLAsBase64 } from "../providers/image-generation/generateImageSDXLGradio.mts"
import { generateImageSDXL360AsBase64 } from "../providers/image-generation/generateImageSDXL360.mts"
import { RenderedScene, RenderRequest } from "../types.mts"
import { generateImagePulibAsBase64 } from "../providers/image-generation/generateImagePulib.mts"

export async function renderImage(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {

  const isSpherical = request.projection === 'spherical'

  // we don't want to switch too much between model types in VideoChain,
  // because for speed we need to pre-load the servers,
  // but there is no point in pre-loading many servers for many models
  const generateImageAsBase64 = isSpherical
    ? generateImageSDXL360AsBase64
    : request.turbo

      // turbo models are models that are slightly less beautiful
      // but much, much faster to run
      // for the moment we use SDXL + LCM, as it offers better scene coherence,
      // but we might switch to SDXL Turbo in the future if its quality improves
      ? generateImagePulibAsBase64 // generateImageSDXLTurboAsBase64

      : generateImageSDXLAsBase64

  // console.log(`going to generate an image using ${request.projection || "default (cartesian)"} projection`)

  const params = {
    positivePrompt: request.prompt,
    negativePrompt: request.negativePrompt,
    identityImage: request.identityImage,
    seed: request.seed,
    nbSteps: request.nbSteps,
    width: request.width,
    height: request.height
  }

  // console.log(`calling generateImageAsBase64 with: `, JSON.stringify(params, null, 2))

  // we try at least 3 different servers
  try {
    response.assetUrl = await generateImageAsBase64(params)
    // console.log("successful generation!", response.assetUrl.slice(0, 30))
    if (!response.assetUrl?.length) {
      throw new Error(`the generated image is empty`)
    }
  } catch (err) {
    // console.error(`failed to render.. but let's try again!`)
    try {
      response.assetUrl = await generateImageAsBase64(params)
      // console.log("successful generation!", response.assetUrl.slice(0, 30))
      if (!response.assetUrl?.length) {
        throw new Error(`the generated image is empty`)
      }
    } catch (err) {
      try {
        response.assetUrl = await generateImageAsBase64(params)
        // console.log("successful generation!", response.assetUrl.slice(0, 30))
        if (!response.assetUrl?.length) {
          throw new Error(`the generated image is empty`)
        }
      } catch (err) {
        // console.error(`failed to generate the image, due to`, err)
        response.error = `failed to render scene: ${err}`
        response.status = "error"
        response.assetUrl = ""
      }
    }
  }

  return response
}

--------------------------------------------------------------------------------
/src/production/renderImageAnalysis.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"
import { analyzeImage } from "../providers/image-caption/analyzeImageWithIDEFICSAndNastyHack.mts"

export async function renderImageAnalysis(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {
  response.alt = request.prompt

  try {
    // note: this converts a base64 PNG to a base64 JPG (which is good, actually!)
    response.alt = await analyzeImage(response.assetUrl, request.prompt)
    console.log(`analysis worked on the first try!`)
  } catch (err) {
    console.error(`analysis failed the first time.. let's try again..`)
    try {
      response.alt = await analyzeImage(response.assetUrl, request.prompt)
      console.log(`analysis worked on the second try!`)
    } catch (err) {
      console.error(`analysis failed on the second attempt.. let's keep the prompt as a fallback, then :|`)
      // no need to log a catastrophic failure here, since we can still use
      // the original prompt as the alt text
      response.alt = request.prompt
    }
  }

  return response
}
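Stepping back to renderImage.mts above, its routing can be restated as a standalone decision table: spherical panoramas go to the SDXL 360 server, `turbo` requests go to the fast LCM-style server, and everything else goes to the regular SDXL server:

```ts
// sketch: the model-routing decision from renderImage.mts, isolated for clarity
type ImageModel = "sdxl-360" | "turbo" | "sdxl"

function pickImageModel(projection?: string, turbo?: boolean): ImageModel {
  if (projection === "spherical") return "sdxl-360" // panorama generation server
  if (turbo) return "turbo" // much faster, slightly less beautiful
  return "sdxl" // default: best quality, slower
}
```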
segmenting..") 34 | try { 35 | const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height) 36 | response.maskUrl = result.maskUrl 37 | response.segments = result.segments 38 | 39 | // console.log(`it worked the first time! got ${response.segments.length} segments`) 40 | } catch (err) { 41 | // console.log("this takes too long :/ trying another server..") 42 | try { 43 | const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height) 44 | response.maskUrl = result.maskUrl 45 | response.segments = result.segments 46 | 47 | // console.log(`it worked the second time! got ${response.segments.length} segments`) 48 | } catch (err) { 49 | // console.log("trying one last time, on a 3rd server..") 50 | try { 51 | const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height) 52 | response.maskUrl = result.maskUrl 53 | response.segments = result.segments 54 | 55 | // console.log(`it worked the third time! got ${response.segments.length} segments`) 56 | } catch (err) { 57 | console.log("yeah, all servers are busy it seems.. aborting") 58 | response.error = "all servers are busy" 59 | response.status = "error" 60 | } 61 | } 62 | } 63 | } 64 | } else { 65 | // console.log("no actionnables: just returning the image, then") 66 | } 67 | 68 | return response 69 | } 70 | -------------------------------------------------------------------------------- /src/production/renderImageUpscaling.mts: -------------------------------------------------------------------------------- 1 | import { RenderedScene, RenderRequest } from "../types.mts" 2 | import { upscaleImage } from "../providers/image-upscaling/upscaleImage.mts" 3 | 4 | export async function renderImageUpscaling( 5 | request: RenderRequest, 6 | response: RenderedScene, 7 | ): Promise { 8 | 9 | try { 10 | // note: this converts a base64 PNG to a base64 JPG (which is good, actually!) 11 | response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor) 12 | // console.log(`upscaling worked on the first try!`) 13 | } catch (err) { 14 | // console.error(`upscaling failed the first time.. let's try again..`) 15 | try { 16 | response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor) 17 | // console.log(`upscaling worked on the second try!`) 18 | } catch (err) { 19 | console.error(`upscaling failed on the second attempt.. 
--------------------------------------------------------------------------------
/src/production/renderImageUpscaling.mts:
--------------------------------------------------------------------------------
import { RenderedScene, RenderRequest } from "../types.mts"
import { upscaleImage } from "../providers/image-upscaling/upscaleImage.mts"

export async function renderImageUpscaling(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {

  try {
    // note: this converts a base64 PNG to a base64 JPG (which is good, actually!)
    response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor)
    // console.log(`upscaling worked on the first try!`)
  } catch (err) {
    // console.error(`upscaling failed the first time.. let's try again..`)
    try {
      response.assetUrl = await upscaleImage(response.assetUrl, request.upscalingFactor)
      // console.log(`upscaling worked on the second try!`)
    } catch (err) {
      console.error(`upscaling failed on the second attempt.. let's keep the low-res image then :|`)
      // no need to log a catastrophic failure here, since we still have the original (low-res image)
      // to work with
    }
  }

  return response
}

--------------------------------------------------------------------------------
/src/production/renderPipeline.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { saveRenderedSceneToCache } from "../utils/filesystem/saveRenderedSceneToCache.mts"
import { renderSegmentation } from "./renderSegmentation.mts"
import { renderUpscaling } from "./renderUpscaling.mts"
import { renderContent } from "./renderContent.mts"
import { renderAnalysis } from "./renderAnalysis.mts"

export async function renderPipeline(request: RenderRequest, response: RenderedScene) {
  await renderContent(request, response)

  await Promise.all([
    renderSegmentation(request, response),
    renderAnalysis(request, response),
    renderUpscaling(request, response)
  ])

  /*
  this is the optimized pipeline.
  However, right now it doesn't work because for some reason,
  asking to generate the same seed + prompt with a different nb of steps
  doesn't generate the same image!

  // first we need to wait for the low quality pre-render
  await renderContent({
    ...request,

    // we are a bit more aggressive with the quality of the video preview
    nbSteps: isVideo ? 8 : 16
  }, response)

  // then we can run both the segmentation and the high-res render at the same time
  await Promise.all([
    renderSegmentation(request, response),
    renderContent(request, response)
  ])
  */

  response.status = "completed"
  response.error = ""

  if (!request.cache || request.cache === "ignore") {
    // console.log("client asked to not use the cache in the rendering pipeline")
    return
  }

  // console.log("client asked this for cache: " + request.cache)

  try {
    // since the request is now completed, we cache it
    await saveRenderedSceneToCache(request, response)
    // console.log("successfully saved to cache")

    // we don't really need to remove it from the in-memory cache
    // (the cache queue in src/production/renderScene.mts)
    // since this cache queue already has automatic pruning
  } catch (err) {
    console.error(`failed to save to cache, but no big deal: ${err}`)
  }
}
--------------------------------------------------------------------------------
/src/production/renderScene.mts:
--------------------------------------------------------------------------------
import { v4 as uuidv4 } from "uuid"

import { RenderedScene, RenderRequest } from "../types.mts"
import { renderPipeline } from "./renderPipeline.mts"

const cache: Record<string, RenderedScene> = {}
const cacheQueue: string[] = []
const maxCacheSize = 2000

export async function renderScene(request: RenderRequest): Promise<RenderedScene> {
  // const key = getCacheKey(scene)

  const renderId = uuidv4()

  const response: RenderedScene = {
    renderId,
    status: "pending",
    assetUrl: "",
    alt: request.prompt || "",
    error: "",
    maskUrl: "",
    segments: []
  }

  cache[renderId] = response
  cacheQueue.push(renderId)
  if (cacheQueue.length > maxCacheSize) {
    const toRemove = cacheQueue.shift()
    delete cache[toRemove]
  }

  if (request.wait) {
    await renderPipeline(request, response)
  } else {
    // this is a fire-and-forget asynchronous pipeline:
    // we start it, but we do not await the response
    renderPipeline(request, response)
  }

  // console.log("renderScene: yielding the scene", response)
  return response
}

export async function getRenderedScene(renderId: string): Promise<RenderedScene> {
  const rendered = cache[renderId]
  if (!rendered) {
    throw new Error(`couldn't find any rendered scene with renderId ${renderId}`)
  }
  return cache[renderId]
}
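Putting the two entry points together: a caller can block on the whole pipeline with `wait: true`, or fire-and-forget and then poll with the returned `renderId`. A sketch (the request literal is abridged and cast, since the full `RenderRequest` type lives in src/types.mts):

```ts
import { renderScene, getRenderedScene } from "./renderScene.mts"

// sketch: fire-and-forget render, then poll until it completes or errors
const request = { prompt: "a castle at dusk", nbFrames: 1, wait: false } as any

const { renderId } = await renderScene(request)

let scene = await getRenderedScene(renderId)
while (scene.status === "pending") {
  await new Promise(r => setTimeout(r, 1000)) // poll every second
  scene = await getRenderedScene(renderId)
}
console.log(scene.status === "completed" ? "done!" : scene.error)
```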
--------------------------------------------------------------------------------
/src/production/renderSegmentation.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImageSegmentation } from "./renderImageSegmentation.mts"
import { renderVideoSegmentation } from "./renderVideoSegmentation.mts"

export async function renderSegmentation(request: RenderRequest, response: RenderedScene) {

  if (request.segmentation === "firstframe" || request.segmentation === "allframes") {
    const isVideo = request?.nbFrames > 1

    const renderSegmentationFn = isVideo
      ? renderVideoSegmentation
      : renderImageSegmentation

    await renderSegmentationFn(request, response)
  }
}

--------------------------------------------------------------------------------
/src/production/renderUpscaling.mts:
--------------------------------------------------------------------------------

import { RenderedScene, RenderRequest } from "../types.mts"

import { renderImageUpscaling } from "./renderImageUpscaling.mts"
import { renderVideoUpscaling } from "./renderVideoUpscaling.mts"

export async function renderUpscaling(request: RenderRequest, response: RenderedScene) {

  if (request.upscalingFactor > 1) {

    const isVideo = request?.nbFrames > 1

    // we upscale images with ESRGAN, and videos with Zeroscope XL
    const renderFn = isVideo
      ? renderVideoUpscaling
      : renderImageUpscaling

    await renderFn(request, response)
  }
}

--------------------------------------------------------------------------------
/src/production/renderVideo.mts:
--------------------------------------------------------------------------------
import { RenderedScene, RenderRequest, VideoGenerationParams } from "../types.mts"

// import { generateVideo } from "../providers/video-generation/generateVideoWithZeroscope.mts"
// import { generateVideo } from "../providers/video-generation/generateVideoWithHotshotGradioAPI.mts"
// import { generateVideoWithAnimateLCM } from "../providers/video-generation/generateVideoWithAnimateLCM.mts"
import { generateVideoWithAnimateDiffLightning } from "../providers/video-generation/generateVideoWithAnimateDiffLightning.mts"

export async function renderVideo(
  request: RenderRequest,
  response: RenderedScene
): Promise<RenderedScene> {

  await generateVideoWithAnimateDiffLightning(request, response)

  return response
}
segmenting..") 30 | const result = await segmentImage(firstFrameFilePath, actionnables, request.width, request.height) 31 | response.maskUrl = result.maskUrl 32 | response.segments = result.segments 33 | 34 | // console.log("success!", { segments }) 35 | } 36 | /* 37 | const jpgBase64 = await getFirstVideoFrame(tmpVideoFileName) 38 | if (!jpgBase64) { 39 | console.error("failed to get the image") 40 | error = "failed to segment the image" 41 | } else { 42 | console.log(`got the first frame (${jpgBase64.length})`) 43 | 44 | console.log("TODO: call segmentImage with the base64 image") 45 | await segmentImage() 46 | } 47 | */ 48 | } 49 | } 50 | 51 | return response 52 | } -------------------------------------------------------------------------------- /src/production/renderVideoUpscaling.mts: -------------------------------------------------------------------------------- 1 | import { upscaleVideoToBase64URL } from "../providers/video-upscaling/upscaleVideoToBase64URL.mts" 2 | import { RenderedScene, RenderRequest } from "../types.mts" 3 | 4 | export async function renderVideoUpscaling( 5 | request: RenderRequest, 6 | response: RenderedScene, 7 | ): Promise { 8 | 9 | try { 10 | // note: this converts a base64 PNG to a base64 JPG (which is good, actually!) 11 | response.assetUrl = await upscaleVideoToBase64URL(response.assetUrl, request.prompt) 12 | // console.log(`upscaling worked on the first try!`) 13 | } catch (err) { 14 | // console.error(`upscaling failed the first time.. let's try again..`) 15 | try { 16 | response.assetUrl = await upscaleVideoToBase64URL(response.assetUrl, request.prompt) 17 | // console.log(`upscaling worked on the second try!`) 18 | } catch (err) { 19 | console.error(`upscaling failed on the second attempt.. let's keep the low-res image then :|`) 20 | // no need to log a catastrophic failure here, since we still have the original (low-res image) 21 | // to work with 22 | } 23 | } 24 | 25 | return response 26 | } 27 | -------------------------------------------------------------------------------- /src/providers/audio-generation/generateAudio.mts: -------------------------------------------------------------------------------- 1 | import { v4 as uuidv4 } from "uuid" 2 | import puppeteer from "puppeteer" 3 | 4 | import { downloadFileToTmp } from "../../utils/download/downloadFileToTmp.mts" 5 | import { moveFileFromTmpToPending } from "../../utils/filesystem/moveFileFromTmpToPending.mts" 6 | 7 | export const state = { 8 | load: 0, 9 | } 10 | 11 | const instances: string[] = [ 12 | `${process.env.VC_AUDIO_GENERATION_SPACE_API_URL_1 || ""}` 13 | ].filter(instance => instance?.length > 0) 14 | 15 | // TODO we should use an inference endpoint instead 16 | export async function generateAudio(prompt: string, audioFileName: string) { 17 | 18 | if (state.load === instances.length) { 19 | throw new Error(`all audio generation servers are busy, try again later..`) 20 | } 21 | 22 | state.load += 1 23 | 24 | try { 25 | const instance = instances.shift() 26 | instances.push(instance) 27 | 28 | const browser = await puppeteer.launch({ 29 | headless: true, 30 | protocolTimeout: 120000, 31 | }) 32 | 33 | try { 34 | const page = await browser.newPage() 35 | 36 | await page.goto(instance, { 37 | waitUntil: "networkidle2", 38 | }) 39 | 40 | // await new Promise(r => setTimeout(r, 1000)) 41 | 42 | const firstTextboxInput = await page.$('input[data-testid="textbox"]') 43 | 44 | await firstTextboxInput.type(prompt) 45 | 46 | // console.log("looking for the button to submit") 47 | const 
--------------------------------------------------------------------------------
/src/providers/audio-generation/generateAudio.mts:
--------------------------------------------------------------------------------
import { v4 as uuidv4 } from "uuid"
import puppeteer from "puppeteer"

import { downloadFileToTmp } from "../../utils/download/downloadFileToTmp.mts"
import { moveFileFromTmpToPending } from "../../utils/filesystem/moveFileFromTmpToPending.mts"

export const state = {
  load: 0,
}

const instances: string[] = [
  `${process.env.VC_AUDIO_GENERATION_SPACE_API_URL_1 || ""}`
].filter(instance => instance?.length > 0)

// TODO we should use an inference endpoint instead
export async function generateAudio(prompt: string, audioFileName: string) {

  if (state.load === instances.length) {
    throw new Error(`all audio generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const instance = instances.shift()
    instances.push(instance)

    const browser = await puppeteer.launch({
      headless: true,
      protocolTimeout: 120000,
    })

    try {
      const page = await browser.newPage()

      await page.goto(instance, {
        waitUntil: "networkidle2",
      })

      // await new Promise(r => setTimeout(r, 1000))

      const firstTextboxInput = await page.$('input[data-testid="textbox"]')

      await firstTextboxInput.type(prompt)

      // console.log("looking for the button to submit")
      const submitButton = await page.$("button.lg")

      // console.log("clicking on the button")
      await submitButton.click()

      await page.waitForSelector("a[download]", {
        timeout: 120000, // no need to wait for too long, generation is quick
      })

      const audioRemoteUrl = await page.$$eval("a[download]", el => el.map(x => x.getAttribute("href"))[0])


      // it is always a good idea to download to a tmp dir before saving to the pending dir
      // because there is always a risk that the download will fail

      const tmpFileName = `${uuidv4()}.mp4`

      await downloadFileToTmp(audioRemoteUrl, tmpFileName)
      await moveFileFromTmpToPending(tmpFileName, audioFileName)
    } catch (err) {
      throw err
    } finally {
      await browser.close()
    }
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
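Usage sketch: the generated audio ends up in the pending files directory under the requested file name (note that the internal temp file keeps the `.mp4` extension used elsewhere in the pipeline):

```ts
import { generateAudio } from "./generateAudio.mts"

// sketch: generate an ambiance track into the pending files directory
await generateAudio(
  "gentle rain falling on a tent, distant thunder",
  "ambiance-0001.mp4" // destination file name in the pending dir
)
```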
--------------------------------------------------------------------------------
/src/providers/audio-generation/generateAudioLegacy.mts:
--------------------------------------------------------------------------------
import { client } from '@gradio/client'

import { generateSeed } from "../../utils/misc/generateSeed.mts"

export const state = {
  load: 0
}

const instances: string[] = [
  process.env.VC_AUDIO_GENERATION_SPACE_API_URL
]

export const generateAudio = async (prompt: string, options?: {
  seed: number;
  nbFrames: number;
  nbSteps: number;
}) => {

  if (state.load === instances.length) {
    throw new Error(`all audio generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const seed = options?.seed || generateSeed()
    const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling will require too much memory!
    const nbSteps = options?.nbSteps || 35

    const instance = instances.shift()
    instances.push(instance)

    const api = await client(instance, {
      hf_token: `${process.env.VC_HF_API_TOKEN}` as any
    })

    const rawResponse = await api.predict('/run', [
      prompt, // string in 'Prompt' Textbox component
      seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
      nbFrames, // 24 // it is the nb of frames per second, I think?
      nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
    ]) as any

    const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }

    return `${instance}/file=${name}`
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}

--------------------------------------------------------------------------------
/src/providers/character-model/generateActor.mts:
--------------------------------------------------------------------------------
import { promises as fs } from "node:fs"
import path from "node:path"

import tmpDir from "temp-dir"

import { HfInference } from "@huggingface/inference"

const hf = new HfInference(process.env.VC_HF_API_TOKEN)

export const generateActor = async (prompt: string, fileName: string, seed: number) => {
  const positivePrompt = [
    `profile photo of ${prompt || ""}`,
    "id picture",
    "photoshoot",
    "portrait photography",
    "neutral expression",
    "neutral background",
    "studio photo",
    "award winning",
    "high resolution",
    "photo realistic",
    "intricate details",
    "beautiful",
  ]
  const negativePrompt = [
    "anime",
    "drawing",
    "painting",
    "lowres",
    "blurry",
    "artificial"
  ]

  console.log(`generating actor: ${positivePrompt.join(", ")}`)

  const blob = await hf.textToImage({
    inputs: positivePrompt.join(", "),
    model: "stabilityai/stable-diffusion-2-1",
    parameters: {
      negative_prompt: negativePrompt.join(", "),
      // seed, no seed?
    }
  })

  const filePath = path.resolve(tmpDir, fileName)

  const buffer = Buffer.from(await blob.arrayBuffer())
  await fs.writeFile(filePath, buffer, "utf8")

  return filePath
}
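Usage sketch (note that the `seed` parameter is accepted but, as the commented-out line above shows, not currently forwarded to the model):

```ts
import { generateActor } from "./generateActor.mts"

// sketch: generate a neutral ID-style portrait, and get back the temp file path
const filePath = await generateActor(
  "a middle-aged astronaut with short gray hair",
  "actor-0001.png",
  42 // seed (currently unused by the underlying call)
)
console.log(`actor portrait written to ${filePath}`)
```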
--------------------------------------------------------------------------------
/src/providers/image-caption/analyzeImageWithIDEFICS.mts:
--------------------------------------------------------------------------------

import { client } from "@gradio/client"

const instances: string[] = [
  `${process.env.VC_ANALYSIS_SPACE_API_URL || ""}`,
].filter(instance => instance?.length > 0)

export async function analyzeImage(src: string, prompt: string): Promise<string> {

  const instance = instances.shift()
  instances.push(instance)

  const api = await client(instance, {
    hf_token: `${process.env.VC_HF_API_TOKEN}` as any
  })

  // console.log("/analyzeImage: calling api.predict(6, ...)")

  /*
  the chat history has this format:
  [
    [
      '![](/file=/tmp/gradio/2ee0577f810cba5c50d0a7f047a9e6557f4e269f/image.png)What do you see in the following image?',
      'I'
    ]
  ]
  */
  const chat_history = [
    // ['', '']
  ]

  // unfortunately the Gradio client doesn't support streaming, and will crash here with a nasty error
  /*
  node_modules/@gradio/client/dist/index.js:705
        return data.map((d, i) => {
                    ^
  TypeError: Cannot read properties of null (reading 'is_file')
      at node_modules/@gradio/client/dist/index.js:713:43
      at Array.map (<anonymous>)
      at transform_output (node_modules/@gradio/client/dist/index.js:705:15)
  */

  const result = await api.predict(6, [
    "HuggingFaceM4/idefics-80b-instruct", // string (Option from: ['HuggingFaceM4/idefics-80b-instruct']) in 'Model' Dropdown component
    prompt, // string in 'Text input' Textbox component
    chat_history, // any (any valid json) in 'IDEFICS' Chatbot component
    src, // blob in 'Image input' Image component

    // the following values come from the source code at:
    // https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/blob/main/app_dialogue.py#L416-L472

    "Greedy", // string in 'Decoding strategy' Radio component
    0.4, // number (numeric value between 0.0 and 5.0) in 'Sampling temperature' Slider component
    512, // number (numeric value between 8 and 1024) in 'Maximum number of new tokens to generate' Slider component
    1, // number (numeric value between 0.0 and 5.0) in 'Repetition penalty' Slider component
    0.8, // number (numeric value between 0.01 and 0.99) in 'Top P' Slider component
  ])

  const rawResponse = result as any

  console.log("got a response!:", rawResponse)

  return rawResponse?.data?.[0] as string
}

--------------------------------------------------------------------------------
/src/providers/image-caption/analyzeImageWithIDEFICSAndNastyHack.mts:
--------------------------------------------------------------------------------

/*
unfortunately the Gradio client doesn't support streaming:
it will crash here with a nasty error

node_modules/@gradio/client/dist/index.js:705
      return data.map((d, i) => {
                  ^
TypeError: Cannot read properties of null (reading 'is_file')
    at node_modules/@gradio/client/dist/index.js:713:43
    at Array.map (<anonymous>)
    at transform_output (node_modules/@gradio/client/dist/index.js:705:15)


This prevents us from using IDEFICS through the Gradio API,
so the only solution is to hack our way in using Puppeteer.
*/


import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import puppeteer from "puppeteer"

import { writeBase64ToFile } from "../../utils/filesystem/writeBase64ToFile.mts"
import { sleep } from "../../utils/misc/sleep.mts"
import { deleteFileIfExists } from "../../utils/filesystem/deleteFileIfExists.mts"

const instances: string[] = [
  `${process.env.VC_ANALYSIS_SPACE_API_URL || ""}`,
].filter(instance => instance?.length > 0)

// There is no easy to use public API for IDEFICS
// (something where we can just push text + file and get a response without handling history, upload etc)
// So let's hack our way in 🐕
export async function analyzeImage(image: string, prompt: string) {
  const instance = instances.shift()
  instances.push(instance)

  // wait.. is that really a jpg we have?
  // well, let's hope so.
  const tmpImageFilePath = path.join(tmpDir, `${uuidv4()}.jpg`)

  await writeBase64ToFile(image, tmpImageFilePath)
  // console.log("wrote the image to ", tmpImageFilePath)

  const browser = await puppeteer.launch({
    headless: true,
    protocolTimeout: 30000,
  })

  try {
    const page = await browser.newPage()

    await page.goto(instance, {
      waitUntil: 'networkidle2',
    })

    // console.log("filling in the prompt..")
    const promptField = await page.$('textarea')
    await promptField.type(prompt)

    // console.log("beginning:", imageBase64.slice(0, 100))

    // await new Promise(r => setTimeout(r, 1000))

    const fileField = await page.$('input[type=file]')

    console.log(`uploading file..`)
    await fileField.uploadFile(tmpImageFilePath)
    // console.log(`did it work? did it do something?`)
    // await sleep(2000)

    // console.log('looking for the button to submit')
    const submitButton = await page.$('button.lg')

    // console.log('clicking on the submit')
    await submitButton.click()

    console.log("waiting for bot response..")
    await page.$('.message.bot')

    // note: we are going to receive the response in streaming

    // TODO we should use a different approach here, like perhaps something to detect
    // when the element has stopped receiving updates
    await sleep(12000)

    const message = await page.$$eval(".message.bot p", el => el.map(x => x.innerText)[0])
    console.log("response:", message)

    return message || ""
  } catch (err) {
    throw err
  } finally {
    await browser.close()
    await deleteFileIfExists(tmpImageFilePath)
  }
}
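Usage sketch: the first argument is expected to be a base64 JPEG payload, which the function writes to a temporary .jpg before uploading it through the page:

```ts
import { analyzeImage } from "./analyzeImageWithIDEFICSAndNastyHack.mts"

// sketch: caption an image already held in memory as base64
const imageBase64 = "..." // placeholder for a base64 JPEG payload
const caption = await analyzeImage(imageBase64, "What do you see in this image?")
console.log("IDEFICS says:", caption)
```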
24 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
25 | 
26 |   const width = getValidNumber(options?.width, 256, 1024, 512)
27 |   const height = getValidNumber(options?.height, 256, 1024, 512)
28 |   const nbSteps = getValidNumber(options?.nbSteps, 5, 50, 25)
29 | 
30 |   const blob = await hf.textToImage({
31 |     inputs: [
32 |       positivePrompt,
33 |       "beautiful",
34 |       "award winning",
35 |       // "intricate details",
36 |       "high resolution"
37 |     ].filter(word => word)
38 |     .join(", "),
39 |     model: "stabilityai/stable-diffusion-2-1",
40 |     parameters: {
41 |       negative_prompt: [
42 |         negativePrompt,
43 |         "blurry",
44 |         // "artificial",
45 |         // "cropped",
46 |         "low quality",
47 |         "ugly"
48 |       ].filter(word => word)
49 |       .join(", "),
50 |       // forward the validated dimensions and step count
51 |       // (note: the `seed` computed above is not forwarded, since the hosted text-to-image
52 |       // Inference API doesn't expose a seed parameter at the time of writing)
53 |       width,
54 |       height,
55 |       num_inference_steps: nbSteps
56 |     }
57 |   })
58 |   const buffer = Buffer.from(await blob.arrayBuffer())
59 | 
60 |   return buffer
61 | }
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageLCMFetch.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | 
6 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes (see the sketch in generateImageSDXL360.mts)
7 | // console.log("process.env:", process.env)
8 | 
9 | // note: to reduce costs I use the small A10s (not the large)
10 | // anyway, we will soon not need to use this cloud anymore
11 | // since we will be able to leverage the Inference API
12 | const instance = `${process.env.VC_LCM_SPACE_API_URL || ""}`
13 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
14 | 
15 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2))
16 | 
17 | export async function generateImageLCMAsBase64(options: {
18 |   positivePrompt: string;
19 |   negativePrompt?: string;
20 |   seed?: number;
21 |   width?: number;
22 |   height?: number;
23 |   nbSteps?: number;
24 | }): Promise<string> {
25 | 
26 |   // console.log("querying " + instance)
27 |   const positivePrompt = options?.positivePrompt || ""
28 |   if (!positivePrompt) {
29 |     throw new Error("missing prompt")
30 |   }
31 | 
32 |   // the negative prompt CAN be missing, since we use a trick
33 |   // where we make the interface mandatory in the TS doc,
34 |   // but browsers might send something partial
35 |   const negativePrompt = options?.negativePrompt || ""
36 | 
37 |   // we treat 0 as meaning "random seed"
38 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
39 | 
40 |   const width = getValidNumber(options?.width, 256, 1024, 512)
41 |   const height = getValidNumber(options?.height, 256, 1024, 512)
42 |   const nbSteps = getValidNumber(options?.nbSteps, 1, 8, 4)
43 |   // console.log("SEED:", seed)
44 | 
45 |   const positive = [
46 | 
47 |     // oh well.. is it too late to move this to the bottom?
48 |     "beautiful",
49 | 
50 |     // too opinionated, so let's remove it
51 |     // "intricate details",
52 | 
53 |     positivePrompt,
54 | 
55 |     "award winning",
56 |     "high resolution"
57 |   ].filter(word => word)
58 |   .join(", ")
59 | 
60 |   const negative = [
61 |     negativePrompt,
62 |     "watermark",
63 |     "copyright",
64 |     "blurry",
65 |     // "artificial",
66 |     // "cropped",
67 |     "low quality",
68 |     "ugly"
69 |   ].filter(word => word)
70 |   .join(", ")
71 | 
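  // (`instance` may or may not end with a trailing slash, so we normalize before appending the route;
  //  e.g. if VC_LCM_SPACE_API_URL were https://someuser-lcm.hf.space (a hypothetical URL),
  //  this would POST to https://someuser-lcm.hf.space/api/predict)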
"" : "/") + "api/predict", { 73 | method: "POST", 74 | headers: { 75 | "Content-Type": "application/json", 76 | // Authorization: `Bearer ${token}`, 77 | }, 78 | body: JSON.stringify({ 79 | fn_index: 1, // <- important! 80 | data: [ 81 | positive, // string in 'Prompt' Textbox component 82 | negative, // string in 'Negative prompt' Textbox component 83 | seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component 84 | width, // number (numeric value between 256 and 1024) in 'Width' Slider component 85 | height, // number (numeric value between 256 and 1024) in 'Height' Slider component 86 | 0.0, // can be disabled for LCM-LORA-SSD-1B 87 | nbSteps, // number (numeric value between 2 and 8) in 'Number of inference steps for base' Slider component 88 | secretToken 89 | ], 90 | }), 91 | cache: "no-store", 92 | }) 93 | 94 | const { data } = await res.json() 95 | 96 | 97 | // Recommendation: handle errors 98 | if (res.status !== 200 || !Array.isArray(data)) { 99 | // This will activate the closest `error.js` Error Boundary 100 | throw new Error(`Failed to fetch data (status: ${res.status})`) 101 | } 102 | // console.log("data:", data.slice(0, 50)) 103 | 104 | if (!data[0]) { 105 | throw new Error(`the returned image was empty`) 106 | } 107 | 108 | return data[0] as string 109 | } 110 | -------------------------------------------------------------------------------- /src/providers/image-generation/generateImageLCMGradio.mts: -------------------------------------------------------------------------------- 1 | 2 | import { client } from "@gradio/client" 3 | 4 | import { generateSeed } from "../../utils/misc/generateSeed.mts" 5 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts" 6 | import { convertToWebp } from "../../utils/image/convertToWebp.mts" 7 | 8 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes 9 | // console.log("process.env:", process.env) 10 | 11 | // note: to reduce costs I use the small A10s (not the large) 12 | // anyway, we will soon not need to use this cloud anymore 13 | // since we will be able to leverage the Inference API 14 | const instance = `${process.env.VC_LCM_SPACE_API_URL || ""}` 15 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}` 16 | 17 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2)) 18 | 19 | export async function generateImageLCMAsBase64(options: { 20 | positivePrompt: string; 21 | negativePrompt?: string; 22 | seed?: number; 23 | width?: number; 24 | height?: number; 25 | nbSteps?: number; 26 | }): Promise { 27 | 28 | // console.log("querying " + instance) 29 | const positivePrompt = options?.positivePrompt || "" 30 | if (!positivePrompt) { 31 | throw new Error("missing prompt") 32 | } 33 | 34 | // the negative prompt CAN be missing, since we use a trick 35 | // where we make the interface mandatory in the TS doc, 36 | // but browsers might send something partial 37 | const negativePrompt = options?.negativePrompt || "" 38 | 39 | // we treat 0 as meaning "random seed" 40 | const seed = (options?.seed ? options.seed : 0) || generateSeed() 41 | 42 | const width = getValidNumber(options?.width, 256, 1024, 512) 43 | const height = getValidNumber(options?.height, 256, 1024, 512) 44 | const nbSteps = getValidNumber(options?.nbSteps, 1, 8, 4) 45 | // console.log("SEED:", seed) 46 | 47 | const positive = [ 48 | 49 | // oh well.. is it too late to move this to the bottom? 
50 | "beautiful", 51 | 52 | // too opinionated, so let's remove it 53 | // "intricate details", 54 | 55 | positivePrompt, 56 | 57 | "award winning", 58 | "high resolution" 59 | ].filter(word => word) 60 | .join(", ") 61 | 62 | const negative = [ 63 | negativePrompt, 64 | "watermark", 65 | "copyright", 66 | "blurry", 67 | // "artificial", 68 | // "cropped", 69 | "low quality", 70 | "ugly" 71 | ].filter(word => word) 72 | .join(", ") 73 | 74 | const api = await client(instance, { 75 | hf_token: `${process.env.VC_HF_API_TOKEN}` as any 76 | }) 77 | 78 | const rawResponse = (await api.predict("/run", [ 79 | positive, // string in 'Prompt' Textbox component 80 | negative, // string in 'Negative prompt' Textbox component 81 | seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component 82 | width, // number (numeric value between 256 and 1024) in 'Width' Slider component 83 | height, // number (numeric value between 256 and 1024) in 'Height' Slider component 84 | 0.0, // can be disabled for LCM SDXL 85 | nbSteps, // number (numeric value between 2 and 8) in 'Number of inference steps for base' Slider component 86 | secretToken 87 | ])) as any 88 | 89 | const result = rawResponse?.data?.[0] as string 90 | if (!result?.length) { 91 | throw new Error(`the returned image was empty`) 92 | } 93 | 94 | try { 95 | const finalImage = await convertToWebp(result) 96 | return finalImage 97 | } catch (err) { 98 | // console.log("err:", err) 99 | throw new Error(err) 100 | } 101 | } -------------------------------------------------------------------------------- /src/providers/image-generation/generateImagePulib.mts: -------------------------------------------------------------------------------- 1 | 2 | import { client } from "@gradio/client" 3 | 4 | import { generateSeed } from "../../utils/misc/generateSeed.mts" 5 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts" 6 | import { convertToWebp } from "../../utils/image/convertToWebp.mts" 7 | import { addBase64HeaderToPng } from "../../utils/image/addBase64HeaderToPng.mts" 8 | 9 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes 10 | // console.log("process.env:", process.env) 11 | 12 | // note: to reduce costs I use the small A10s (not the large) 13 | // anyway, we will soon not need to use this cloud anymore 14 | // since we will be able to leverage the Inference API 15 | const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-pulid.hf.space` 16 | const gradioSpace = `jbilcke-hf/ai-tube-model-pulid` 17 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}` 18 | 19 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2)) 20 | 21 | export async function generateImagePulibAsBase64(options: { 22 | positivePrompt: string; 23 | negativePrompt?: string; 24 | identityImage?: string; 25 | seed?: number; 26 | width?: number; 27 | height?: number; 28 | nbSteps?: number; 29 | }): Promise { 30 | 31 | const positivePrompt = options?.positivePrompt || "" 32 | if (!positivePrompt) { 33 | throw new Error("missing prompt") 34 | } 35 | 36 | // the negative prompt CAN be missing, since we use a trick 37 | // where we make the interface mandatory in the TS doc, 38 | // but browsers might send something partial 39 | const negativePrompt = options?.negativePrompt || "" 40 | 41 | // we treat 0 as meaning "random seed" 42 | const seed = (options?.seed ? 
42 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
43 | 
44 |   const width = getValidNumber(options?.width, 256, 1024, 512)
45 |   const height = getValidNumber(options?.height, 256, 1024, 512)
46 |   const nbSteps = getValidNumber(options?.nbSteps, 1, 8, 4)
47 |   // console.log("SEED:", seed)
48 | 
49 |   const identityImage = `${options.identityImage || ""}`
50 | 
51 |   const positive = [
52 |     positivePrompt,
53 |   ].filter(word => word)
54 |   .join(", ")
55 | 
56 |   const negative = [
57 |     negativePrompt,
58 |     "watermark",
59 |     "copyright",
60 |     "blurry",
61 |     // "artificial",
62 |     // "cropped",
63 |     "low quality",
64 |     "ugly",
65 |     'flaws in the eyes',
66 |     'flaws in the face',
67 |     'flaws',
68 |     'lowres',
69 |     'non-HDRi',
70 |     'low quality',
71 |     'worst quality',
72 |     'artifacts noise',
73 |     'text',
74 |     'glitch',
75 |     'deformed',
76 |     'mutated',
77 |     'disfigured hands',
78 |     'low resolution',
79 |     'partially rendered objects',
80 |     'deformed or partially rendered eyes',
81 |     'deformed eyeballs',
82 |     'cross-eyed',
83 |   ].filter(word => word)
84 |   .join(", ")
85 | 
86 |   const api = await client(gradioSpaceApiUrl, {
87 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
88 |   })
89 | 
90 |   // we hardcode the number of steps to 4 (so the validated nbSteps above is deliberately ignored)
91 |   const steps = 4
92 | 
93 |   // console.log("querying " + gradioSpaceApiUrl + " with tons of params")
94 | 
95 |   const rawResponse = (await api.predict("/run", [
96 |     secretToken, // # str in 'parameter_4' Textbox component
97 |     identityImage || "", // 'ID image (main)' Image component
98 |     "", // 'Additional ID image (auxiliary)' Image component
99 |     "", // 'Additional ID image (auxiliary)' Image component
100 |     "", // 'Additional ID image (auxiliary)' Image component
101 |     positive, // # str in 'Prompt' Textbox component
102 |     negative, // # str in 'Negative Prompt' Textbox component
103 |     1.2, // # int | float (numeric value between 1 and 1.5) in 'CFG, recommend value range [1, 1.5], 1 will be faster ' Slider component
104 |     seed, // # int | float (numeric value between 0 and 4294967295) in 'Seed' Slider component (we pass the validated seed, instead of discarding it and generating a fresh one)
105 |     steps, // # int | float (numeric value between 1 and 100) in 'Steps' Slider component
106 |     height, // # int | float (numeric value between 512 and 1280) in 'Height' Slider component
107 |     width, // # int | float (numeric value between 512 and 1280) in 'Width' Slider component
108 |     0.8, // # int | float (numeric value between 0 and 5) in 'ID scale' Slider component
109 |     "fidelity", // # str (Option from: ['fidelity', 'extremely style']) in 'mode' Dropdown component
110 |     false, // 'ID Mix (if you want to mix two ID image, please turn this on, otherwise, turn this off)' Checkbox component
111 |   ])) as any
112 | 
113 |   const result = rawResponse?.data?.[0] as string
114 |   if (!result?.length) {
115 |     throw new Error(`the returned image was empty`)
116 |   }
117 | 
118 |   try {
119 |     const finalImage = await convertToWebp(addBase64HeaderToPng(result))
120 |     return finalImage
121 |   } catch (err) {
122 |     // console.log("err:", err)
123 |     throw new Error(`failed to convert the image to webp: ${err}`)
124 |   }
125 | }
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXL360.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | 
6 | // we don't use replicas yet, because it ain't easy to get their hostname
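// (sketch of the "mark failed instances as unavailable for a couple of minutes" TODO from
//  the other providers; hypothetical helpers, not wired in anywhere yet:
//
//    const cooldownUntil = new Map<string, number>()
//    const pickInstance = (): string => {
//      const available = instances.filter(url => (cooldownUntil.get(url) ?? 0) < Date.now())
//      return (available.length ? available : instances)[0]
//    }
//    const markAsFailed = (url: string) => cooldownUntil.set(url, Date.now() + 2 * 60 * 1000)
// )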
7 | const instances: string[] = [
8 |   `${process.env.VC_SDXL_360_SPACE_API_URL_1 || ""}`,
9 |   // `${process.env.VC_SDXL_SPACE_API_URL_2 || ""}`,
10 | ].filter(instance => instance?.length > 0)
11 | 
12 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
13 | 
14 | export async function generateImageSDXL360AsBase64(options: {
15 |   positivePrompt: string;
16 |   negativePrompt?: string;
17 |   seed?: number;
18 |   width?: number;
19 |   height?: number;
20 |   nbSteps?: number;
21 | }): Promise<string> {
22 | 
23 |   const positivePrompt = options?.positivePrompt || ""
24 |   if (!positivePrompt) {
25 |     throw new Error("missing prompt")
26 |   }
27 |   const negativePrompt = options?.negativePrompt || ""
28 | 
29 |   // we treat 0 as meaning "random seed"
30 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
31 | 
32 |   const width = getValidNumber(options?.width, 256, 1024, 512)
33 |   const height = getValidNumber(options?.height, 256, 1024, 512)
34 |   const nbSteps = getValidNumber(options?.nbSteps, 5, 100, 20)
35 |   // console.log("SEED FOR 360:", seed)
36 | 
37 |   const instance = instances.shift()
38 |   instances.push(instance)
39 | 
40 |   const positive = [
41 |     "360 view",
42 |     positivePrompt,
43 |     "beautiful",
44 |     // "intricate details",
45 |     "award winning",
46 |     "high resolution"
47 |   ].filter(word => word)
48 |   .join(", ")
49 | 
50 |   const negative = [
51 |     negativePrompt,
52 |     "watermark",
53 |     "copyright",
54 |     "blurry",
55 |     // "artificial",
56 |     // "cropped",
57 |     "low quality",
58 |     "ugly"
59 |   ].filter(word => word)
60 |   .join(", ")
61 | 
62 |   const api = await client(instance, {
63 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
64 |   })
65 | 
66 | 
67 |   const rawResponse = (await api.predict("/run", [
68 |     positive, // string in 'Prompt' Textbox component
69 |     negative, // string in 'Negative prompt' Textbox component
70 |     positive, // string in 'Prompt 2' Textbox component
71 |     negative, // string in 'Negative prompt 2' Textbox component
72 |     true, // boolean in 'Use negative prompt' Checkbox component
73 |     false, // boolean in 'Use prompt 2' Checkbox component
74 |     false, // boolean in 'Use negative prompt 2' Checkbox component
75 |     seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
76 |     width, // number (numeric value between 256 and 1024) in 'Width' Slider component
77 |     height, // number (numeric value between 256 and 1024) in 'Height' Slider component
78 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for base' Slider component
79 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for refiner' Slider component
80 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for base' Slider component
81 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for refiner' Slider component
82 |     true, // boolean in 'Apply refiner' Checkbox component
83 |     secretToken,
84 |   ])) as any
85 | 
86 |   const result = rawResponse?.data?.[0] as string
87 |   if (!result?.length) {
88 |     throw new Error(`the returned image was empty`)
89 |   }
90 |   return result
91 | }
92 | 
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXLFetch.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
"../../utils/validators/getValidNumber.mts" 5 | 6 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes 7 | // console.log("process.env:", process.env) 8 | 9 | // note: to reduce costs I use the small A10s (not the large) 10 | // anyway, we will soon not need to use this cloud anymore 11 | // since we will be able to leverage the Inference API 12 | const instance = `${process.env.VC_SDXL_SPACE_API_URL || ""}` 13 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}` 14 | 15 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2)) 16 | 17 | export async function generateImageSDXLAsBase64(options: { 18 | positivePrompt: string; 19 | negativePrompt?: string; 20 | seed?: number; 21 | width?: number; 22 | height?: number; 23 | nbSteps?: number; 24 | }): Promise { 25 | 26 | const positivePrompt = options?.positivePrompt || "" 27 | if (!positivePrompt) { 28 | throw new Error("missing prompt") 29 | } 30 | 31 | // the negative prompt CAN be missing, since we use a trick 32 | // where we make the interface mandatory in the TS doc, 33 | // but browsers might send something partial 34 | const negativePrompt = options?.negativePrompt || "" 35 | 36 | // we treat 0 as meaning "random seed" 37 | const seed = (options?.seed ? options.seed : 0) || generateSeed() 38 | 39 | const width = getValidNumber(options?.width, 256, 1024, 512) 40 | const height = getValidNumber(options?.height, 256, 1024, 512) 41 | const nbSteps = getValidNumber(options?.nbSteps, 5, 100, 20) 42 | // console.log("SEED:", seed) 43 | 44 | const positive = [ 45 | 46 | // oh well.. is it too late to move this to the bottom? 47 | "beautiful", 48 | // "intricate details", 49 | positivePrompt, 50 | 51 | "award winning", 52 | "high resolution" 53 | ].filter(word => word) 54 | .join(", ") 55 | 56 | const negative = [ 57 | negativePrompt, 58 | "watermark", 59 | "copyright", 60 | "blurry", 61 | // "artificial", 62 | // "cropped", 63 | "low quality", 64 | "ugly" 65 | ].filter(word => word) 66 | .join(", ") 67 | 68 | const res = await fetch(instance + (instance.endsWith("/") ? "" : "/") + "api/predict", { 69 | method: "POST", 70 | headers: { 71 | "Content-Type": "application/json", 72 | // Authorization: `Bearer ${token}`, 73 | }, 74 | body: JSON.stringify({ 75 | fn_index: 1, // <- important! 
76 |       data: [
77 |         positive, // string in 'Prompt' Textbox component
78 |         negative, // string in 'Negative prompt' Textbox component
79 |         positive, // string in 'Prompt 2' Textbox component
80 |         negative, // string in 'Negative prompt 2' Textbox component
81 |         true, // boolean in 'Use negative prompt' Checkbox component
82 |         false, // boolean in 'Use prompt 2' Checkbox component
83 |         false, // boolean in 'Use negative prompt 2' Checkbox component
84 |         seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
85 |         width, // number (numeric value between 256 and 1024) in 'Width' Slider component
86 |         height, // number (numeric value between 256 and 1024) in 'Height' Slider component
87 |         8, // number (numeric value between 1 and 20) in 'Guidance scale for base' Slider component
88 |         8, // number (numeric value between 1 and 20) in 'Guidance scale for refiner' Slider component
89 |         nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for base' Slider component
90 |         nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for refiner' Slider component
91 |         true, // boolean in 'Apply refiner' Checkbox component,
92 |         secretToken
93 |       ],
94 |     }),
95 |     cache: "no-store",
96 |   })
97 | 
98 |   const { data } = await res.json()
99 | 
100 |   // console.log("data:", data)
101 |   // handle HTTP errors and unexpected payloads from the space
102 |   if (res.status !== 200 || !Array.isArray(data)) {
103 |     // (note: `res.json()` above will already have thrown if the body wasn't JSON at all)
104 |     throw new Error(`Failed to fetch data (status: ${res.status})`)
105 |   }
106 |   // console.log("data:", data.slice(0, 50))
107 | 
108 |   if (!data[0]) {
109 |     throw new Error(`the returned image was empty`)
110 |   }
111 | 
112 |   return data[0] as string
113 | }
114 | 
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXLGradio.mts:
--------------------------------------------------------------------------------
1 | import { client } from "@gradio/client"
2 | 
3 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | import { convertToWebp } from "../../utils/image/convertToWebp.mts"
6 | 
7 | // TODO add a system to mark failed instances as "unavailable" for a couple of minutes
8 | // console.log("process.env:", process.env)
9 | 
10 | // note: to reduce costs I use the small A10s (not the large)
11 | // anyway, we will soon not need to use this cloud anymore
12 | // since we will be able to leverage the Inference API
13 | const instance = `${process.env.VC_SDXL_SPACE_API_URL || ""}`
14 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
15 | 
16 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2))
17 | 
18 | export async function generateImageSDXLAsBase64(options: {
19 |   positivePrompt: string;
20 |   negativePrompt?: string;
21 |   seed?: number;
22 |   width?: number;
23 |   height?: number;
24 |   nbSteps?: number;
25 | }): Promise<string> {
26 | 
27 |   const positivePrompt = options?.positivePrompt || ""
28 |   if (!positivePrompt) {
29 |     throw new Error("missing prompt")
30 |   }
31 | 
32 |   // the negative prompt CAN be missing, since we use a trick
33 |   // where we make the interface mandatory in the TS doc,
34 |   // but browsers might send something partial
35 |   const negativePrompt = options?.negativePrompt || ""
36 | 
37 |   // we treat 0 as meaning "random seed"
38 |   const seed = (options?.seed ? options.seed : 0) || generateSeed()
39 | 
40 |   const width = getValidNumber(options?.width, 256, 1024, 512)
41 |   const height = getValidNumber(options?.height, 256, 1024, 512)
42 |   const nbSteps = getValidNumber(options?.nbSteps, 5, 100, 20)
43 |   // console.log("SEED:", seed)
44 | 
45 |   const positive = [
46 | 
47 |     // oh well.. is it too late to move this to the bottom?
48 |     "beautiful",
49 |     // "intricate details",
50 |     positivePrompt,
51 | 
52 |     "award winning",
53 |     "high resolution"
54 |   ].filter(word => word)
55 |   .join(", ")
56 | 
57 |   const negative = [
58 |     negativePrompt,
59 |     "watermark",
60 |     "copyright",
61 |     "blurry",
62 |     // "artificial",
63 |     // "cropped",
64 |     "low quality",
65 |     "ugly"
66 |   ].filter(word => word)
67 |   .join(", ")
68 | 
69 |   const api = await client(instance, {
70 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
71 |   })
72 | 
73 | 
74 |   const rawResponse = (await api.predict("/run", [
75 |     positive, // string in 'Prompt' Textbox component
76 |     negative, // string in 'Negative prompt' Textbox component
77 |     positive, // string in 'Prompt 2' Textbox component
78 |     negative, // string in 'Negative prompt 2' Textbox component
79 |     true, // boolean in 'Use negative prompt' Checkbox component
80 |     false, // boolean in 'Use prompt 2' Checkbox component
81 |     false, // boolean in 'Use negative prompt 2' Checkbox component
82 |     seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
83 |     width, // number (numeric value between 256 and 1024) in 'Width' Slider component
84 |     height, // number (numeric value between 256 and 1024) in 'Height' Slider component
85 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for base' Slider component
86 |     8, // number (numeric value between 1 and 20) in 'Guidance scale for refiner' Slider component
87 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for base' Slider component
88 |     nbSteps, // number (numeric value between 10 and 100) in 'Number of inference steps for refiner' Slider component
89 |     true, // boolean in 'Apply refiner' Checkbox component,
90 |     secretToken
91 |   ])) as any
92 | 
93 |   const result = rawResponse?.data?.[0] as string
94 |   if (!result?.length) {
95 |     throw new Error(`the returned image was empty`)
96 |   }
97 | 
98 |   try {
99 |     const finalImage = await convertToWebp(result)
100 |     return finalImage
101 |   } catch (err) {
102 |     // console.log("err:", err)
103 |     throw new Error(`failed to convert the image to webp: ${err}`)
104 |   }
105 | }
106 | 
--------------------------------------------------------------------------------
/src/providers/image-generation/generateImageSDXLTurbo.mts:
--------------------------------------------------------------------------------
1 | 
2 | import { client } from "@gradio/client"
3 | 
4 | import { generateSeed } from "../../utils/misc/generateSeed.mts"
5 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
6 | import { convertToWebp } from "../../utils/image/convertToWebp.mts"
7 | 
8 | 
9 | const instance = `${process.env.VC_SDXL_TURBO_SPACE_API_URL || ""}`
10 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
11 | 
12 | // console.log("DEBUG:", JSON.stringify({ instances, secretToken }, null, 2))
13 | 
14 | export async function generateImageSDXLTurboAsBase64(options: {
15 |   positivePrompt: string;
16 |   negativePrompt?: string;
17 |   seed?: number;
18 |   width?: number;
19 |   height?: number;
20 |   nbSteps?: number;
21 | }): Promise<string> {
22 | 
23 |   // console.log("querying " + instance)
options?.positivePrompt || "" 25 | if (!positivePrompt) { 26 | throw new Error("missing prompt") 27 | } 28 | 29 | // the negative prompt CAN be missing, since we use a trick 30 | // where we make the interface mandatory in the TS doc, 31 | // but browsers might send something partial 32 | const negativePrompt = options?.negativePrompt || "" 33 | 34 | // we treat 0 as meaning "random seed" 35 | const seed = (options?.seed ? options.seed : 0) || generateSeed() 36 | 37 | const maxRequestedResolution = 1024 38 | const maxModelResolution = 512 39 | 40 | const requestedWidth = getValidNumber(options?.width, 256, maxRequestedResolution, maxModelResolution) 41 | const requestedHeight = getValidNumber(options?.height, 256, maxRequestedResolution, maxModelResolution) 42 | 43 | // we try to preserve the original image ratio 44 | const ratioH = requestedHeight / requestedWidth 45 | const ratioW = requestedWidth / requestedHeight 46 | 47 | // we always try to ccrank the resolution to the max 48 | let width = ratioW < 1 ? Math.round(ratioW * maxModelResolution) : maxModelResolution 49 | let height = ratioH < 1 ? Math.round(ratioH * maxModelResolution) : maxModelResolution 50 | 51 | const positive = [ 52 | 53 | // oh well.. is it too late to move this to the bottom? 54 | "beautiful", 55 | 56 | // too opinionated, so let's remove it 57 | // "intricate details", 58 | 59 | positivePrompt, 60 | 61 | "award winning", 62 | "high resolution" 63 | ].filter(word => word) 64 | .join(", ") 65 | 66 | const negative = [ 67 | negativePrompt, 68 | "watermark", 69 | "copyright", 70 | "blurry", 71 | // "artificial", 72 | // "cropped", 73 | "low quality", 74 | "ugly" 75 | ].filter(word => word) 76 | .join(", ") 77 | 78 | const api = await client(instance, { 79 | hf_token: `${process.env.VC_HF_API_TOKEN}` as any 80 | }) 81 | 82 | const rawResponse = (await api.predict("/run", [ 83 | positive, // string in 'Prompt' Textbox component 84 | negative, // string in 'Negative prompt' Textbox component 85 | seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component 86 | width, // number (numeric value between 256 and 1024) in 'Width' Slider component 87 | height, // number (numeric value between 256 and 1024) in 'Height' Slider component 88 | secretToken 89 | ])) as any 90 | 91 | const result = rawResponse?.data?.[0] as string 92 | if (!result?.length) { 93 | throw new Error(`the returned image was empty`) 94 | } 95 | 96 | try { 97 | const finalImage = await convertToWebp(result) 98 | return finalImage 99 | } catch (err) { 100 | // console.log("err:", err) 101 | throw new Error(err) 102 | } 103 | } -------------------------------------------------------------------------------- /src/providers/image-segmentation/segmentImage.mts: -------------------------------------------------------------------------------- 1 | import puppeteer from "puppeteer" 2 | 3 | import { sleep } from "../../utils/misc/sleep.mts" 4 | import { ImageSegment } from "../../types.mts" 5 | import { downloadFileAsBase64 } from "../../utils/download/downloadFileAsBase64.mts" 6 | import { resizeBase64Image } from "../../utils/image/resizeBase64Image.mts" 7 | 8 | // we don't use replicas yet, because it ain't easy to get their hostname 9 | const instances: string[] = [ 10 | `${process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_1 || ""}`, 11 | `${process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_2 || ""}`, 12 | // `${process.env.VC_SEGMENTATION_MODULE_SPACE_API_URL_3 || ""}`, 13 | ] 14 | 15 | // TODO we should use an inference endpoint instead 16 
17 | // note: on a large T4 (8 vCPU)
18 | // it takes about 30 seconds to compute
19 | export async function segmentImage(
20 |   inputImageFilePath: string,
21 |   actionnables: string[],
22 |   width: number,
23 |   height: number,
24 | ): Promise<{
25 |   maskUrl: string
26 |   segments: ImageSegment[]
27 | }> {
28 | 
29 |   console.log(`segmenting image..`)
30 | 
31 |   const instance = instances.shift()
32 |   instances.push(instance)
33 | 
34 |   const browser = await puppeteer.launch({
35 |     headless: true,
36 |     protocolTimeout: 40000,
37 |   })
38 | 
39 |   try {
40 |     const page = await browser.newPage()
41 |     await page.goto(instance, { waitUntil: 'networkidle2' })
42 | 
43 |     // await new Promise(r => setTimeout(r, 1000))
44 | 
45 |     const fileField = await page.$('input[type="file"]')
46 | 
47 |     // console.log(`uploading file..`)
48 |     await fileField.uploadFile(inputImageFilePath)
49 | 
50 |     const firstTextarea = await page.$('textarea[data-testid="textbox"]')
51 | 
52 |     const conceptsToDetect = actionnables.join(" . ")
53 |     await firstTextarea.type(conceptsToDetect)
54 | 
55 |     // console.log('looking for the button to submit')
56 |     const submitButton = await page.$('button.lg')
57 | 
58 |     await sleep(300)
59 | 
60 |     // console.log('clicking on the button')
61 |     await submitButton.click()
62 | 
63 |     await page.waitForSelector('img[data-testid="detailed-image"]', {
64 |       timeout: 40000, // we keep it tight, to fail early
65 |     })
66 | 
67 |     const tmpMaskDownloadUrl = await page.$$eval('img[data-testid="detailed-image"]', el => el.map(x => x.getAttribute("src"))[0])
68 | 
69 |     let segments: ImageSegment[] = []
70 | 
71 |     try {
72 |       segments = JSON.parse(await page.$$eval('textarea', el => el.map(x => x.value)[1]))
73 |     } catch (err) {
74 |       console.log(`failed to parse JSON: ${err}`)
75 |       segments = []
76 |     }
77 | 
78 |     // const tmpMaskFileName = `${uuidv4()}.png`
79 |     // await downloadFileToTmp(maskUrl, tmpMaskFileName)
80 | 
81 |     const rawPngInBase64 = await downloadFileAsBase64(tmpMaskDownloadUrl)
82 | 
83 |     const maskUrl = await resizeBase64Image(rawPngInBase64, width, height)
84 | 
85 |     return {
86 |       maskUrl,
87 |       segments,
88 |     }
89 |   } catch (err) {
90 |     throw err
91 |   } finally {
92 |     await browser.close()
93 |   }
94 | }
95 | 
96 | /*
97 | 
98 | If you want to try:
99 | 
100 | // note: must be a jpg and not jpeg it seems
101 | // (probably a puppeteer bug)
102 | const results = await segmentImage("./barn.jpg", [
103 |   "roof",
104 |   "door",
105 |   "window"
106 | ], 512, 512) // width and height of the returned mask
107 | 
108 | console.log("results:", results)
109 | */
--------------------------------------------------------------------------------
/src/providers/image-segmentation/segmentImageFromURL.mts:
--------------------------------------------------------------------------------
1 | import { v4 as uuidv4 } from "uuid"
2 | 
3 | import { downloadFileToTmp } from "../../utils/download/downloadFileToTmp.mts"
4 | import { segmentImage } from "./segmentImage.mts"
5 | 
6 | // TODO we should use an inference endpoint instead
7 | 
8 | // WARNING: this function is currently unused
9 | // if you do attempt to use it, please check the hardcoded 1024x1024 thing line 21, and refactor it to your needs
10 | export async function segmentImageFromURL(
11 |   inputUrl: string,
12 |   actionnables: string[]
13 | ) {
14 |   if (!actionnables?.length) {
15 |     throw new Error("cannot segment image without actionnables!")
16 |   }
17 |   console.log(`segmenting image from URL: "${inputUrl}"`)
18 |   const tmpFileName = `${uuidv4()}`
19 |   const tmpFilePath = await downloadFileToTmp(inputUrl, tmpFileName)
20 | 
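  // (note: tmpFileName is a bare uuid with no file extension; given the "must be a jpg"
  //  remark in segmentImage.mts, it might be safer to name it `${uuidv4()}.jpg` here;
  //  this is an assumption, untested)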
21 |   const results = await segmentImage(tmpFilePath, actionnables, 1024, 1024)
22 | 
23 |   console.log("image has been segmented!", results)
24 |   return results
25 | }
--------------------------------------------------------------------------------
/src/providers/image-upscaling/upscaleImage.mts:
--------------------------------------------------------------------------------
1 | 
2 | import { client } from "@gradio/client"
3 | 
4 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
5 | 
6 | // we don't use replicas yet, because it ain't easy to get their hostname
7 | const instance = `${process.env.VC_UPSCALING_SPACE_API_URL || ""}`
8 | const secretToken = `${process.env.VC_MICROSERVICE_SECRET_TOKEN || ""}`
9 | 
10 | // this doesn't work because of this error.. I think the version of Gradio is too old (or too new)?
11 | // ReferenceError: addEventListener is not defined
12 | //   at file:///Users/jbilcke/Projects/VideoChain-API/node_modules/@gradio/client/dist/index.js:551:15
13 | //   at processTicksAndRejections (node:internal/process/task_queues:95:5)
14 | export async function upscaleImage(src: string, factor?: number) {
15 | 
16 |   // by default we do a 2X scale
17 |   // VideoQuest will use 4X
18 |   // 4 is really the max/limit, as this can generate PNGs of 50 MB..
19 |   const scaleFactor = getValidNumber(factor, 0, 4, 2)
20 | 
21 |   if (scaleFactor < 2) {
22 |     return src
23 |   }
24 | 
25 |   const api = await client(instance, {
26 |     hf_token: `${process.env.VC_HF_API_TOKEN}` as any
27 |   })
28 | 
29 |   const result = await api.predict("/upscale", [
30 |     secretToken,
31 |     src, // blob in 'Source Image' Image component
32 |     "realesr-general-x4v3", // string (Option from: ['RealESRGAN_x4plus', 'RealESRNet_x4plus', 'RealESRGAN_x4plus_anime_6B', 'RealESRGAN_x2plus', 'realesr-general-x4v3']) in 'Real-ESRGAN inference model to be used' Dropdown component
33 |     0.5, // number (numeric value between 0 and 1) in 'Denoise Strength (Used only with the realesr-general-x4v3 model)' Slider component
34 |     false, // boolean in 'Face Enhancement using GFPGAN (Doesn't work for anime images)' Checkbox component
35 |     scaleFactor, // number (numeric value between 1 and 10) in 'Image Upscaling Factor' Slider component
36 |   ]);
37 | 
38 | 
39 |   const rawResponse = result as any
40 | 
41 |   // console.log("rawResponse:", rawResponse)
42 | 
43 |   return rawResponse?.data?.[0] as string
44 | }
45 | 
--------------------------------------------------------------------------------
/src/providers/language-model/enrichVideoSpecsUsingLLM.mts:
--------------------------------------------------------------------------------
1 | import { ChatCompletionRequestMessage } from "openai"
2 | 
3 | import { Video, VideoAPIRequest } from "../../types.mts"
4 | import { generateYAML } from "./openai/generateYAML.mts"
5 | import { HallucinatedVideoRequest, OpenAIErrorResponse } from "./types.mts"
6 | import { getQueryChatMessages } from "../../preproduction/prompts.mts"
7 | import { getValidNumber } from "../../utils/validators/getValidNumber.mts"
8 | import { parseShotRequest } from "../../utils/requests/parseShotRequest.mts"
9 | 
10 | 
11 | export const enrichVideoSpecsUsingLLM = async (video: Video): Promise