├── .DS_Store ├── README.md ├── apps ├── .DS_Store ├── web-app │ ├── .babelrc │ ├── .dockerignore │ ├── .env.example │ ├── .eslintrc.json │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── app │ │ ├── api │ │ │ └── videos │ │ │ │ └── route.ts │ │ ├── components │ │ │ ├── Header.js │ │ │ └── ui │ │ │ │ └── button.tsx │ │ ├── db │ │ │ └── model.js │ │ ├── favicon.ico │ │ ├── globals.css │ │ ├── layout.tsx │ │ ├── page.tsx │ │ ├── view │ │ │ └── page.tsx │ │ └── worker │ │ │ └── worker.ts │ ├── components.json │ ├── lib │ │ └── utils.ts │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── postcss.config.js │ ├── public │ │ ├── next.svg │ │ └── vercel.svg │ ├── tailwind.config.ts │ ├── tsconfig.json │ └── utils │ │ └── fileUpload.js └── worker │ ├── .env.example │ ├── .gitignore │ ├── README.md │ ├── consumer.py │ ├── gpt.py │ ├── requirements.txt │ ├── utils.py │ └── writer.py └── docs ├── app-1.png ├── app-2.png ├── system.png └── video.mp4 /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aadeshkulkarni/sanchay-ai/cf4b816af44395ee8d0da03dba366491926588ef/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sanchay AI (A Generative AI app) 2 | 3 | Upload your video and Sanchay AI will generate key elements (video transcription, subtitles, and chapters) in an organized and easily accessible manner. 4 | 5 | ### Project Status and Contribution Potential 6 | 7 | This project is in its infancy and has just been scaffolded with a new architecture. It's at a crucial stage where contributions can significantly shape its future and scalability. With the right environment and collaborative effort, it has the potential to evolve into something truly remarkable. 8 | 9 | ### System (In a nutshell) 10 | ![System](/docs/system.png) 11 | 12 | 13 | ### Requirements: 14 | - MongoDB 15 | - RabbitMQ 16 | - LocalStack 17 | 18 | 19 | There are 2 apps: 20 | 21 | 1. web-app 22 | - This is a full-stack Next.js app. 23 | - The project depends on 3 services: 24 | - RabbitMQ 25 | - LocalStack 26 | - MongoDB 27 | 28 | 2. worker 29 | - This is a Python-based RabbitMQ worker that listens for requests from the web-app and processes them in the background. 30 | - It depends on the same 3 services: 31 | - RabbitMQ 32 | - LocalStack 33 | - MongoDB 34 | 35 | 36 | ### Setup 37 | 38 | Before setting up the codebase, it's important to set up the services the codebase depends on. 
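Once those services are running, it can help to confirm they are reachable before starting the apps. The snippet below is a minimal connectivity check, not part of this repository; it assumes the default local ports used in this README and that the worker's Python packages (`pika`, `pymongo`) are installed.

```python
# check_services.py -- quick sanity check for the three local dependencies.
# A sketch, not part of the repo: assumes RabbitMQ on 5672 (guest/guest),
# LocalStack on 4566, and a MongoDB URI in the MONGO_DB environment variable.
import os
import socket

import pika
from pymongo import MongoClient


def check_rabbitmq(url="amqp://guest:guest@localhost:5672"):
    pika.BlockingConnection(pika.URLParameters(url)).close()
    print("RabbitMQ: OK")


def check_mongodb(uri=os.getenv("MONGO_DB", "mongodb://localhost:27017")):
    MongoClient(uri, serverSelectionTimeoutMS=3000).admin.command("ping")
    print("MongoDB: OK")


def check_localstack(host="localhost", port=4566):
    # A plain TCP check is enough to confirm the LocalStack edge port is up.
    with socket.create_connection((host, port), timeout=3):
        print("LocalStack: OK")


if __name__ == "__main__":
    for check in (check_rabbitmq, check_mongodb, check_localstack):
        try:
            check()
        except Exception as exc:
            print(f"{check.__name__} failed: {exc}")
```

If any check fails, revisit the corresponding section below before moving on.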
39 | 40 | ### Set up RabbitMQ locally 41 | - If you have Docker installed on your system, you can set up RabbitMQ with the command 42 | - `docker run -it --rm --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:3.13-management` 43 | - This will spin up RabbitMQ with its management UI on localhost:15672 (default username/password: guest/guest) 44 | > OR 45 | 46 | - If you don't want to use Docker, you can set up RabbitMQ by following the steps [here](https://www.rabbitmq.com/docs/download) 47 | 48 | ### Set up MongoDB 49 | - Recommended: Create a database cluster on https://cloud.mongodb.com/ for free and use its connection string 50 | 51 | ### LocalStack 52 | - Follow the guide here: https://app.localstack.cloud/getting-started 53 | - Create a new bucket by running the command `aws s3 mb s3://sanchayai --endpoint-url=http://localhost:4566` 54 | 55 | Once the above services are up, clone the sanchay-ai repository. 56 | 57 | ### Worker setup 58 | - cd into the apps/worker folder 59 | - follow the README instructions 60 | - pip install -r requirements.txt 61 | - python consumer.py 62 | 63 | ### Web-app setup 64 | - cd into the apps/web-app folder 65 | - follow the README instructions 66 | - npm install 67 | - npm run dev 68 | 69 | If all is working well: 70 | - Your web-app should be listening on `localhost:3000` 71 | - The RabbitMQ management UI should be available at `http://localhost:15672/` 72 | - LocalStack should be available at `http://localhost:4566` (nothing to display in the browser here) 73 | 74 | 75 | You can run the app and upload the test video included in the codebase at ./docs/video.mp4 76 | 77 | ![Home](/docs/app-1.png) 78 | ![Videos](/docs/app-2.png) 79 | -------------------------------------------------------------------------------- /apps/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aadeshkulkarni/sanchay-ai/cf4b816af44395ee8d0da03dba366491926588ef/apps/.DS_Store -------------------------------------------------------------------------------- /apps/web-app/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["next/babel"], 3 | "plugins": [] 4 | } -------------------------------------------------------------------------------- /apps/web-app/.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .results 3 | scripts 4 | *Dockerfile* 5 | node_modules 6 | .dockerignore 7 | .gitignore 8 | .next 9 | out 10 | public -------------------------------------------------------------------------------- /apps/web-app/.env.example: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID = "na" 2 | AWS_SECRET_ACCESS_KEY = "na" 3 | AWS_REGION_NAME = "ap-south-1" 4 | S3_BUCKET_NAME = "sanchayai" 5 | MONGO_DB="" 6 | RABBITMQ_DEV="amqp://guest:guest@localhost:5672" -------------------------------------------------------------------------------- /apps/web-app/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["next/babel","next/core-web-vitals"] 3 | } -------------------------------------------------------------------------------- /apps/web-app/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | .yarn/install-state.gz 8 | 9 | # testing 10 | /coverage 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | .next 38 | 39 | .env -------------------------------------------------------------------------------- /apps/web-app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:20-alpine 2 | 3 | WORKDIR /usr/src/app 4 | 5 | COPY package*.json ./ 6 | RUN npm install 7 | COPY . . 8 | EXPOSE 3000 9 | CMD ["npm", "run", "dev"] -------------------------------------------------------------------------------- /apps/web-app/README.md: -------------------------------------------------------------------------------- 1 | ### Setup 2 | 3 | - Copy .env.example and rename it to .env 4 | - Add your MongoDB connection string 5 | - `npm run dev` 6 | 7 | 8 | ### Tech 9 | - Next.js 10 | - Tailwind CSS 11 | - shadcn/ui 12 | -------------------------------------------------------------------------------- /apps/web-app/app/api/videos/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from "next/server"; 2 | import { Video } from "../../db/model"; 3 | 4 | export const GET = async () => { 5 | const data = await Video.find(); 6 | return NextResponse.json({ data: data }); 7 | }; 8 | -------------------------------------------------------------------------------- /apps/web-app/app/components/Header.js: -------------------------------------------------------------------------------- 1 | import Link from 'next/link' 2 | 3 | const Header = () => { 4 | return ( 5 |
6 |

Sanchay.ai

7 | Your videos 8 |
9 | ) 10 | } 11 | 12 | export default Header -------------------------------------------------------------------------------- /apps/web-app/app/components/ui/button.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import { Slot } from "@radix-ui/react-slot" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | const buttonVariants = cva( 8 | "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-gray-950 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 dark:ring-offset-gray-950 dark:focus-visible:ring-gray-300", 9 | { 10 | variants: { 11 | variant: { 12 | default: "bg-gray-900 text-gray-50 hover:bg-gray-900/90 dark:bg-gray-50 dark:text-gray-900 dark:hover:bg-gray-50/90", 13 | destructive: 14 | "bg-red-500 text-gray-50 hover:bg-red-500/90 dark:bg-red-900 dark:text-gray-50 dark:hover:bg-red-900/90", 15 | outline: 16 | "border border-gray-200 bg-white hover:bg-gray-100 hover:text-gray-900 dark:border-gray-800 dark:bg-gray-950 dark:hover:bg-gray-800 dark:hover:text-gray-50", 17 | secondary: 18 | "bg-gray-100 text-gray-900 hover:bg-gray-100/80 dark:bg-gray-800 dark:text-gray-50 dark:hover:bg-gray-800/80", 19 | ghost: "hover:bg-gray-100 hover:text-gray-900 dark:hover:bg-gray-800 dark:hover:text-gray-50", 20 | link: "text-gray-900 underline-offset-4 hover:underline dark:text-gray-50", 21 | }, 22 | size: { 23 | default: "h-10 px-4 py-2", 24 | sm: "h-9 rounded-md px-3", 25 | lg: "h-11 rounded-md px-8", 26 | icon: "h-10 w-10", 27 | }, 28 | }, 29 | defaultVariants: { 30 | variant: "default", 31 | size: "default", 32 | }, 33 | } 34 | ) 35 | 36 | export interface ButtonProps 37 | extends React.ButtonHTMLAttributes, 38 | VariantProps { 39 | asChild?: boolean 40 | } 41 | 42 | const Button = React.forwardRef( 43 | ({ className, variant, size, asChild = false, ...props }, ref) => { 44 | const Comp = asChild ? 
Slot : "button" 45 | return ( 46 | 51 | ) 52 | } 53 | ) 54 | Button.displayName = "Button" 55 | 56 | export { Button, buttonVariants } 57 | -------------------------------------------------------------------------------- /apps/web-app/app/db/model.js: -------------------------------------------------------------------------------- 1 | import mongoose from "mongoose"; 2 | 3 | const MONGODB_URL = process.env.MONGO_DB; 4 | console.log("Aadesh: ",MONGODB_URL) 5 | 6 | mongoose.connect(MONGODB_URL); 7 | 8 | const videoSchema = new mongoose.Schema({ 9 | videoUrl: String, 10 | transcription: String, 11 | subtitles: String, 12 | chapters: String, 13 | title: String, 14 | description: String, 15 | }); 16 | 17 | const Video = mongoose.models.Video || mongoose.model("Video", videoSchema); 18 | 19 | export { Video }; 20 | -------------------------------------------------------------------------------- /apps/web-app/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aadeshkulkarni/sanchay-ai/cf4b816af44395ee8d0da03dba366491926588ef/apps/web-app/app/favicon.ico -------------------------------------------------------------------------------- /apps/web-app/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /apps/web-app/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | // import { Inter } from "next/font/google"; 3 | import "./globals.css"; 4 | import Header from './components/Header'; 5 | // const inter = Inter({ subsets: ["latin"] }); 6 | 7 | export const metadata: Metadata = { 8 | title: "Sanchay AI", 9 | description: "Video meta data generation", 10 | }; 11 | 12 | export default function RootLayout({ 13 | children, 14 | }: Readonly<{ 15 | children: React.ReactNode; 16 | }>) { 17 | return ( 18 | 19 | 20 |
21 | {children} 22 | 23 | ); 24 | } 25 | -------------------------------------------------------------------------------- /apps/web-app/app/page.tsx: -------------------------------------------------------------------------------- 1 | import { Video } from './db/model' 2 | import { uploadToS3 } from '@/utils/fileUpload' 3 | import executeInBackground from '@/app/worker/worker' 4 | 5 | const create = async (formData: FormData) => { 6 | 'use server' 7 | console.log('dump') 8 | const title = formData.get('title') 9 | const file = formData.get('video') as File 10 | // TODOS: add file type = video check here 11 | if (!file.size || !title) { 12 | return 13 | } 14 | 15 | const videoUrl = await uploadToS3(file) 16 | const video = await Video.create({ title: title, videoUrl: videoUrl }) 17 | const newVideo = await video.save() 18 | await executeInBackground('task_queue', { ...newVideo }) 19 | } 20 | 21 | export default function Home() { 22 | return ( 23 |
24 |
25 | 26 | 27 | 28 |
29 |
30 | ) 31 | } 32 | -------------------------------------------------------------------------------- /apps/web-app/app/view/page.tsx: -------------------------------------------------------------------------------- 1 | //@ts-nocheck 2 | "use client"; 3 | 4 | import { useEffect, useState } from "react"; 5 | import { Button } from "../components/ui/button"; 6 | import { ResultDialog } from "../components/ResultDialog"; 7 | 8 | export interface video { 9 | _id: string; 10 | title: string; 11 | } 12 | export default function Home() { 13 | const [videos, setVideos] = useState([]); 14 | const [output, setOutput] = useState(""); 15 | const [activePreview, setActivePreview] = useState({}); 16 | useEffect(() => { 17 | async function fetchVideos() { 18 | const res = await fetch("./api/videos"); 19 | const data = await res.json(); 20 | setVideos(data.data); 21 | } 22 | fetchVideos(); 23 | }, []); 24 | useEffect(() => { 25 | console.log("Videos: ", videos); 26 | }, [videos]); 27 | 28 | function openDialog(data) { 29 | setDialogOpen(true); 30 | setOutput(data); 31 | } 32 | 33 | return ( 34 |
35 |
36 |

Video list

37 |
38 | {videos.length > 0 && 39 | videos?.map((video: video) => 40 | video.failed ? ( 41 | 42 | ) : ( 43 | 44 | ) 45 | )} 46 |
47 |
48 |
49 |

Preview

50 |
{output}
51 |
52 |
53 | ); 54 | } 55 | 56 | const ErrorCard = ({ video }) => ( 57 |
58 |
{video.title}
59 |
{video.error}
60 |
61 | Failed 62 |
63 |
64 | ); 65 | 66 | const SuccessCard = ({ video, setOutput }) => ( 67 |
68 |
{video.title}
69 |
70 |
71 | 75 |
76 |
77 | 80 | 83 | 86 |
87 |
88 |
Success
89 |
90 | ); 91 | -------------------------------------------------------------------------------- /apps/web-app/app/worker/worker.ts: -------------------------------------------------------------------------------- 1 | import amqp from 'amqplib' 2 | 3 | const RABBITMQ = process.env.RABBITMQ_DEV 4 | 5 | async function executeInBackground(queue:any, payload:any) { 6 | const connection = await amqp.connect(RABBITMQ); 7 | const channel = await connection.createChannel(); 8 | await channel.assertQueue(queue, { durable: false }); 9 | channel.sendToQueue(queue, Buffer.from(JSON.stringify(payload))); 10 | } 11 | 12 | export default executeInBackground -------------------------------------------------------------------------------- /apps/web-app/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.ts", 8 | "css": "app/globals.css", 9 | "baseColor": "gray", 10 | "cssVariables": false, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/app/components", 15 | "utils": "@/lib/utils" 16 | } 17 | } -------------------------------------------------------------------------------- /apps/web-app/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { type ClassValue, clsx } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /apps/web-app/next.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = {}; 3 | 4 | export default nextConfig; 5 | -------------------------------------------------------------------------------- /apps/web-app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "web", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@radix-ui/react-dialog": "^1.0.5", 13 | "@radix-ui/react-slot": "^1.0.2", 14 | "amqplib": "^0.10.3", 15 | "aws-sdk": "^2.1579.0", 16 | "class-variance-authority": "^0.7.0", 17 | "clsx": "^2.1.0", 18 | "lucide-react": "^0.363.0", 19 | "mongoose": "^8.2.2", 20 | "next": "14.1.3", 21 | "react": "^18", 22 | "react-dom": "^18", 23 | "react-markdown": "^9.0.1", 24 | "tailwind-merge": "^2.2.2", 25 | "tailwindcss-animate": "^1.0.7", 26 | "uuid": "^9.0.1" 27 | }, 28 | "devDependencies": { 29 | "@types/node": "^20", 30 | "@types/react": "^18", 31 | "@types/react-dom": "^18", 32 | "@types/uuid": "^9.0.8", 33 | "autoprefixer": "^10.0.1", 34 | "eslint": "^8", 35 | "eslint-config-next": "14.1.3", 36 | "postcss": "^8", 37 | "tailwindcss": "^3.3.0", 38 | "typescript": "^5" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /apps/web-app/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /apps/web-app/public/next.svg: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/web-app/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/web-app/tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "tailwindcss" 2 | 3 | const config = { 4 | darkMode: ["class"], 5 | content: [ 6 | './pages/**/*.{ts,tsx}', 7 | './components/**/*.{ts,tsx}', 8 | './app/**/*.{ts,tsx}', 9 | './src/**/*.{ts,tsx}', 10 | ], 11 | prefix: "", 12 | theme: { 13 | container: { 14 | center: true, 15 | padding: "2rem", 16 | screens: { 17 | "2xl": "1400px", 18 | }, 19 | }, 20 | extend: { 21 | keyframes: { 22 | "accordion-down": { 23 | from: { height: "0" }, 24 | to: { height: "var(--radix-accordion-content-height)" }, 25 | }, 26 | "accordion-up": { 27 | from: { height: "var(--radix-accordion-content-height)" }, 28 | to: { height: "0" }, 29 | }, 30 | }, 31 | animation: { 32 | "accordion-down": "accordion-down 0.2s ease-out", 33 | "accordion-up": "accordion-up 0.2s ease-out", 34 | }, 35 | }, 36 | }, 37 | plugins: [require("tailwindcss-animate")], 38 | } satisfies Config 39 | 40 | export default config -------------------------------------------------------------------------------- /apps/web-app/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": ["dom", "dom.iterable", "esnext"], 4 | "allowJs": true, 5 | "skipLibCheck": true, 6 | "strict": false, 7 | "noEmit": true, 8 | "esModuleInterop": true, 9 | "module": "esnext", 10 | "moduleResolution": "bundler", 11 | "resolveJsonModule": true, 12 | "isolatedModules": true, 13 | "jsx": "preserve", 14 | "incremental": true, 15 | "plugins": [ 16 | { 17 | "name": "next" 18 | } 19 | ], 20 | "paths": { 21 | "@/*": ["./*"] 22 | } 23 | }, 24 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts", "app/context/DialogContextProvider.ts"], 25 | "exclude": ["node_modules"] 26 | } 27 | -------------------------------------------------------------------------------- /apps/web-app/utils/fileUpload.js: -------------------------------------------------------------------------------- 1 | import AWS from 'aws-sdk' 2 | import { v4 as uuidv4 } from 'uuid' 3 | 4 | console.log('Process ENV: ', process.env) 5 | const s3 = new AWS.S3({ 6 | endpoint: 'http://localhost:4566', 7 | s3ForcePathStyle: true, 8 | accessKeyId: "NA", 9 | secretAccessKey: "NA", 10 | region: "ap-south-1" 11 | }) 12 | const BUCKET_NAME = process.env.S3_BUCKET_NAME 13 | s3.api.globalEndpoint = 's3.localhost.localstack.cloud' 14 | 15 | async function createBucketIfNotExists() { 16 | try { 17 | var params = { 18 | Bucket: BUCKET_NAME 19 | } 20 | const data = await s3.waitFor('bucketNotExists', params) 21 | console.log("2") 22 | console.log("Data from createBucket(): ",data) 23 | } catch (ex) { 24 | console.log("2 - Error:") 25 | console.log(ex) 26 | } 27 | } 28 | 29 | export const uploadToS3 = async (file) => { 30 | try { 31 | console.log("1") 32 | await createBucketIfNotExists() 33 | console.log("3 - bucket passed") 34 | const fileExtension = file.name.split('.').pop() 35 | const arrayBuffer = await file.arrayBuffer() 36 | const buffer = new Uint8Array(arrayBuffer) 37 | const params = { 38 | Bucket: BUCKET_NAME, 39 | Key: 
`${uuidv4()}.${fileExtension}`, 40 | Body: buffer 41 | } 42 | console.log("4 - params: ",params) 43 | const data = await s3.upload(params).promise() 44 | console.log("5 - data from upload: ",data) 45 | return data['Location'] 46 | } catch (ex) { 47 | console.error('Error uploading file:', ex) 48 | return false 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /apps/worker/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | GOOGLE_API_KEY= 3 | MONGO_DB= 4 | AMQP_URL=amqp://test:test@rabbitmq:5672 -------------------------------------------------------------------------------- /apps/worker/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | __pycache__ 3 | sanchay-env 4 | videos -------------------------------------------------------------------------------- /apps/worker/README.md: -------------------------------------------------------------------------------- 1 | ### RabbitMQ Consumer 2 | 3 | - Listens to the RabbitMQ queue 'task_queue' and executes a callback whenever an event arrives 4 | - The callback should do the following: 5 | - The callback expects 2 inputs: videoId and videoUrl 6 | - Download the video on the worker 7 | - Generate the transcription & captions 8 | - Update the database with the generated content, using videoId as the match condition 9 | 10 | 11 | Todos: 12 | - Separate the GPT and Whisper logic into 2 different consumers. 13 | - Add logic for video title, description, and YouTube tags 14 | 15 | 16 | ## Project Setup 17 | 18 | Prerequisite: Python 19 | 20 | ### 1. Install ffmpeg 21 | brew install ffmpeg (macOS; use your platform's package manager on other systems) 22 | 23 | ### 2. Python setup 24 | 25 | We recommend using `venv` for the virtual environment. You can read this article to learn more: https://www.freecodecamp.org/news/how-to-setup-virtual-environments-in-python/ 26 | 27 | Steps: 28 | - `pip install virtualenv` to install venv 29 | - `cd apps/worker` to change directory to the worker app 30 | - `python3 -m venv sanchay-env` to create a new virtual environment 'sanchay-env' 31 | - `source sanchay-env/bin/activate` to activate the newly created virtual environment 32 | - `pip install -r requirements.txt` to install all Python packages 33 | 34 | ### 3. Set up environment variables 35 | - Copy .env.example and rename it to .env 36 | - OPENAI_API_KEY= (required, get the API key from https://platform.openai.com/api-keys) 37 | - GOOGLE_API_KEY= (required, get the API key from https://ai.google.dev/gemini-api/docs/api-key) 38 | - MONGO_DB= (required, get the connection string from https://cloud.mongodb.com/) 39 | -------------------------------------------------------------------------------- /apps/worker/consumer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import json 4 | import pika 5 | from pymongo import MongoClient 6 | from utils import perform_magic, download_video, update_data, log_error_to_db 7 | 8 | MONGO_DB=os.getenv("MONGO_DB") 9 | AMQP_URL=os.getenv("AMQP_URL") 10 | client = MongoClient(MONGO_DB) 11 | db = client['test'] 12 | collection = db["videos"] 13 | 14 | # Testing | see all collections within the connected db 15 | # for coll in db.list_collection_names(): 16 | # print(coll) 17 | 18 | credentials = pika.PlainCredentials('test', 'test') 19 | parameters = pika.ConnectionParameters('localhost', credentials=credentials) 20 | connection = pika.BlockingConnection(parameters) 21 | # connection = pika.BlockingConnection(pika.ConnectionParameters(host="rabbitmq",port=5672,credentials=credentials,virtual_host="/")) 22 | channel = connection.channel() 23 | channel.queue_declare(queue="task_queue") 24 | print(" [*] Waiting for messages. To exit press CTRL+C") 25 | 26 | 27 | def callback(ch, method, properties, body): 28 | video_id = None # set before parsing so the error handler below can use it safely 29 | try: 30 | print(" [x] Received new message") 31 | data = json.loads(body.decode()) 32 | print(" [x] ", data["_doc"]) 33 | video_url = data["_doc"]["videoUrl"] 34 | video_id = data["_doc"]["_id"] 35 | filename = os.path.basename(video_url) 36 | download_video(video_url, filename) 37 | output = perform_magic(filename) 38 | update_data(collection, video_id, output) 39 | print(" [x] Done") 40 | ch.basic_ack(delivery_tag=method.delivery_tag) 41 | except Exception as e: 42 | print(" [x] Error: ", str(e)) 43 | if video_id: # only mark the video as failed if we know which one it was 44 | log_error_to_db(collection, video_id, str(e)) 45 | ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False) 46 | 47 | 48 | channel.basic_qos(prefetch_count=1) 49 | channel.basic_consume(queue="task_queue", on_message_callback=callback) 50 | 51 | channel.start_consuming() 52 | -------------------------------------------------------------------------------- /apps/worker/gpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import textwrap 4 | import google.generativeai as genai 5 | from openai import OpenAI 6 | from dotenv import load_dotenv 7 | 8 | load_dotenv() 9 | 10 | 11 | client = OpenAI() 12 | genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) 13 | 14 | 15 | def generate_chapters_openai(subtitles): 16 | chat_completion = client.chat.completions.create( 17 | messages=[ 18 | { 19 | "role": "user", 20 | "content": f"""For the below video subtitles, generate video chapters of the form: [start_time] - [chapter_title]. 21 | For example: 22 | 01:27 - Start of the class. 23 | 03:00 - Why do you need React? 24 | 07:03 - What is React? 25 | and so on. 
26 | start_time format should be hh:mm:ss :\n {subtitles}""", 27 | } 28 | ], 29 | model="gpt-3.5-turbo", 30 | ) 31 | return chat_completion.choices[0].message 32 | 33 | 34 | def generate_chapters_gemini(subtitles): 35 | model = genai.GenerativeModel("gemini-pro") 36 | response = model.generate_content( 37 | f"""You are a helpful assistant that receives video\'s subtitles in vtt as input and responds back with chapters for the video in the format: 38 | [ 39 | {{ "title": "Introduction", "start": "0:00"}}, 40 | {{ "title": "Chapter description", "start": "01:35" }} 41 | {{ "title": "Chapter description", "start": "02:50"}} 42 | {{ "title": "Chapter description", "start": "03:52" }} 43 | .. 44 | {{ "title": "Outro", "start": "07:46" }} 45 | ] 46 | Your response should be a valid json without the codeblock formatting. 47 | {subtitles} """ 48 | ) 49 | response.resolve() 50 | return response.text 51 | 52 | 53 | # def test(): 54 | # model = genai.GenerativeModel("gemini-pro") 55 | # response = model.generate_content("What is the meaning of life?") 56 | # # print(response) 57 | # response.resolve() 58 | # print(response.text) 59 | # # print(to_markdown(response.text)) 60 | 61 | # test() -------------------------------------------------------------------------------- /apps/worker/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.3.0 3 | cachetools==5.3.3 4 | certifi==2024.2.2 5 | charset-normalizer==3.3.2 6 | distro==1.9.0 7 | dnspython==2.6.1 8 | exceptiongroup==1.2.0 9 | filelock==3.13.1 10 | fsspec==2024.3.1 11 | google-ai-generativelanguage==0.4.0 12 | google-api-core==2.17.1 13 | google-auth==2.29.0 14 | google-generativeai==0.4.1 15 | googleapis-common-protos==1.63.0 16 | grpcio==1.62.1 17 | grpcio-status==1.62.1 18 | h11==0.14.0 19 | httpcore==1.0.4 20 | httpx==0.27.0 21 | idna==3.6 22 | Jinja2==3.1.3 23 | llvmlite==0.42.0 24 | load-dotenv==0.1.0 25 | MarkupSafe==2.1.5 26 | more-itertools==10.2.0 27 | mpmath==1.3.0 28 | networkx==3.2.1 29 | numba==0.59.1 30 | numpy==1.26.4 31 | openai==1.14.2 32 | openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab 33 | pika==1.3.2 34 | pillow==10.2.0 35 | proto-plus==1.23.0 36 | protobuf==4.25.3 37 | pyasn1==0.5.1 38 | pyasn1-modules==0.3.0 39 | pydantic==2.6.4 40 | pydantic_core==2.16.3 41 | pymongo==4.6.2 42 | python-dotenv==1.0.1 43 | regex==2023.12.25 44 | requests==2.31.0 45 | rsa==4.9 46 | semantic-version==2.10.0 47 | setuptools-rust==1.9.0 48 | sniffio==1.3.1 49 | sympy==1.12 50 | tiktoken==0.6.0 51 | tomli==2.0.1 52 | torch==2.2.1 53 | torchvision==0.17.1 54 | tqdm==4.66.2 55 | typing_extensions==4.10.0 56 | urllib3==2.2.1 57 | -------------------------------------------------------------------------------- /apps/worker/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import whisper 4 | import urllib.request 5 | from bson.objectid import ObjectId 6 | from writer import get_writer 7 | from gpt import generate_chapters_gemini 8 | 9 | def perform_magic(video_url): 10 | model = whisper.load_model("tiny") 11 | result = model.transcribe("./videos/" + video_url) 12 | writer = get_writer() 13 | writer_args = { 14 | "highlight_words": False, 15 | "max_line_count": None, 16 | "max_line_width": None, 17 | "max_words_per_line": None, 18 | } 19 | captions = writer(result, writer_args) 20 | result["captions"] = captions 21 | print(len(captions)) 22 | 
chapters = generate_chapters_gemini(captions) 23 | print(chapters) 24 | result["chapters"] = chapters 25 | return result 26 | 27 | 28 | def download_video(video_url, output_path): 29 | if not os.path.exists(os.path.dirname("./videos/" + output_path)): 30 | os.makedirs(os.path.dirname("./videos/" + output_path)) 31 | urllib.request.urlretrieve(video_url, "./videos/" + output_path) 32 | 33 | 34 | def update_data(collection, video_id, output): 35 | condition = {"_id": ObjectId(video_id)} 36 | newvalues = { 37 | "$set": {"transcription": output["text"], "captions": output["captions"], "chapters": output["chapters"]} 38 | } 39 | result = collection.update_one(condition, newvalues) 40 | print(result) 41 | 42 | 43 | def log_error_to_db(collection, video_id, error): 44 | condition = {"_id": ObjectId(video_id)} 45 | newvalues = {"$set": {"failed": True, "error": error}} 46 | result = collection.update_one(condition, newvalues) 47 | print(result) 48 | -------------------------------------------------------------------------------- /apps/worker/writer.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import sys 5 | import zlib 6 | from typing import Callable, List, Optional, TextIO 7 | 8 | system_encoding = sys.getdefaultencoding() 9 | 10 | if system_encoding != "utf-8": 11 | 12 | def make_safe(string): 13 | # replaces any character not representable using the system default encoding with an '?', 14 | # avoiding UnicodeEncodeError (https://github.com/openai/whisper/discussions/729). 15 | return string.encode(system_encoding, errors="replace").decode(system_encoding) 16 | 17 | else: 18 | 19 | def make_safe(string): 20 | # utf-8 can encode any Unicode code point, so no need to do the round-trip encoding 21 | return string 22 | 23 | 24 | def exact_div(x, y): 25 | assert x % y == 0 26 | return x // y 27 | 28 | 29 | def str2bool(string): 30 | str2val = {"True": True, "False": False} 31 | if string in str2val: 32 | return str2val[string] 33 | else: 34 | raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}") 35 | 36 | 37 | def optional_int(string): 38 | return None if string == "None" else int(string) 39 | 40 | 41 | def optional_float(string): 42 | return None if string == "None" else float(string) 43 | 44 | 45 | def compression_ratio(text) -> float: 46 | text_bytes = text.encode("utf-8") 47 | return len(text_bytes) / len(zlib.compress(text_bytes)) 48 | 49 | 50 | def format_timestamp( 51 | seconds: float, always_include_hours: bool = False, decimal_marker: str = "." 
52 | ): 53 | assert seconds >= 0, "non-negative timestamp expected" 54 | milliseconds = round(seconds * 1000.0) 55 | 56 | hours = milliseconds // 3_600_000 57 | milliseconds -= hours * 3_600_000 58 | 59 | minutes = milliseconds // 60_000 60 | milliseconds -= minutes * 60_000 61 | 62 | seconds = milliseconds // 1_000 63 | milliseconds -= seconds * 1_000 64 | 65 | hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" 66 | return ( 67 | f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}" 68 | ) 69 | 70 | 71 | def get_start(segments: List[dict]) -> Optional[float]: 72 | return next( 73 | (w["start"] for s in segments for w in s["words"]), 74 | segments[0]["start"] if segments else None, 75 | ) 76 | 77 | 78 | def get_end(segments: List[dict]) -> Optional[float]: 79 | return next( 80 | (w["end"] for s in reversed(segments) for w in reversed(s["words"])), 81 | segments[-1]["end"] if segments else None, 82 | ) 83 | 84 | 85 | class ResultWriter: 86 | extension: str 87 | 88 | def __init__(self, options: Optional[dict] = None): 89 | self.options = options or {} # Set default empty dictionary 90 | 91 | def __call__( 92 | self, result: dict, options: Optional[dict] = None, **kwargs 93 | ): 94 | return self.write_result(result, options, **kwargs) 95 | # audio_basename = os.path.basename(audio_path) 96 | # audio_basename = os.path.splitext(audio_basename)[0] 97 | # output_path = os.path.join(self.output_dir, audio_basename + "." + self.extension) 98 | # with open(output_path, "w", encoding="utf-8") as f: 99 | 100 | def write_result( 101 | self, result: dict, options: Optional[dict] = None, **kwargs 102 | ) -> str: 103 | raise NotImplementedError 104 | 105 | 106 | class SubtitlesWriter(ResultWriter): 107 | always_include_hours: bool 108 | decimal_marker: str 109 | 110 | def iterate_result( 111 | self, 112 | result: dict, 113 | options: Optional[dict] = None, 114 | *, 115 | max_line_width: Optional[int] = None, 116 | max_line_count: Optional[int] = None, 117 | highlight_words: bool = False, 118 | max_words_per_line: Optional[int] = None, 119 | ): 120 | options = options or {} 121 | max_line_width = max_line_width or options.get("max_line_width") 122 | max_line_count = max_line_count or options.get("max_line_count") 123 | highlight_words = highlight_words or options.get("highlight_words", False) 124 | max_words_per_line = max_words_per_line or options.get("max_words_per_line") 125 | preserve_segments = max_line_count is None or max_line_width is None 126 | max_line_width = max_line_width or 1000 127 | max_words_per_line = max_words_per_line or 1000 128 | 129 | def iterate_subtitles(): 130 | line_len = 0 131 | line_count = 1 132 | # the next subtitle to yield (a list of word timings with whitespace) 133 | subtitle: List[dict] = [] 134 | last: float = get_start(result["segments"]) or 0.0 135 | for segment in result["segments"]: 136 | chunk_index = 0 137 | words_count = max_words_per_line 138 | while chunk_index < len(segment["words"]): 139 | remaining_words = len(segment["words"]) - chunk_index 140 | if max_words_per_line > len(segment["words"]) - chunk_index: 141 | words_count = remaining_words 142 | for i, original_timing in enumerate( 143 | segment["words"][chunk_index : chunk_index + words_count] 144 | ): 145 | timing = original_timing.copy() 146 | long_pause = ( 147 | not preserve_segments and timing["start"] - last > 3.0 148 | ) 149 | has_room = line_len + len(timing["word"]) <= max_line_width 150 | seg_break = i == 0 and len(subtitle) > 0 and 
preserve_segments 151 | if ( 152 | line_len > 0 153 | and has_room 154 | and not long_pause 155 | and not seg_break 156 | ): 157 | # line continuation 158 | line_len += len(timing["word"]) 159 | else: 160 | # new line 161 | timing["word"] = timing["word"].strip() 162 | if ( 163 | len(subtitle) > 0 164 | and max_line_count is not None 165 | and (long_pause or line_count >= max_line_count) 166 | or seg_break 167 | ): 168 | # subtitle break 169 | yield subtitle 170 | subtitle = [] 171 | line_count = 1 172 | elif line_len > 0: 173 | # line break 174 | line_count += 1 175 | timing["word"] = "\n" + timing["word"] 176 | line_len = len(timing["word"].strip()) 177 | subtitle.append(timing) 178 | last = timing["start"] 179 | chunk_index += max_words_per_line 180 | if len(subtitle) > 0: 181 | yield subtitle 182 | 183 | if len(result["segments"]) > 0 and "words" in result["segments"][0]: 184 | for subtitle in iterate_subtitles(): 185 | subtitle_start = self.format_timestamp(subtitle[0]["start"]) 186 | subtitle_end = self.format_timestamp(subtitle[-1]["end"]) 187 | subtitle_text = "".join([word["word"] for word in subtitle]) 188 | if highlight_words: 189 | last = subtitle_start 190 | all_words = [timing["word"] for timing in subtitle] 191 | for i, this_word in enumerate(subtitle): 192 | start = self.format_timestamp(this_word["start"]) 193 | end = self.format_timestamp(this_word["end"]) 194 | if last != start: 195 | yield last, start, subtitle_text 196 | 197 | yield start, end, "".join( 198 | [ 199 | ( 200 | re.sub(r"^(\s*)(.*)$", r"\1\2", word) 201 | if j == i 202 | else word 203 | ) 204 | for j, word in enumerate(all_words) 205 | ] 206 | ) 207 | last = end 208 | else: 209 | yield subtitle_start, subtitle_end, subtitle_text 210 | else: 211 | for segment in result["segments"]: 212 | segment_start = self.format_timestamp(segment["start"]) 213 | segment_end = self.format_timestamp(segment["end"]) 214 | segment_text = segment["text"].strip().replace("-->", "->") 215 | yield segment_start, segment_end, segment_text 216 | 217 | def format_timestamp(self, seconds: float): 218 | return format_timestamp( 219 | seconds=seconds, 220 | always_include_hours=self.always_include_hours, 221 | decimal_marker=self.decimal_marker, 222 | ) 223 | 224 | 225 | class WriteVTT(SubtitlesWriter): 226 | extension: str = "vtt" 227 | always_include_hours: bool = False 228 | decimal_marker: str = "." 
229 | 230 | 231 | def write_result(self, result: dict, options: Optional[dict] = None, **kwargs) -> str: 232 | output = "WEBVTT\n" 233 | result 234 | for start, end, text in self.iterate_result(result, options, **kwargs): 235 | output += f"{start} --> {end}\n{text}\n" 236 | return output 237 | 238 | def get_writer() -> Callable[[dict, TextIO, dict], None]: 239 | return WriteVTT("./output/") 240 | -------------------------------------------------------------------------------- /docs/app-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aadeshkulkarni/sanchay-ai/cf4b816af44395ee8d0da03dba366491926588ef/docs/app-1.png -------------------------------------------------------------------------------- /docs/app-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aadeshkulkarni/sanchay-ai/cf4b816af44395ee8d0da03dba366491926588ef/docs/app-2.png -------------------------------------------------------------------------------- /docs/system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aadeshkulkarni/sanchay-ai/cf4b816af44395ee8d0da03dba366491926588ef/docs/system.png -------------------------------------------------------------------------------- /docs/video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aadeshkulkarni/sanchay-ai/cf4b816af44395ee8d0da03dba366491926588ef/docs/video.mp4 --------------------------------------------------------------------------------
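For contributors who want to exercise the worker without running the web-app, the sketch below publishes a hand-crafted job onto `task_queue` in the shape `consumer.py` reads (a `_doc` object carrying `_id` and `videoUrl`). It is not part of the repository: the ObjectId and the video URL are placeholders you must replace with a real document from your `videos` collection and a reachable video file, and the AMQP URL must match your broker's credentials.

```python
# publish_test_job.py -- push one test message onto task_queue.
# A sketch, not part of the repo; replace the placeholder _id / videoUrl
# with real values and adjust the AMQP URL to your broker's credentials.
import json

import pika

payload = {
    "_doc": {
        "_id": "65f0c0ffee0000000000aaaa",  # placeholder; must be a valid ObjectId from the videos collection
        "videoUrl": "http://localhost:4566/sanchayai/example.mp4",  # placeholder URL
    }
}

connection = pika.BlockingConnection(
    pika.URLParameters("amqp://guest:guest@localhost:5672")
)
channel = connection.channel()
channel.queue_declare(queue="task_queue")  # same declaration as consumer.py
channel.basic_publish(
    exchange="",
    routing_key="task_queue",
    body=json.dumps(payload).encode(),
)
connection.close()
print("queued 1 test job on task_queue")
```

If the worker is running, it should pick the message up immediately, download the video, and write the transcription, captions, and chapters back to the referenced MongoDB document.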