├── talking-photo-frontend ├── src │ ├── vite-env.d.ts │ ├── main.tsx │ ├── index.css │ ├── assets │ │ └── react.svg │ ├── App.css │ └── App.tsx ├── public │ ├── images │ │ ├── favicon.ico │ │ └── 4p6vr8j7vbom4axo7k0 2.png │ └── vite.svg ├── tsconfig.json ├── vite.config.ts ├── .gitignore ├── index.html ├── tsconfig.node.json ├── tsconfig.app.json ├── eslint.config.js ├── package.json ├── README.md └── package-lock.json ├── talking_photo_webhook ├── .env.example ├── requirements.txt └── app.py ├── .gitignore └── README.md /talking-photo-frontend/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// <reference types="vite/client" /> 2 | -------------------------------------------------------------------------------- /talking_photo_webhook/.env.example: -------------------------------------------------------------------------------- 1 | # API Credentials 2 | CLIENT_ID = your_client_id_here 3 | CLIENT_SECRET = your_secret_key_here -------------------------------------------------------------------------------- /talking-photo-frontend/public/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKOOL-Official/akool-talking-photo-demo/HEAD/talking-photo-frontend/public/images/favicon.ico -------------------------------------------------------------------------------- /talking-photo-frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { "path": "./tsconfig.app.json" }, 5 | { "path": "./tsconfig.node.json" } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /talking_photo_webhook/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==3.0.2 2 | Flask-CORS==4.0.0 3 | Flask-SocketIO==5.3.6 4 | python-dotenv==1.0.1 5 | pycryptodome==3.20.0 6 | gevent==24.2.1 7 | 
gevent-websocket==0.10.1 -------------------------------------------------------------------------------- /talking-photo-frontend/public/images/4p6vr8j7vbom4axo7k0 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AKOOL-Official/akool-talking-photo-demo/HEAD/talking-photo-frontend/public/images/4p6vr8j7vbom4axo7k0 2.png -------------------------------------------------------------------------------- /talking-photo-frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import react from '@vitejs/plugin-react' 3 | 4 | // https://vite.dev/config/ 5 | export default defineConfig({ 6 | plugins: [react()], 7 | }) 8 | -------------------------------------------------------------------------------- /talking-photo-frontend/src/main.tsx: -------------------------------------------------------------------------------- 1 | import { StrictMode } from 'react' 2 | import { createRoot } from 'react-dom/client' 3 | import './index.css' 4 | import App from './App.tsx' 5 | 6 | createRoot(document.getElementById('root')!).render( 7 | <StrictMode> 8 | <App /> 9 | </StrictMode>, 10 | ) 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 
25 | -------------------------------------------------------------------------------- /talking-photo-frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /talking-photo-frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Talking Photo Demo 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /talking-photo-frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", 4 | "target": "ES2022", 5 | "lib": ["ES2023"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "bundler", 11 | "allowImportingTsExtensions": true, 12 | "isolatedModules": true, 13 | "moduleDetection": "force", 14 | "noEmit": true, 15 | 16 | /* Linting */ 17 | "strict": true, 18 | "noUnusedLocals": true, 19 | "noUnusedParameters": true, 20 | "noFallthroughCasesInSwitch": true, 21 | "noUncheckedSideEffectImports": true 22 | }, 23 | "include": ["vite.config.ts"] 24 | } 25 | -------------------------------------------------------------------------------- /talking-photo-frontend/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", 4 | "target": "ES2020", 5 | "useDefineForClassFields": true, 6 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 7 | "module": "ESNext", 8 | "skipLibCheck": true, 9 | 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "isolatedModules": true, 14 | "moduleDetection": "force", 15 | "noEmit": true, 16 | "jsx": "react-jsx", 17 | 18 | /* Linting */ 19 | "strict": true, 20 | "noUnusedLocals": true, 21 | "noUnusedParameters": true, 22 | "noFallthroughCasesInSwitch": true, 23 | "noUncheckedSideEffectImports": true 24 | }, 25 | "include": ["src"] 26 | } 27 | -------------------------------------------------------------------------------- /talking-photo-frontend/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from 
'@eslint/js' 2 | import globals from 'globals' 3 | import reactHooks from 'eslint-plugin-react-hooks' 4 | import reactRefresh from 'eslint-plugin-react-refresh' 5 | import tseslint from 'typescript-eslint' 6 | 7 | export default tseslint.config( 8 | { ignores: ['dist'] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ['**/*.{ts,tsx}'], 12 | languageOptions: { 13 | ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | 'react-hooks': reactHooks, 18 | 'react-refresh': reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 'react-refresh/only-export-components': [ 23 | 'warn', 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /talking-photo-frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "talking-photo-frontend", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "tsc -b && vite build", 9 | "lint": "eslint .", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "react": "^18.3.1", 14 | "react-dom": "^18.3.1", 15 | "socket.io-client": "^4.8.1" 16 | }, 17 | "devDependencies": { 18 | "@eslint/js": "^9.15.0", 19 | "@types/react": "^18.3.12", 20 | "@types/react-dom": "^18.3.1", 21 | "@vitejs/plugin-react": "^4.3.4", 22 | "eslint": "^9.15.0", 23 | "eslint-plugin-react-hooks": "^5.0.0", 24 | "eslint-plugin-react-refresh": "^0.4.14", 25 | "globals": "^15.12.0", 26 | "typescript": "~5.6.2", 27 | "typescript-eslint": "^8.15.0", 28 | "vite": "^6.0.1" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /talking-photo-frontend/public/vite.svg: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /talking-photo-frontend/src/index.css: -------------------------------------------------------------------------------- 1 | :root { 2 | font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif; 3 | line-height: 1.5; 4 | font-weight: 400; 5 | 6 | color-scheme: light dark; 7 | color: rgba(255, 255, 255, 0.87); 8 | background-color: #242424; 9 | 10 | font-synthesis: none; 11 | text-rendering: optimizeLegibility; 12 | -webkit-font-smoothing: antialiased; 13 | -moz-osx-font-smoothing: grayscale; 14 | } 15 | 16 | a { 17 | font-weight: 500; 18 | color: #646cff; 19 | text-decoration: inherit; 20 | } 21 | a:hover { 22 | color: #535bf2; 23 | } 24 | 25 | body { 26 | margin: 0; 27 | display: flex; 28 | place-items: center; 29 | min-width: 320px; 30 | min-height: 100vh; 31 | } 32 | 33 | h1 { 34 | font-size: 3.2em; 35 | line-height: 1.1; 36 | } 37 | 38 | button { 39 | border-radius: 8px; 40 | border: 1px solid transparent; 41 | padding: 0.6em 1.2em; 42 | font-size: 1em; 43 | font-weight: 500; 44 | font-family: inherit; 45 | background-color: #1a1a1a; 46 | cursor: pointer; 47 | transition: border-color 0.25s; 48 | } 49 | button:hover { 50 | border-color: #646cff; 51 | } 52 | button:focus, 53 | button:focus-visible { 54 | outline: 4px auto -webkit-focus-ring-color; 55 | } 56 | 57 | @media (prefers-color-scheme: light) { 58 | :root { 59 | color: #213547; 60 | background-color: #ffffff; 61 | } 62 | a:hover { 63 | color: #747bff; 64 | } 65 | button { 66 | background-color: #f9f9f9; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /talking-photo-frontend/README.md: -------------------------------------------------------------------------------- 1 | # React + TypeScript + Vite 2 | 3 | This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. 
4 | 5 | Currently, two official plugins are available: 6 | 7 | - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh 8 | - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh 9 | 10 | ## Expanding the ESLint configuration 11 | 12 | If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules: 13 | 14 | - Configure the top-level `parserOptions` property like this: 15 | 16 | ```js 17 | export default tseslint.config({ 18 | languageOptions: { 19 | // other options... 20 | parserOptions: { 21 | project: ['./tsconfig.node.json', './tsconfig.app.json'], 22 | tsconfigRootDir: import.meta.dirname, 23 | }, 24 | }, 25 | }) 26 | ``` 27 | 28 | - Replace `tseslint.configs.recommended` with `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked` 29 | - Optionally add `...tseslint.configs.stylisticTypeChecked` 30 | - Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config: 31 | 32 | ```js 33 | // eslint.config.js 34 | import react from 'eslint-plugin-react' 35 | 36 | export default tseslint.config({ 37 | // Set the react version 38 | settings: { react: { version: '18.3' } }, 39 | plugins: { 40 | // Add the react plugin 41 | react, 42 | }, 43 | rules: { 44 | // other rules... 45 | // Enable its recommended rules 46 | ...react.configs.recommended.rules, 47 | ...react.configs['jsx-runtime'].rules, 48 | }, 49 | }) 50 | ``` 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Talking Photo Application 2 | 3 | This project is a talking photo application with a React frontend and a Flask backend. 
The application enables users to animate photos, allowing them to generate synchronized lip movements and audio based on text input. Users can type text, which is converted into audio, and the photo is animated to appear as though it is speaking the provided text. 4 | 5 | ## Frontend Setup 6 | 7 | ### Prerequisites 8 | - Node Version Manager (nvm) 9 | - Node.js v20 10 | - npm or yarn 11 | 12 | ### Installation & Setup 13 | 1. Install Node.js v20 using nvm: 14 | ```bash 15 | nvm install 20 16 | nvm use 20 17 | ``` 18 | 19 | 2. Navigate to the frontend directory: 20 | ```bash 21 | cd talking-photo-frontend 22 | ``` 23 | 24 | 3. Install dependencies: 25 | ```bash 26 | npm install 27 | # or 28 | yarn install 29 | ``` 30 | 31 | 4. Start the development server: 32 | ```bash 33 | npm run dev 34 | # or 35 | yarn dev 36 | ``` 37 | 38 | The frontend will be available at `http://localhost:5173` 39 | 40 | ### Configuration 41 | You'll need to update the webhook URLs in `src/App.tsx`. Search for URLs containing `ngrok-free.app` and replace them with your ngrok forwarding URL. For example: 42 | 43 | Change: 44 | ```typescript 45 | webhookUrl: "https://c184-219-91-134-123.ngrok-free.app/api/webhook" 46 | ``` 47 | to: 48 | ```typescript 49 | webhookUrl: "https://your-ngrok-url.ngrok-free.app/api/webhook" 50 | ``` 51 | 52 | ## Backend Setup 53 | 54 | ### Prerequisites 55 | - Python 3.x 56 | - pip 57 | - Virtual environment (recommended) 58 | 59 | ### Installation & Setup 60 | 1. Create and activate a virtual environment: 61 | ```bash 62 | python -m venv venv 63 | source venv/bin/activate # On Windows: venv\Scripts\activate 64 | ``` 65 | 66 | 2. Navigate to the backend directory: 67 | ```bash 68 | cd talking_photo_webhook 69 | ``` 70 | 71 | 3. Install dependencies: 72 | ```bash 73 | pip install -r requirements.txt 74 | ``` 75 | 76 | 4. 
Create a `.env` file in the backend directory with your credentials: 77 | ```env 78 | CLIENT_ID=your_client_id 79 | CLIENT_SECRET=your_client_secret 80 | ``` 81 | 82 | 5. Start the Flask server: 83 | ```bash 84 | python app.py 85 | ``` 86 | 87 | The backend will be available at `http://localhost:3008` 88 | 89 | ## Setting up ngrok 90 | 91 | ngrok is required to create a public URL for your local webhook endpoint. 92 | 93 | 1. Install ngrok: 94 | - Download from ngrok website 95 | - Sign up for a free account 96 | - Follow the installation instructions for your OS 97 | 98 | 2. Authenticate ngrok: 99 | ```bash 100 | ngrok config add-authtoken your_auth_token 101 | ``` 102 | 103 | 3. Start ngrok to forward your backend port: 104 | ```bash 105 | ngrok http 3008 106 | ``` 107 | 108 | Copy the forwarding URL (e.g., `https://your-ngrok-url.ngrok-free.app`) and update it in the frontend code as described in the Frontend Configuration section. 109 | 110 | ## Important Notes 111 | - Make sure both frontend and backend servers are running simultaneously 112 | - The ngrok URL changes every time you restart ngrok (unless you have a paid plan) 113 | - Update the webhook URLs in the frontend whenever you get a new ngrok URL 114 | - Keep your `CLIENT_ID` and `CLIENT_SECRET` secure and never commit them to version control 115 | 116 | ## Troubleshooting 117 | - If you encounter CORS issues, ensure the backend CORS settings are properly configured 118 | - If the websocket connection fails, check that your ports aren't blocked by a firewall 119 | - For ngrok connection issues, ensure your authtoken is properly configured 120 | -------------------------------------------------------------------------------- /talking-photo-frontend/src/assets/react.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /talking_photo_webhook/app.py: 
-------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify, Response 2 | from flask_cors import CORS 3 | from Crypto.Cipher import AES 4 | from dotenv import load_dotenv 5 | from flask_socketio import SocketIO, emit 6 | import base64 7 | import json 8 | import time 9 | import os 10 | 11 | load_dotenv() 12 | 13 | app = Flask(__name__) 14 | # Allow all origins with CORS 15 | CORS(app, resources={r"/*": {"origins": "*"}}) 16 | 17 | # Update SocketIO configuration to allow all origins 18 | socketio = SocketIO(app, 19 | cors_allowed_origins="*", 20 | ping_timeout=60, 21 | ping_interval=25, 22 | async_mode='gevent') 23 | 24 | # Store events temporarily in memory 25 | events = [] 26 | 27 | def generate_aes_decrypt(data_encrypt, client_id, client_secret): 28 | aes_key = client_secret.encode('utf-8') 29 | 30 | # Ensure the IV is 16 bytes long 31 | iv = client_id.encode('utf-8') 32 | iv = iv[:16] if len(iv) >= 16 else iv.ljust(16, b'\0') 33 | 34 | cipher = AES.new(aes_key, AES.MODE_CBC, iv) 35 | decrypted_data = cipher.decrypt(base64.b64decode(data_encrypt)) 36 | 37 | # Handle padding 38 | padding_len = decrypted_data[-1] 39 | return decrypted_data[:-padding_len].decode('utf-8') 40 | 41 | @app.route('/test-app', methods=['GET']) 42 | def test_app(): 43 | #emit socket event 44 | socketio.emit('message', {'data': 'Hello, World!'}) 45 | return jsonify({"message": "Hello, World!"}), 200 46 | 47 | @app.route('/api/webhook', methods=['POST']) 48 | def webhook(): 49 | print("Webhook received") 50 | # Log the raw request data 51 | print("Raw request data:", request.get_data()) 52 | data = request.get_json() 53 | print("JSON data received:", data) # Add this line to log the incoming JSON 54 | 55 | # Extract the encrypted data and metadata 56 | encrypted_data = data.get('dataEncrypt') 57 | client_id = os.getenv('CLIENT_ID') 58 | client_secret = os.getenv('CLIENT_SECRET') 59 | 60 | try: 61 | # Decrypt the data 62 | 
decrypted_data = generate_aes_decrypt(encrypted_data, client_id, client_secret) 63 | print("Decrypted Data:", decrypted_data) 64 | 65 | # Process the decrypted data 66 | try: 67 | decrypted_json = json.loads(decrypted_data) 68 | print("Parsed JSON:", decrypted_json) # Add this line for debugging 69 | 70 | # Check if required fields exist 71 | if 'status' not in decrypted_json: 72 | print("Missing 'status' in payload:", decrypted_json) 73 | return jsonify({"message": "Invalid payload structure - missing video_status"}), 400 74 | 75 | # Log all status updates 76 | print(f"Processing status {decrypted_json['status']} with full payload:", decrypted_json) 77 | 78 | # Handle different status codes 79 | if decrypted_json['status'] == 3: 80 | print("Status is 3, emitting event", decrypted_json) 81 | socketio.emit('message', {'data': decrypted_json, 'type': 'event'}) 82 | elif decrypted_json['status'] == 4: 83 | print("Face swap failed") 84 | socketio.emit('message', { 85 | 'type': 'error', 86 | 'message': 'Face swap failed. This could be due to invalid VSideo/Image format, network issues, or processing errors. Please try again or contact support if the issue persists.' 
87 | }) 88 | else: 89 | print(f"Received status {decrypted_json['status']}, payload:", decrypted_json) 90 | # Optionally emit other status updates 91 | socketio.emit('message', {'data': decrypted_json, 'type': 'status_update'}) 92 | 93 | return jsonify({ 94 | "message": "Webhook received and processed successfully", 95 | "decrypted_data": decrypted_json 96 | }), 200 97 | 98 | except json.JSONDecodeError as je: 99 | print(f"JSON parsing error: {je}") 100 | return jsonify({"message": "Invalid JSON format in decrypted data"}), 400 101 | 102 | except Exception as e: 103 | print(f"Error processing webhook: {e}") 104 | return jsonify({"message": f"Error processing webhook: {str(e)}"}), 400 105 | 106 | 107 | @socketio.on('connect') 108 | def handle_connect(): 109 | print("Client connected") 110 | emit('message', {'data': 'Connected to server', 'type': 'info'}) 111 | 112 | @socketio.on('disconnect') 113 | def handle_disconnect(): 114 | print("Client disconnected") 115 | 116 | 117 | if __name__ == '__main__': 118 | socketio.run(app, host='0.0.0.0', port=3008) 119 | 120 | -------------------------------------------------------------------------------- /talking-photo-frontend/src/App.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --primary-color: #7b7bfa; 3 | --secondary-color: #334895; 4 | --bg-color: #000814; 5 | --text-color: #ffffff; 6 | --accent-color: #2395de; 7 | } 8 | 9 | body { 10 | background-color: var(--bg-color); 11 | color: var(--text-color); 12 | } 13 | 14 | .container { 15 | min-height: 100vh; 16 | display: flex; 17 | justify-content: center; 18 | align-items: center; 19 | padding: 2rem; 20 | margin: 0 auto; 21 | position: fixed; 22 | top: 0; 23 | left: 0; 24 | right: 0; 25 | bottom: 0; 26 | } 27 | 28 | .auth-section, .photo-section { 29 | position: relative; 30 | background: rgba(0, 0, 0, 0.7); 31 | padding: 3rem; 32 | border-radius: 0; 33 | border: 1px solid var(--accent-color); 34 | box-shadow: 0 0 20px 
rgba(0, 255, 255, 0.2); 35 | max-width: 600px; 36 | width: 100%; 37 | margin: auto; 38 | text-align: center; 39 | } 40 | 41 | .glowing-circle { 42 | position: absolute; 43 | width: 150px; 44 | height: 150px; 45 | background: var(--primary-color); 46 | filter: blur(100px); 47 | opacity: 0.3; 48 | animation: glitch 4s infinite; 49 | z-index: -1; 50 | } 51 | 52 | .title { 53 | font-size: 2.5rem; 54 | margin-bottom: 2rem; 55 | color: var(--text-color); 56 | text-transform: uppercase; 57 | text-shadow: 2px 2px var(--primary-color), 58 | -2px -2px var(--secondary-color); 59 | letter-spacing: 3px; 60 | -webkit-text-fill-color: var(--text-color); 61 | text-align: center; 62 | } 63 | 64 | .subtitle { 65 | font-size: 1.8rem; 66 | margin-bottom: 1.5rem; 67 | color: var(--text-color); 68 | text-align: center; 69 | } 70 | 71 | .input-container { 72 | display: flex; 73 | flex-direction: column; 74 | align-items: center; 75 | width: 100%; 76 | } 77 | 78 | .input-field { 79 | width: 100%; 80 | padding: 0.8rem; 81 | margin-bottom: 1rem; 82 | background: rgba(0, 0, 0, 0.8); 83 | border: 1px solid var(--accent-color); 84 | border-radius: 0; 85 | color: var(--text-color); 86 | font-family: monospace; 87 | transition: all 0.3s ease; 88 | } 89 | 90 | .input-field:focus { 91 | outline: none; 92 | border-color: var(--primary-color); 93 | box-shadow: 0 0 20px rgba(255, 0, 85, 0.3); 94 | } 95 | 96 | .textarea { 97 | min-height: 100px; 98 | resize: vertical; 99 | } 100 | 101 | .submit-btn { 102 | width: 50%; 103 | padding: 0.8rem; 104 | background: transparent; 105 | border: 1px solid var(--primary-color); 106 | color: var(--primary-color); 107 | font-weight: bold; 108 | text-transform: uppercase; 109 | letter-spacing: 2px; 110 | cursor: pointer; 111 | position: relative; 112 | overflow: hidden; 113 | transition: all 0.3s ease; 114 | margin: 0 auto; 115 | } 116 | 117 | .submit-btn:hover { 118 | background: var(--primary-color); 119 | color: var(--bg-color); 120 | transform: translateY(0); 
121 | box-shadow: 0 0 30px rgba(255, 0, 85, 0.5); 122 | } 123 | 124 | @keyframes glitch { 125 | 0% { 126 | transform: translate(0); 127 | opacity: 0.3; 128 | } 129 | 20% { 130 | transform: translate(-10px, 10px); 131 | opacity: 0.4; 132 | } 133 | 40% { 134 | transform: translate(10px, -10px); 135 | opacity: 0.2; 136 | } 137 | 60% { 138 | transform: translate(-5px, 5px); 139 | opacity: 0.3; 140 | } 141 | 80% { 142 | transform: translate(5px, -5px); 143 | opacity: 0.4; 144 | } 145 | 100% { 146 | transform: translate(0); 147 | opacity: 0.3; 148 | } 149 | } 150 | 151 | .logo { 152 | width: 120px; 153 | height: auto; 154 | display: block; 155 | margin: 0 auto 1.5rem; 156 | filter: drop-shadow(0 0 10px var(--accent-color)); 157 | } 158 | 159 | select.input-field { 160 | appearance: none; 161 | -webkit-appearance: none; 162 | -moz-appearance: none; 163 | background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='%230ff' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6 9 12 15 18 9'%3e%3c/polyline%3e%3c/svg%3e"); 164 | background-repeat: no-repeat; 165 | background-position: right 1rem center; 166 | background-size: 1em; 167 | padding-right: 2.5rem; 168 | } 169 | 170 | select.input-field:disabled { 171 | opacity: 0.5; 172 | cursor: not-allowed; 173 | } 174 | 175 | select.input-field option { 176 | background-color: var(--bg-color); 177 | color: var(--text-color); 178 | padding: 10px; 179 | font-size: 0.9em; 180 | line-height: 1.4; 181 | } 182 | 183 | select.input-field option:hover { 184 | background-color: rgba(0, 255, 255, 0.1); 185 | } 186 | 187 | .popup { 188 | position: fixed; 189 | top: 0; 190 | left: 0; 191 | width: 100%; 192 | height: 100%; 193 | background-color: rgba(0, 0, 0, 0.85); 194 | display: flex; 195 | justify-content: center; 196 | align-items: center; 197 | z-index: 1000; 198 | } 199 | 200 | .popup-content { 201 | background: rgba(0, 0, 
0, 0.7); 202 | padding: 2rem; 203 | border-radius: 0; 204 | border: 1px solid var(--accent-color); 205 | box-shadow: 0 0 20px rgba(0, 255, 255, 0.2); 206 | text-align: center; 207 | color: var(--text-color); 208 | max-width: 400px; 209 | width: 90%; 210 | } 211 | 212 | .popup-content p { 213 | margin-bottom: 1.5rem; 214 | font-size: 1.1rem; 215 | } 216 | 217 | .popup-content button { 218 | background: transparent; 219 | border: 1px solid var(--accent-color); 220 | color: var(--accent-color); 221 | padding: 0.8rem 2rem; 222 | cursor: pointer; 223 | transition: all 0.3s ease; 224 | text-transform: uppercase; 225 | letter-spacing: 2px; 226 | } 227 | 228 | .popup-content button:hover { 229 | background: var(--accent-color); 230 | color: var(--bg-color); 231 | transform: translateY(-2px); 232 | box-shadow: 0 5px 15px rgba(0, 255, 255, 0.3); 233 | } 234 | 235 | .video-popup { 236 | position: fixed; 237 | top: 0; 238 | left: 0; 239 | width: 100%; 240 | height: 100%; 241 | background-color: rgba(0, 0, 0, 0.85); 242 | display: flex; 243 | justify-content: center; 244 | align-items: center; 245 | z-index: 1000; 246 | animation: fadeIn 0.3s ease-in-out; 247 | } 248 | 249 | .video-popup-content { 250 | background-color: var(--bg-color); 251 | border: 1px solid var(--accent-color); 252 | box-shadow: 0 0 30px rgba(0, 255, 255, 0.3); 253 | padding: 2rem; 254 | border-radius: 4px; 255 | text-align: center; 256 | max-width: 800px; 257 | width: 90%; 258 | position: relative; 259 | animation: slideUp 0.4s ease-out; 260 | } 261 | 262 | .video-container { 263 | position: relative; 264 | width: 100%; 265 | margin-bottom: 1.5rem; 266 | border: 1px solid var(--accent-color); 267 | overflow: hidden; 268 | } 269 | 270 | .video-player { 271 | width: 100%; 272 | max-height: 70vh; 273 | background: #000; 274 | } 275 | 276 | .close-button { 277 | position: absolute; 278 | top: -15px; 279 | right: -15px; 280 | width: 30px; 281 | height: 30px; 282 | border-radius: 50%; 283 | background: 
var(--primary-color); 284 | border: none; 285 | color: white; 286 | font-size: 1.2rem; 287 | cursor: pointer; 288 | display: flex; 289 | align-items: center; 290 | justify-content: center; 291 | transition: all 0.3s ease; 292 | } 293 | 294 | .close-button:hover { 295 | transform: rotate(90deg); 296 | background: var(--accent-color); 297 | } 298 | 299 | .download-button { 300 | background: transparent; 301 | border: 2px solid var(--accent-color); 302 | color: var(--accent-color); 303 | padding: 0.8rem 2rem; 304 | font-size: 1rem; 305 | cursor: pointer; 306 | transition: all 0.3s ease; 307 | text-transform: uppercase; 308 | letter-spacing: 2px; 309 | margin-top: 1rem; 310 | position: relative; 311 | overflow: hidden; 312 | } 313 | 314 | .download-button:hover { 315 | background: var(--primary-color); 316 | color: var(--bg-color); 317 | transform: translateY(-2px); 318 | box-shadow: 0 5px 15px rgba(0, 255, 255, 0.3); 319 | } 320 | 321 | @keyframes fadeIn { 322 | from { opacity: 0; } 323 | to { opacity: 1; } 324 | } 325 | 326 | @keyframes slideUp { 327 | from { 328 | transform: translateY(50px); 329 | opacity: 0; 330 | } 331 | to { 332 | transform: translateY(0); 333 | opacity: 1; 334 | } 335 | } 336 | 337 | .processing-popup { 338 | position: fixed; 339 | top: 0; 340 | left: 0; 341 | width: 100%; 342 | height: 100%; 343 | background: rgba(0, 0, 0, 0.85); 344 | display: flex; 345 | justify-content: center; 346 | align-items: center; 347 | z-index: 1000; 348 | } 349 | 350 | .processing-content { 351 | background: rgba(0, 0, 0, 0.7); 352 | padding: 2rem; 353 | border-radius: 0; 354 | text-align: center; 355 | max-width: 400px; 356 | border: 1px solid var(--accent-color); 357 | box-shadow: 0 0 30px rgba(0, 255, 255, 0.2); 358 | } 359 | 360 | .loader { 361 | width: 80px; 362 | height: 80px; 363 | margin: 20px auto; 364 | border: 4px solid rgba(0, 255, 255, 0.1); 365 | border-top: 4px solid var(--accent-color); 366 | border-right: 4px solid var(--primary-color); 367 | 
border-radius: 50%; 368 | animation: spin 1s linear infinite; 369 | } 370 | 371 | .processing-message { 372 | color: var(--text-color); 373 | margin-top: 20px; 374 | font-size: 1.2rem; 375 | text-transform: uppercase; 376 | letter-spacing: 2px; 377 | text-shadow: 0 0 10px var(--accent-color); 378 | } 379 | 380 | .processing-submessage { 381 | color: var(--text-color); 382 | opacity: 0.8; 383 | margin-top: 10px; 384 | font-size: 0.9rem; 385 | line-height: 1.4; 386 | } 387 | 388 | @keyframes spin { 389 | 0% { transform: rotate(0deg); } 390 | 100% { transform: rotate(360deg); } 391 | } 392 | 393 | .preview-icon-container:hover .preview-hover { 394 | display: block !important; 395 | } 396 | 397 | .url-input-container { 398 | width: 100%; 399 | position: relative; 400 | display: flex; 401 | align-items: center; 402 | gap: 10px; 403 | } 404 | 405 | .preview-button-container { 406 | position: relative; 407 | } 408 | 409 | .preview-button { 410 | color: var(--text-color); 411 | cursor: pointer; 412 | font-size: 14px; 413 | padding: 4px 8px; 414 | border: 1px solid var(--primary-color); 415 | border-radius: 4px; 416 | background: transparent; 417 | } 418 | 419 | .preview-hover { 420 | display: none; 421 | position: absolute; 422 | top: 100%; 423 | right: 0; 424 | margin-top: 10px; 425 | padding: 5px; 426 | background: #1a1a1a; 427 | border: 1px solid var(--secondary-color); 428 | border-radius: 4px; 429 | box-shadow: 0 2px 10px rgba(0, 255, 255, 0.2); 430 | z-index: 1000; 431 | } 432 | 433 | .preview-button-container:hover .preview-hover { 434 | display: block; 435 | } 436 | 437 | .preview-image { 438 | max-width: 200px; 439 | max-height: 200px; 440 | object-fit: contain; 441 | } 442 | 443 | .textarea-container { 444 | position: relative; 445 | width: 100%; 446 | } 447 | 448 | .char-counter { 449 | position: absolute; 450 | bottom: 5px; 451 | right: 10px; 452 | font-size: 0.8em; 453 | color: #666; 454 | } 455 | 456 | .auth-tabs { 457 | display: flex; 458 | justify-content: 
center; 459 | gap: 1rem; 460 | margin-bottom: 2rem; 461 | } 462 | 463 | .tab-btn { 464 | background: transparent; 465 | border: 1px solid var(--accent-color); 466 | color: var(--accent-color); 467 | padding: 0.5rem 1rem; 468 | cursor: pointer; 469 | transition: all 0.3s ease; 470 | } 471 | 472 | .tab-btn:hover { 473 | background: rgba(0, 255, 255, 0.1); 474 | } 475 | 476 | .tab-btn.active { 477 | background: var(--accent-color); 478 | color: var(--bg-color); 479 | } 480 | 481 | .auth-section form { 482 | display: flex; 483 | flex-direction: column; 484 | gap: 1rem; 485 | width: 100%; 486 | } -------------------------------------------------------------------------------- /talking-photo-frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect, useRef } from 'react' 2 | import './App.css' 3 | import io from 'socket.io-client' 4 | 5 | interface Voice { 6 | voice_id: string; 7 | id: string; 8 | name: string; 9 | gender: string; 10 | accent?: string; 11 | description?: string; 12 | } 13 | 14 | 15 | function App() { 16 | const [apiToken, setApiToken] = useState('') 17 | const [isAuthenticated, setIsAuthenticated] = useState(false) 18 | const [photoUrl, setPhotoUrl] = useState('https://i.ibb.co/gRX4tFs/art-74050-1920.jpg') 19 | const [prompt, setPrompt] = useState('') 20 | const [voices, setVoices] = useState([]) 21 | const [selectedGender, setSelectedGender] = useState('') 22 | const [selectedVoice, setSelectedVoice] = useState('') 23 | const [socket, setSocket] = useState(io('http://localhost:3008')) 24 | const [showProcessingPopup, setShowProcessingPopup] = useState(false) 25 | const [errorMessage, setErrorMessage] = useState('') 26 | const [showErrorPopup, setShowErrorPopup] = useState(false) 27 | const [rate] = useState("100%") 28 | const [isConnected, setIsConnected] = useState(false) 29 | const [videoPopupUrl, setVideoPopupUrl] = useState(''); 30 | const [charCount, setCharCount] = 
useState(0) 31 | const [authTab, setAuthTab] = useState('token') // 'token' or 'client' 32 | const [clientId, setClientId] = useState('') 33 | const [clientSecret, setClientSecret] = useState('') 34 | 35 | useEffect(() => { 36 | if (isAuthenticated) { 37 | fetchVoices() 38 | } 39 | }, [isAuthenticated]) 40 | 41 | useEffect(()=>{ 42 | socket.current = io('http://localhost:3008', { 43 | reconnection: true, 44 | reconnectionAttempts: 5, 45 | reconnectionDelay: 1000, 46 | withCredentials: true, 47 | transports: ['websocket', 'polling'] 48 | }); 49 | socket.current.on("connect", () => { 50 | console.log("Connected to WebSocket server"); 51 | setIsConnected(true); 52 | }); 53 | 54 | socket.current.on("message", async (msg) => { 55 | if (msg.type === 'event') { 56 | if (msg.data.type === 'audio') { 57 | // If it's audio type, make the API call for video creation 58 | try { 59 | const payload = { 60 | talking_photo_url: photoUrl, // The image URL from user input 61 | audio_url: msg.data.url, // The audio URL from the event 62 | webhookUrl: "https://297a-219-91-134-123.ngrok-free.app/api/webhook" 63 | }; 64 | 65 | const response = await fetch('https://openapi.akool.com/api/open/v3/content/video/createbytalkingphoto', { 66 | method: 'POST', 67 | headers: { 68 | 'Authorization': `Bearer ${apiToken}`, 69 | 'Content-Type': 'application/json' 70 | }, 71 | body: JSON.stringify(payload) 72 | }); 73 | 74 | if (!response.ok) { 75 | throw new Error('Failed to create video'); 76 | } 77 | 78 | // Keep showing processing popup while waiting for final video 79 | setShowProcessingPopup(true); 80 | 81 | } catch (error) { 82 | console.error('Error creating video:', error); 83 | setErrorMessage('Failed to create video'); 84 | setShowErrorPopup(true); 85 | setShowProcessingPopup(false); 86 | } 87 | } else { 88 | // If it's not audio type (i.e., final video) 89 | const videoUrl = msg.data.url; 90 | setVideoPopupUrl(videoUrl); // Set the video URL to show in popup 91 | 
setShowProcessingPopup(false); 92 | } 93 | } else if (msg.type === 'error') { 94 | // Handle error message 95 | setShowProcessingPopup(false); 96 | setErrorMessage(msg.message); 97 | setShowErrorPopup(true); 98 | } 99 | }); 100 | 101 | socket.current.on("connect_error", (error) => { 102 | console.error("WebSocket connection error:", error); 103 | setErrorMessage("Failed to connect to server"); 104 | setIsConnected(false); 105 | }); 106 | 107 | socket.current.on("disconnect", (reason) => { 108 | console.log("Disconnected from server:", reason); 109 | setIsConnected(false); 110 | }); 111 | 112 | return () => { 113 | if (socket.current) { 114 | socket.current.disconnect(); 115 | } 116 | } 117 | }, [apiToken, photoUrl]); 118 | 119 | useEffect(() => { 120 | if (selectedVoice) { 121 | const selectedVoiceDetails = voices.find(voice => voice.voice_id === selectedVoice); 122 | console.log('Selected Voice Details:', selectedVoiceDetails); 123 | } 124 | }, [selectedVoice, voices]); 125 | 126 | const fetchVoices = async () => { 127 | try { 128 | const response = await fetch('https://openapi.akool.com/api/open/v3/voice/list?from=3', { 129 | headers: { 130 | 'Authorization': `Bearer ${apiToken}` 131 | } 132 | }); 133 | const data = await response.json(); 134 | setVoices(data.data || []); 135 | } catch (error) { 136 | console.error('Error fetching voices:', error); 137 | } 138 | } 139 | 140 | const handleTokenSubmit = async (e: React.FormEvent) => { 141 | e.preventDefault() 142 | 143 | if (authTab === 'token') { 144 | if (apiToken.trim()) { 145 | setIsAuthenticated(true) 146 | } 147 | } else { 148 | // Handle client credentials flow 149 | if (clientId.trim() && clientSecret.trim()) { 150 | try { 151 | const response = await fetch('https://openapi.akool.com/api/open/v3/getToken', { 152 | method: 'POST', 153 | headers: { 154 | 'Content-Type': 'application/json' 155 | }, 156 | body: JSON.stringify({ 157 | clientId: clientId, 158 | clientSecret: clientSecret 159 | }) 160 | }); 161 | 
162 | const data = await response.json(); 163 | if (data.token) { 164 | setApiToken(data.token); 165 | setIsAuthenticated(true); 166 | } else { 167 | setErrorMessage('Invalid credentials'); 168 | setShowErrorPopup(true); 169 | } 170 | } catch (error) { 171 | setErrorMessage('Failed to authenticate'); 172 | setShowErrorPopup(true); 173 | } 174 | } 175 | } 176 | } 177 | 178 | // Get unique genders from voices 179 | const genders = [...new Set(voices.map(voice => voice.gender))]; 180 | 181 | // Filter voices by selected gender 182 | const filteredVoices = voices.filter(voice => voice.gender === selectedGender); 183 | 184 | const handleAnimatePhoto = async () => { 185 | // Validate inputs 186 | if (!photoUrl || !prompt || !selectedVoice) { 187 | setErrorMessage("Please fill in all fields"); 188 | setShowErrorPopup(true); 189 | return; 190 | } 191 | 192 | try { 193 | // Create the payload for the API call 194 | const payload = { 195 | input_text: prompt, 196 | voice_id: selectedVoice, 197 | rate: rate, 198 | webhookUrl: "https://297a-219-91-134-123.ngrok-free.app/api/webhook" 199 | }; 200 | 201 | // Make the API call 202 | const response = await fetch('https://openapi.akool.com/api/open/v3/audio/create', { 203 | method: 'POST', 204 | headers: { 205 | 'Authorization': `Bearer ${apiToken}`, 206 | 'Content-Type': 'application/json' 207 | }, 208 | body: JSON.stringify(payload) 209 | }); 210 | 211 | const data = await response.json(); 212 | 213 | if (!response.ok) { 214 | throw new Error(data.message || 'Failed to create audio'); 215 | } 216 | 217 | // Show processing popup 218 | setShowProcessingPopup(true); 219 | 220 | // Store the photo URL and other data if needed 221 | // You can add additional state variables to store this information 222 | localStorage.setItem('lastPhotoUrl', photoUrl); 223 | localStorage.setItem('lastPrompt', prompt); 224 | localStorage.setItem('lastVoiceId', selectedVoice); 225 | 226 | } catch (error) { 227 | console.error('Error creating audio:', 
error); 228 | setErrorMessage(error.message || 'Failed to create audio'); 229 | setShowErrorPopup(true); 230 | } 231 | }; 232 | 233 | return ( 234 |
235 | {!isAuthenticated ? ( 236 |
237 |
238 | Logo 243 |

AI Photo Animator

244 | 245 |
246 | 252 | 258 |
259 | 260 |
261 | {authTab === 'token' ? ( 262 | setApiToken(e.target.value)} 266 | placeholder="Enter your API token" 267 | className="input-field" 268 | /> 269 | ) : ( 270 | <> 271 | setClientId(e.target.value)} 275 | placeholder="Enter Client ID" 276 | className="input-field" 277 | /> 278 | setClientSecret(e.target.value)} 282 | placeholder="Enter Client Secret" 283 | className="input-field" 284 | /> 285 | 286 | )} 287 | 290 |
291 |
292 | ) : ( 293 |
294 | Logo 299 |

Create Your Talking Photo

300 |
301 |
302 | setPhotoUrl(e.target.value)} 306 | placeholder="Enter photo URL" 307 | className="input-field" 308 | /> 309 | {photoUrl && ( 310 |
311 | 312 | Preview 313 | 314 |
315 | Preview 320 |
321 |
322 | )} 323 |
324 | 325 | {/* Gender Selection Dropdown */} 326 | 341 | 342 | {/* Voice Selection Dropdown */} 343 | 362 | 363 |
364 |