├── .env
├── bun.lockb
├── src
│   └── app
│       ├── favicon.ico
│       ├── layout.tsx
│       ├── globals.css
│       ├── api
│       │   └── chat
│       │       └── route.ts
│       └── page.tsx
├── next.config.js
├── postcss.config.js
├── .env.example
├── .gitignore
├── tailwind.config.ts
├── tsconfig.json
├── package.json
└── README.md

/.env:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/bun.lockb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/developersdigest/multi-llm-siri/HEAD/bun.lockb
--------------------------------------------------------------------------------
/src/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/developersdigest/multi-llm-siri/HEAD/src/app/favicon.ico
--------------------------------------------------------------------------------
/next.config.js:
--------------------------------------------------------------------------------
/** @type {import('next').NextConfig} */
const nextConfig = {}

module.exports = nextConfig
--------------------------------------------------------------------------------
/postcss.config.js:
--------------------------------------------------------------------------------
module.exports = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
# https://platform.openai.com/account/api-keys
OPENAI_API_KEY=""
# https://docs.perplexity.ai/docs/getting-started
PERPLEXITY_API_KEY=""
--------------------------------------------------------------------------------
/src/app/layout.tsx:
--------------------------------------------------------------------------------
import type { Metadata } from 'next'
import { Inter } from 'next/font/google'
import './globals.css'

const inter = Inter({ subsets: ['latin'] })

export const metadata: Metadata = {
  title: 'Siri-of-Everything',
  description: 'Siri for LLMs',
}

export default function RootLayout({
  children,
}: {
  children: React.ReactNode
}) {
  return (
    // Root layout wrapper (assumed standard create-next-app html/body markup).
    <html lang="en">
      <body className={inter.className}>{children}</body>
    </html>
  )
}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts
--------------------------------------------------------------------------------
/tailwind.config.ts:
--------------------------------------------------------------------------------
import type { Config } from 'tailwindcss'

const config: Config = {
  content: [
    './src/pages/**/*.{js,ts,jsx,tsx,mdx}',
    './src/components/**/*.{js,ts,jsx,tsx,mdx}',
    './src/app/**/*.{js,ts,jsx,tsx,mdx}',
  ],
  theme: {
    extend: {
      backgroundImage: {
        'gradient-radial': 'radial-gradient(var(--tw-gradient-stops))',
        'gradient-conic':
          'conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))',
      },
    },
  },
  plugins: [],
}
export default config
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "es5",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [
      {
        "name": "next"
      }
    ],
    "paths": {
      "@/*": ["./src/*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
  "exclude": ["node_modules"]
}
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "llm-chatterbox",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start",
    "lint": "next lint"
  },
  "dependencies": {
    "@langchain/community": "^0.0.15",
    "@langchain/openai": "^0.0.10",
    "api": "^6.1.1",
    "dotenv": "^16.3.1",
    "langchain": "^0.1.1",
    "openai": "^4.24.1",
    "next": "14.0.4",
    "react": "^18",
    "react-dom": "^18"
  },
  "devDependencies": {
    "@types/node": "^20",
    "@types/react": "^18",
    "@types/react-dom": "^18",
    "autoprefixer": "^10.0.1",
    "postcss": "^8",
    "tailwindcss": "^3.3.0",
    "typescript": "^5"
  }
}
--------------------------------------------------------------------------------
/src/app/globals.css:
--------------------------------------------------------------------------------
@tailwind base;
@tailwind components;
@tailwind utilities;
:root {
  --foreground-rgb: 0, 0, 0;
  --background-start-rgb: 214, 219, 220;
  --background-end-rgb: 255, 255, 255;
}
@media (prefers-color-scheme: dark) {
  :root {
    --foreground-rgb: 255, 255, 255;
    --background-start-rgb: 0, 0, 0;
    --background-end-rgb: 0, 0, 0;
  }
}
body {
  color: rgb(var(--foreground-rgb));
  background: linear-gradient(to bottom, transparent, rgb(var(--background-end-rgb))) rgb(var(--background-start-rgb));
}
.model-bubble {
  position: relative;
}
.prominent-pulse {
  animation: prominentPulse 1s ease-in-out infinite;
}
@keyframes prominentPulse {
  0%,
  100% {
    transform: scale(1);
  }
  50% {
    transform: scale(0.8);
  }
}
.loading-indicator {
  border: 10px solid rgba(255, 255, 255, 0.3);
  border-top: 5px solid blue;
  border-radius: 50%;
  width: 198px;
  height: 198px;
  animation: spin 2s linear infinite;
  position: absolute;
  transform: translate(-50%, -50%);
}
@keyframes spin {
  0% {
    transform: rotate(0deg);
  }
  100% {
    transform: rotate(360deg);
  }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# The "Siri of Everything"

This project, aptly named the "Siri of Everything," is a voice recognition and response system. It uses a variety of language models, including GPT-3.5, GPT-4, and the models hosted by Perplexity, to process and respond to voice input, akin to a universal Siri that can understand you and answer through multiple AI back ends.

## Features

- Advanced voice recognition capabilities.
- Seamless integration with various language models, including GPT-3.5, GPT-4, Mistral, Mixtral, Llama 2, and others.
- Dynamic and intelligent response generation with audio output.
- User-friendly interface with toggleable recording states for interaction control.

## Setup and Installation

### Dependencies

This project requires:

- Node.js (or Bun)
- An OpenAI API key
- A Perplexity API key
- LangChain (`@langchain/openai` and `@langchain/community`)
- Ollama (optional, only needed for the local Mistral and Llama 2 models)

### Environment Variables

Before running the project, configure your API keys in the `.env` file:

#### OpenAI API Key
Get one from https://platform.openai.com/account/api-keys

OPENAI_API_KEY=""

#### Perplexity API Key
Get one from https://docs.perplexity.ai/docs/getting-started

PERPLEXITY_API_KEY=""

### Installation

To install the necessary packages, run:

bun install

(or `npm install` if you are not using Bun)

### Running the Project

Start the development server:

npm run dev

## Usage

Interact with the system using voice commands. The system captures your speech, processes it with the selected AI model, and responds with a spoken (audio) answer.
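To pick a model, start your sentence with its keyword (for example "gpt4 ...", "perplexity ...", or "local mistral ..."); the rest of the sentence is forwarded to the `/api/chat` route, which replies with base64-encoded MP3 audio. As a rough sketch (the request and response shapes come from `src/app/api/chat/route.ts`; the URL assumes the default dev server port), the route can also be called directly:

```ts
// Call the chat route directly; the UI in src/app/page.tsx sends the same payload.
const res = await fetch("http://localhost:3000/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ message: "gpt4 what is the capital of France", model: "gpt4" }),
});

// The route answers with { data, contentType, model }, where `data` is a
// base64-encoded MP3 of the spoken answer.
const { data, contentType, model } = await res.json();
console.log(model, contentType); // "gpt4", "audio/mp3"
```

The leading keywords recognized by the UI are `gpt`, `gpt4`, `perplexity`, `local mistral`, `local llama`, `mixture`, `mistral`, and `llama`.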
## Contributing

Contributions to enhance the "Siri of Everything" are welcome. Whether it's adding new features, improving existing ones, or fixing bugs, your input is valuable.

Enjoy using the "Siri of Everything" to explore the capabilities of modern AI language models through voice interactions!

## Acknowledgements

Initial setup for basic web voice recognition was inspired by the work in [sambowenhughes/voice-recording-with-nextjs](https://github.com/sambowenhughes/voice-recording-with-nextjs). A big thank you to them! Don't forget to give them a star and a follow on GitHub for their amazing contribution.

## Connect and Support

I'm the developer behind Developers Digest. If you find my work helpful or enjoy what I do, consider supporting me. Here are a few ways you can do that:

- **Patreon**: Support me on Patreon at [patreon.com/DevelopersDigest](https://www.patreon.com/DevelopersDigest)
- **Buy Me A Coffee**: You can buy me a coffee at [buymeacoffee.com/developersdigest](https://www.buymeacoffee.com/developersdigest)
- **Website**: Check out my website at [developersdigest.tech](https://developersdigest.tech)
- **GitHub**: Follow me on GitHub at [github.com/developersdigest](https://github.com/developersdigest)
- **Twitter**: Follow me on Twitter at [twitter.com/dev__digest](https://twitter.com/dev__digest)

Your support is greatly appreciated and helps me continue to develop and maintain free, open-source projects.
--------------------------------------------------------------------------------
/src/app/api/chat/route.ts:
--------------------------------------------------------------------------------
// 0. Import Dependencies
import OpenAI from "openai";
import dotenv from "dotenv";
import { OpenAI as LangchainOpenAI } from "@langchain/openai";
import { Ollama } from "@langchain/community/llms/ollama";
import api from 'api';

// 1. Initialize the Perplexity SDK
const sdk = api('@pplx/v0#rht322clnm9gt25');

// 2. Configure environment variables
dotenv.config();
sdk.auth(process.env.PERPLEXITY_API_KEY);

// 3. Define the response data structure
interface ResponseData {
  data: string;
  contentType: string;
  model: string;
}

// 4. Initialize the OpenAI instance
const openai = new OpenAI();

// 5. Function to create audio from text
async function createAudio(fullMessage: string, voice: "alloy" | "echo" | "fable" | "onyx" | "nova" | "shimmer") {
  const mp3 = await openai.audio.speech.create({
    model: "tts-1",
    voice: voice,
    input: fullMessage,
  });
  const buffer = Buffer.from(await mp3.arrayBuffer());
  return buffer.toString('base64');
}

// 6. HTTP POST handler function
export async function POST(req: Request, res: Response): Promise<Response> {
  const body = await req.json();
  let message = body.message.toLowerCase();
  let modelName = body.model || "gpt";

  // 7. Function to remove the first word of a string (drops the spoken model keyword)
  const removeFirstWord = (text: string) => text.includes(" ") ? text.substring(text.indexOf(" ") + 1) : "";
  message = removeFirstWord(message);

  // 8. Initialize variables for messages and audio
  let introMessage = "", base64Audio, voice: "alloy" | "echo" | "fable" | "onyx" | "nova" | "shimmer" = "echo", gptMessage, fullMessage;

  // 9. Common prompt for all models
  const commonPrompt = "Be precise and concise, never respond in more than 1-2 sentences! " + message;

  // 10. Handle different model cases
  if (modelName === "gpt") {
    const llm = new LangchainOpenAI({
      openAIApiKey: process.env.OPENAI_API_KEY,
    });
    gptMessage = await llm.invoke(commonPrompt);
    introMessage = "GPT3 point 5 here, ";
    voice = "echo";
  } else if (modelName === "gpt4") {
    // 11. Handling GPT-4 model
    const llm = new LangchainOpenAI({
      openAIApiKey: process.env.OPENAI_API_KEY,
      modelName: 'gpt-4'
    });
    gptMessage = await llm.invoke(commonPrompt);
    introMessage = "GPT-4 here, ";
    voice = "echo";
  } else if (modelName === "local mistral") {
    // 12. Handling local Mistral model
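    // Note: this branch and the "local llama" branch below assume an Ollama server is
    // already running locally at the default http://localhost:11434 and that the models
    // have been pulled beforehand (e.g. `ollama pull mistral` and `ollama pull llama2`).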
    const llm = new Ollama({
      baseUrl: "http://localhost:11434",
      model: "mistral",
    });
    gptMessage = await llm.invoke(commonPrompt);
    introMessage = "Ollama Mistral-7B here, ";
    voice = "fable";
  } else if (modelName === "local llama") {
    // 13. Handling local Llama model
    const llm = new Ollama({
      baseUrl: "http://localhost:11434",
      model: "llama2",
    });
    gptMessage = await llm.invoke(commonPrompt);
    introMessage = "Ollama Llama 2 here, ";
    voice = "fable";
  } else if (modelName === "mixture") {
    // 14. Handling the Mixtral model (spoken keyword "mixture")
    const response = await sdk.post_chat_completions({
      model: 'mixtral-8x7b-instruct',
      messages: [{ role: 'user', content: commonPrompt }]
    });
    gptMessage = response.data.choices[0].message.content;
    introMessage = "Mixtral here, ";
    voice = "alloy";
  } else if (modelName === "mistral") {
    // 15. Handling Mistral model
    const response = await sdk.post_chat_completions({
      model: 'mistral-7b-instruct',
      messages: [{ role: 'user', content: commonPrompt }]
    });
    gptMessage = response.data.choices[0].message.content;
    introMessage = "Mistral here, ";
    voice = "nova";
  } else if (modelName === "perplexity") {
    // 16. Handling Perplexity model
    const response = await sdk.post_chat_completions({
      model: 'pplx-70b-online',
      messages: [
        { role: 'system', content: commonPrompt },
        { role: 'user', content: commonPrompt }
      ]
    });
    gptMessage = response.data.choices[0].message.content;
    introMessage = "Perplexity here, ";
    voice = "onyx";
  } else if (modelName === "llama") {
    // 17. Handling Llama model
    const response = await sdk.post_chat_completions({
      model: 'llama-2-70b-chat',
      messages: [{ role: 'user', content: commonPrompt }]
    });
    gptMessage = response.data.choices[0].message.content;
    introMessage = "Llama 2 70B here, ";
    voice = "nova";
  }

  // 18. Compile the full message and create the audio
  fullMessage = introMessage + gptMessage;
  base64Audio = await createAudio(fullMessage, voice);

  // 19. Return the response
  return Response.json({ data: base64Audio, contentType: 'audio/mp3', model: modelName });
}
--------------------------------------------------------------------------------
/src/app/page.tsx:
--------------------------------------------------------------------------------
//1. Import necessary hooks and types from React
"use client";
import { useEffect, useState, useRef } from "react";

//2. Extend Window interface for webkitSpeechRecognition
declare global {
  interface Window {
    webkitSpeechRecognition: any;
  }
}

//3. Main functional component declaration
export default function Home() {
  //4. State hooks for various functionalities
  const [isRecording, setIsRecording] = useState(false);
  const [isPlaying, setIsPlaying] = useState(false);
  const [transcript, setTranscript] = useState("");
  const [model, setModel] = useState("");
  const [response, setResponse] = useState("");
  const [isLoading, setIsLoading] = useState(false);

  //5. Ref hooks for speech recognition and silence detection
  const recognitionRef = useRef<any>(null);
  const silenceTimerRef = useRef<any>(null);

  //6. Determine CSS class for model display based on state
  const getModelClassName = (bubbleModel: string): string =>
    bubbleModel === model && isPlaying ? " prominent-pulse" : "";

  //7. Asynchronous function to handle backend communication
  const sendToBackend = async (message: string, modelKeyword?: string): Promise<void> => {
    setIsLoading(true);
    if (modelKeyword) setModel(modelKeyword);
    else if (!model) setModel("gpt-3.5");

    try {
      //7.1 Stop recording before sending data
      stopRecording();
      //7.2 Send POST request to backend
      const response = await fetch("/api/chat", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ message, model: modelKeyword }),
      });
      //7.3 Check for response validity
      if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
      //7.4 Process and play audio response if available
      const data = await response.json();
      if (data.data && data.contentType === "audio/mp3") {
        const audioSrc = `data:audio/mp3;base64,${data.data}`;
        const audio = new Audio(audioSrc);
        setIsPlaying(true);
        audio.play();
        audio.onended = () => {
          setIsPlaying(false);
          startRecording();
          if (data.model) setModel(data.model);
        };
      }
    } catch (error) {
      //7.5 Handle errors during data transmission or audio playback
      console.error("Error sending data to backend or playing audio:", error);
    }
    setIsLoading(false);
  };

  //8. Render individual model selection bubbles
  // NOTE: the original JSX markup and Tailwind class names in this component did not
  // survive in this dump; the markup from here on is a representative reconstruction.
  const renderModelBubble = (bubbleModel: string, displayName: string, bgColor: string): JSX.Element => (
    <div className="model-bubble">
      {isLoading && bubbleModel === model && <div className="loading-indicator"></div>}
      <div className={`flex h-48 w-48 items-center justify-center rounded-full text-white ${bgColor}${getModelClassName(bubbleModel)}`}>
        {displayName}
      </div>
    </div>
  );

  //9. Process speech recognition results
  const handleResult = (event: any): void => {
    if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
    let interimTranscript = "";
    for (let i = event.resultIndex; i < event.results.length; ++i) {
      interimTranscript += event.results[i][0].transcript;
    }
    setTranscript(interimTranscript);
    silenceTimerRef.current = setTimeout(() => {
      //9.1 Extract and send detected words to backend
      const words = interimTranscript.split(" ");
      const modelKeywords = [
        "gpt4",
        "gpt",
        "perplexity",
        "local mistral",
        "local llama",
        "mixture",
        "mistral",
        "llama",
      ];
      const detectedModel = modelKeywords.find((keyword) =>
        words.slice(0, 3).join(" ").toLowerCase().includes(keyword)
      );
      setModel(detectedModel || "gpt");
      sendToBackend(interimTranscript, detectedModel);
      setTranscript("");
    }, 2000);
  };

  //10. Initialize speech recognition
  const startRecording = () => {
    setIsRecording(true);
    setTranscript("");
    setResponse("");
    recognitionRef.current = new window.webkitSpeechRecognition();
    recognitionRef.current.continuous = true;
    recognitionRef.current.interimResults = true;
    recognitionRef.current.onresult = handleResult;
    recognitionRef.current.onend = () => {
      setIsRecording(false);
      if (silenceTimerRef.current) clearTimeout(silenceTimerRef.current);
    };
    recognitionRef.current.start();
  };

  //11. Clean up with useEffect on component unmount
  useEffect(
    () => () => {
      if (recognitionRef.current) recognitionRef.current.stop();
    },
    []
  );

  //12. Function to terminate speech recognition
  const stopRecording = () => {
    if (recognitionRef.current) recognitionRef.current.stop();
  };

  //13. Toggle recording state
  const handleToggleRecording = () => {
    if (!isRecording && !isPlaying) startRecording();
    else if (isRecording) stopRecording();
  };

  //14. Main component rendering method
  return (
    //14.1 Render recording and transcript status
    <main className="flex min-h-screen flex-col items-center justify-center p-8">
      {(isRecording || transcript || response) && (
        <div className="fixed left-0 top-0 w-full p-4">
          <div className="flex flex-col items-center">
            <p className="text-sm text-gray-400">
              {isRecording ? "Listening" : ""}
            </p>
            {transcript && (
              <div className="mt-2 max-w-xl rounded-lg border p-2">
                <p>
                  {transcript}
                </p>
              </div>
            )}
          </div>
        </div>
      )}
      {/* 14.2 Render model selection and recording button */}
      <div className="flex w-full items-center justify-center">
        <div className="flex flex-wrap items-center justify-center gap-4">
          <div className="flex items-center gap-4">
            {renderModelBubble("gpt", "GPT-3.5", "bg-indigo-500")}
            {renderModelBubble("gpt4", "GPT-4", "bg-teal-500")}
            {renderModelBubble("perplexity", "Perplexity", "bg-pink-500")}
            {renderModelBubble("local mistral", "Mistral-7B (Ollama)", "bg-purple-500")}
          </div>
          <button
            onClick={handleToggleRecording}
            className={`mx-8 flex h-48 w-48 items-center justify-center rounded-full text-white ${
              isRecording ? "bg-red-500" : "bg-blue-500"
            }`}
          >
            {isRecording ? "Stop" : "Record"}
          </button>
          <div className="flex items-center gap-4">
            {renderModelBubble("local llama", "Llama2 (Ollama)", "bg-red-500")}
            {renderModelBubble("mixture", "Mixtral (Perplexity)", "bg-orange-500")}
            {renderModelBubble("mistral", "Mistral-7B (Perplexity)", "bg-purple-500")}
            {renderModelBubble("llama", "Llama2 70B (Perplexity)", "bg-lime-500")}
          </div>
        </div>
      </div>
    </main>
  );
}
--------------------------------------------------------------------------------