├── .eslintrc.json ├── app ├── favicon.ico ├── components │ ├── chat │ │ ├── Chat.module.css │ │ ├── ChatLineGroup.tsx │ │ └── ChatLine.tsx │ ├── toast │ │ ├── Toaster.tsx │ │ └── Toast.tsx │ ├── form │ │ ├── Select.tsx │ │ └── TextArea.tsx │ └── modal │ │ └── SamePageModal.tsx ├── utils │ ├── local-keys.ts │ ├── openai.ts │ ├── i18n.ts │ ├── chat-message.ts │ ├── audio.ts │ └── azure-speech.ts ├── [locale] │ ├── review │ │ ├── components │ │ │ ├── Loading.tsx │ │ │ ├── QAInput.tsx │ │ │ └── EvaluationCard.tsx │ │ └── page.tsx │ ├── components │ │ ├── sidebar │ │ │ ├── SidebarFunctionButton.tsx │ │ │ ├── SidebarToggleButton.tsx │ │ │ └── Sidebar.tsx │ │ └── modal │ │ │ ├── SettingsModal.tsx │ │ │ └── NewChatModal.tsx │ ├── layout.tsx │ ├── globals.css │ ├── page.tsx │ └── chat │ │ ├── components │ │ └── ChatInput.tsx │ │ └── page.tsx ├── hooks │ ├── locale.ts │ └── toast.ts └── api │ ├── azurespeech │ └── token │ │ └── route.ts │ └── openai │ ├── review │ ├── qa │ │ └── route.ts │ └── evaluation │ │ └── route.ts │ └── chat │ └── route.ts ├── public ├── audio │ ├── empty.wav │ └── mono-processor.js └── icons │ ├── mic-stop.svg │ ├── plus.svg │ ├── audio-pause.svg │ ├── new.svg │ ├── bar-left-arrow.svg │ ├── bar-right-arrow.svg │ ├── send.svg │ ├── audio-play.svg │ ├── trashbin.svg │ ├── mic-loading.svg │ ├── mic.svg │ ├── loading.svg │ ├── review.svg │ ├── github-mark.svg │ ├── docs.svg │ ├── logo.svg │ ├── settings.svg │ └── logo-name.svg ├── postcss.config.js ├── tailwind.config.js ├── middleware.ts ├── .gitignore ├── tsconfig.json ├── next.config.js ├── package.json ├── messages ├── zh-cn.json ├── zh-tw.json ├── ja.json ├── ko.json ├── en.json └── es.json ├── README.md └── LICENSE /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/siranshen/small-talk/HEAD/app/favicon.ico -------------------------------------------------------------------------------- /public/audio/empty.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/siranshen/small-talk/HEAD/public/audio/empty.wav -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /public/icons/mic-stop.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/components/chat/Chat.module.css: -------------------------------------------------------------------------------- 1 | @media (min-width: 768px) { 2 | .gradient-to-top { 3 | background: linear-gradient(to top, #fff, #fff 60%, rgba(255, 255, 255, 0)); 4 | } 5 | } -------------------------------------------------------------------------------- /public/icons/plus.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/icons/audio-pause.svg: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/icons/new.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/icons/bar-left-arrow.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/icons/bar-right-arrow.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/icons/send.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /public/icons/audio-play.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/utils/local-keys.ts: -------------------------------------------------------------------------------- 1 | export const SYSTEM_LANG_KEY = 'systemLang' 2 | export const LEARNING_LANG_KEY = 'learningLang' 3 | export const LEVEL_KEY = 'level' 4 | export const SELF_INTRO_KEY = 'selfIntro' 5 | export const VOICE_NAME_KEY = 'voiceName' 6 | export const TOPIC_KEY = 'topic' 7 | export const TOPIC_PROMPT_KEY = 'topicPrompt' 8 | export const CONVO_STORAGE_KEY = 'convo' -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [ 4 | './app/**/*.{js,ts,jsx,tsx,mdx}', 5 | ], 6 | theme: { 7 | extend: { 8 | keyframes: { 9 | 'fade-in': { 10 | from: { opacity: 0 }, 11 | to: { opacity: 1 }, 12 | } 13 | } 14 | }, 15 | }, 16 | plugins: [], 17 | } 18 | -------------------------------------------------------------------------------- /public/icons/trashbin.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /middleware.ts: -------------------------------------------------------------------------------- 1 | import createMiddleware from 'next-intl/middleware' 2 | 3 | export default createMiddleware({ 4 | locales: ['en', 'zh-cn', 'zh-tw', 'ja', 'ko', 'es'], 5 | // If this locale is matched, pathnames work without a prefix 6 | defaultLocale: 'en', 7 | }) 8 | 9 | export const config = { 10 | // Skip folder "api" and all files with an extension 11 | matcher: ['/((?!api|.*\\..*).*)'], 12 | } 13 | -------------------------------------------------------------------------------- /app/[locale]/review/components/Loading.tsx: -------------------------------------------------------------------------------- 1 | import LoadingIcon from '@/public/icons/mic-loading.svg' 2 | 3 | export default function Loading() { 4 | return ( 5 |
6 |
7 | 8 |
9 |
10 | ) 11 | } 12 | -------------------------------------------------------------------------------- /public/icons/mic-loading.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/components/toast/Toaster.tsx: -------------------------------------------------------------------------------- 1 | import { ToastData } from '@/app/hooks/toast' 2 | import Toast from './Toast' 3 | 4 | export default function Toaster({ toasts, removeToast }: { toasts: ToastData[]; removeToast: (id: number) => void }) { 5 | return ( 6 | <> 7 | {toasts.map((toast) => ( 8 | 9 | ))} 10 | 11 | ) 12 | } 13 | -------------------------------------------------------------------------------- /public/icons/mic.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # local env files 28 | .env*.local 29 | 30 | # vercel 31 | .vercel 32 | 33 | # typescript 34 | *.tsbuildinfo 35 | next-env.d.ts 36 | 37 | .cosine -------------------------------------------------------------------------------- /public/icons/loading.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/hooks/locale.ts: -------------------------------------------------------------------------------- 1 | import { useLocale } from "next-intl" 2 | import { SYSTEM_LANG_KEY } from "@/app/utils/local-keys" 3 | import { usePathname, useRouter } from 'next-intl/client' 4 | import { useEffect } from "react" 5 | 6 | /* Custom hook for loading local locale setting and rerouting if needed */ 7 | export default function useLocaleLoader() { 8 | const locale = useLocale() 9 | const router = useRouter() 10 | const pathname = usePathname() 11 | useEffect(() => { 12 | const localLocale = localStorage.getItem(SYSTEM_LANG_KEY) 13 | if (localLocale && localLocale !== locale) { 14 | router.replace(pathname, { locale: localLocale }) 15 | } 16 | }, [locale, pathname, router]) 17 | } -------------------------------------------------------------------------------- /app/[locale]/components/sidebar/SidebarFunctionButton.tsx: -------------------------------------------------------------------------------- 1 | import { MouseEventHandler } from 'react' 2 | 3 | export default function SidebarFunctionButton({ 4 | text, 5 | disabled, 6 | onClick, 7 | Icon, 8 | }: { 9 | text: string 10 | disabled?: boolean 11 | onClick: MouseEventHandler 12 | Icon: any 13 | }) { 14 | return ( 15 | 24 | ) 25 | } 26 | -------------------------------------------------------------------------------- /app/components/form/Select.tsx: -------------------------------------------------------------------------------- 1 | import { ChangeEventHandler, Ref, forwardRef } from 'react' 2 | 3 | const Select = forwardRef(function Select( 4 | { label, id, children, onChange }: { label: string; id: 
string; children: React.ReactNode; onChange?: ChangeEventHandler<HTMLSelectElement> }, 5 | ref: Ref<HTMLSelectElement> 6 | ) { 7 | return ( 8 |
9 | 12 |
13 | 16 |
17 |
18 | ) 19 | }) 20 | 21 | export default Select 22 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "plugins": [ 18 | { 19 | "name": "next" 20 | } 21 | ], 22 | "paths": { 23 | "@/*": ["./*"], 24 | } 25 | }, 26 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 27 | "exclude": ["node_modules"] 28 | } 29 | -------------------------------------------------------------------------------- /public/icons/review.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | webpack(config, { isServer }) { 4 | // Grab the existing rule that handles SVG imports 5 | const fileLoaderRule = config.module.rules.find((rule) => rule.test?.test?.('.svg')) 6 | 7 | config.module.rules.push( 8 | // Convert all *.svg imports to React components 9 | { 10 | test: /\.svg$/i, 11 | issuer: /\.[jt]sx?$/, 12 | use: ['@svgr/webpack'], 13 | } 14 | ) 15 | 16 | // Modify the file loader rule to ignore *.svg, since we have it handled now. 17 | fileLoaderRule.exclude = /\.svg$/i 18 | 19 | if (isServer) { 20 | config.externals.push({ 21 | bufferutil: 'bufferutil', 22 | 'utf-8-validate': 'utf-8-validate', 23 | }) 24 | } 25 | 26 | return config 27 | }, 28 | } 29 | 30 | module.exports = nextConfig 31 | -------------------------------------------------------------------------------- /app/components/toast/Toast.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from 'react' 2 | 3 | export default function Toast({ 4 | id, 5 | message, 6 | duration, 7 | removeToast, 8 | }: { 9 | id: number 10 | message: string 11 | duration: number 12 | removeToast: (id: number) => void 13 | }) { 14 | const [fadeOut, setFadeOut] = useState(false) 15 | 16 | useEffect(() => { 17 | setTimeout(() => setFadeOut(true), duration - 300) 18 | }, [duration]) 19 | 20 | return ( 21 |
{ 26 | if (fadeOut) { 27 | removeToast(id) 28 | } 29 | }} 30 | > 31 | {message} 32 |
33 | ) 34 | } 35 | -------------------------------------------------------------------------------- /public/icons/github-mark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/[locale]/components/sidebar/SidebarToggleButton.tsx: -------------------------------------------------------------------------------- 1 | import BarRightArrow from '@/public/icons/bar-right-arrow.svg' 2 | import BarLeftArrow from '@/public/icons/bar-left-arrow.svg' 3 | 4 | export default function SidebarToggleButton({ 5 | open, 6 | sidebarOpen, 7 | setSidebarOpen, 8 | }: { 9 | open: boolean 10 | sidebarOpen: boolean 11 | setSidebarOpen: Function 12 | }) { 13 | var buttonStates = '' 14 | if (open) { 15 | buttonStates = 'mr-[-32px]' 16 | if (sidebarOpen) { 17 | buttonStates += ' hidden' 18 | } 19 | } else if (!sidebarOpen) { 20 | buttonStates = 'hidden' 21 | } 22 | return ( 23 | 29 | ) 30 | } 31 | -------------------------------------------------------------------------------- /app/components/form/TextArea.tsx: -------------------------------------------------------------------------------- 1 | import { Ref, forwardRef } from 'react' 2 | 3 | const TextArea = forwardRef(function Select( 4 | { 5 | label, 6 | id, 7 | placeholder, 8 | rows = 3, 9 | value = '', 10 | }: { label: string; id: string; placeholder: string; rows?: number; value?: string }, 11 | ref: Ref 12 | ) { 13 | return ( 14 |
15 | 18 | 27 |
28 | ) 29 | }) 30 | 31 | export default TextArea 32 | -------------------------------------------------------------------------------- /app/hooks/toast.ts: -------------------------------------------------------------------------------- 1 | import { useCallback, useState } from "react"; 2 | 3 | export interface ToastData { 4 | id: number 5 | message: string 6 | duration: number 7 | } 8 | 9 | export default function useToasts(): [ToastData[], (message: string, duration?: number) => void, (id: number) => void] { 10 | const [toastsState, setToastsState] = useState<{ idCounter: number; toasts: ToastData[] }>({ idCounter: 0, toasts: [] }) 11 | 12 | const addToast = useCallback((message: string, duration: number = 3000) => { 13 | setToastsState((state) => ({ 14 | idCounter: state.idCounter + 1, 15 | toasts: [...state.toasts, { id: state.idCounter, message, duration }], 16 | })) 17 | }, []) 18 | 19 | const removeToast = useCallback((id: number) => { 20 | setToastsState((state) => ({ 21 | ...state, 22 | toasts: state.toasts.filter((data) => data.id !== id), 23 | })) 24 | }, []) 25 | 26 | return [toastsState.toasts, addToast, removeToast] 27 | } -------------------------------------------------------------------------------- /public/icons/docs.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/api/azurespeech/token/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server' 2 | 3 | const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY ?? '' 4 | const AZURE_SPEECH_REGION = process.env.AZURE_SPEECH_REGION ?? '' 5 | const AZURE_SPEECH_ENDPOINT = `https://${AZURE_SPEECH_REGION}.api.cognitive.microsoft.com/sts/v1.0/issueToken` 6 | 7 | export async function POST(request: NextRequest) { 8 | // TODO: Authenticate request 9 | try { 10 | const response = await fetch(AZURE_SPEECH_ENDPOINT, { 11 | method: 'POST', 12 | headers: { 13 | 'Ocp-Apim-Subscription-Key': AZURE_SPEECH_KEY, 14 | 'Content-Type': 'application/x-www-form-urlencoded', 15 | }, 16 | }) 17 | if (!response.ok) { 18 | console.error('Error getting Azure Speech token', response.status) 19 | return NextResponse.error() 20 | } 21 | const data = await response.text() 22 | return NextResponse.json({ token: data, region: AZURE_SPEECH_REGION }) 23 | } catch (e) { 24 | console.error('Error getting Azure Speech token', e) 25 | return NextResponse.error() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "small-talk", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@types/node": "20.3.3", 13 | "@types/react": "18.2.14", 14 | "@types/react-dom": "18.2.6", 15 | "async": "~3.2.4", 16 | "autoprefixer": "10.4.14", 17 | "dedent": "~1.2.0", 18 | "eslint": "~8.44.0", 19 | "eslint-config-next": "~13.4.8", 20 | "eventsource-parser": "~1.0.0", 21 | "microsoft-cognitiveservices-speech-sdk": "~1.30.1", 22 | "next": "~13.4.10", 23 | "next-intl": "~2.19.0", 24 | "postcss": "8.4.24", 25 | "react": "18.2.0", 26 | "react-dom": "18.2.0", 27 | "remark": "~14.0.3", 28 | "remark-html": "~15.0.2", 29 | "tailwindcss": "3.3.2", 30 | "typescript": "5.1.6", 31 | 
"uuid": "9.0.0" 32 | }, 33 | "devDependencies": { 34 | "@svgr/webpack": "~8.0.1", 35 | "@types/async": "~3.2.20", 36 | "@types/uuid": "~9.0.2", 37 | "prettier": "~3.0.0", 38 | "prettier-plugin-tailwindcss": "~0.3.0" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /app/[locale]/layout.tsx: -------------------------------------------------------------------------------- 1 | import './globals.css' 2 | import { Metadata } from 'next' 3 | import { NextIntlClientProvider } from 'next-intl' 4 | import { notFound } from 'next/navigation' 5 | import { LANGUAGES } from '../utils/i18n' 6 | import Sidebar from './components/sidebar/Sidebar' 7 | 8 | export const metadata: Metadata = { 9 | title: 'SmallTalk', 10 | } 11 | 12 | export function generateStaticParams() { 13 | return LANGUAGES.map((lang) => ({ locale: lang.locale })) 14 | } 15 | 16 | export default async function RootLayout({ 17 | children, 18 | params: { locale }, 19 | }: { 20 | children: React.ReactNode 21 | params: { locale: string } 22 | }) { 23 | let messages 24 | try { 25 | messages = (await import(`@/messages/${locale}.json`)).default 26 | } catch (error) { 27 | notFound() 28 | } 29 | 30 | return ( 31 | 32 | 33 | 34 |
35 | 36 | {children} 37 |
38 |
39 | 40 | 41 | ) 42 | } 43 | -------------------------------------------------------------------------------- /app/api/openai/review/qa/route.ts: -------------------------------------------------------------------------------- 1 | import dedent from 'dedent' 2 | import getResponseStream from '@/app/utils/openai' 3 | import { NextRequest, NextResponse } from 'next/server' 4 | 5 | export const runtime = 'edge' 6 | 7 | const constructSystemPrompt = (language: string, evalLanguage: string, evaluation: string) => { 8 | return dedent`You are a professional ${evalLanguage} teacher. 9 | You are given an evaluation of a user's performance based on a previous chat in ${evalLanguage}. The user is learning ${evalLanguage}. 10 | Your task is to answer user's questions regarding the evaluation and ${evalLanguage} in general. 11 | 12 | ## Rules 13 | - Respond in ${language}. 14 | - When asked questions unrelated to the evaluation or ${evalLanguage}, simply respond that you can't answer. 15 | 16 | ## Evaluation 17 | ${evaluation}` 18 | } 19 | 20 | export async function POST(request: NextRequest) { 21 | const { evaluation, messages, language, evalLanguage } = await request.json() 22 | try { 23 | const stream = await getResponseStream(constructSystemPrompt(language, evalLanguage, evaluation), messages) 24 | return new NextResponse(stream) 25 | } catch (e) { 26 | console.log('Error calling OpenAI', e) 27 | return new NextResponse('Error calling OpenAI', { status: 500 }) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /app/components/chat/ChatLineGroup.tsx: -------------------------------------------------------------------------------- 1 | import { ChatLine, LoadingChatLine } from './ChatLine' 2 | import { AudioChatMessage, ChatMessage } from '@/app/utils/chat-message' 3 | 4 | function ChatLineGroupLayout({ isAi, children }: { isAi: boolean; children: React.ReactNode }) { 5 | return ( 6 |
11 | {children} 12 |
13 | ) 14 | } 15 | 16 | export function ChatLineGroup({ message, shouldShowAiText }: { message: ChatMessage; shouldShowAiText: boolean }) { 17 | const isAi = message.isAiMessage() 18 | const isAudio = message.getType() === 'audio' 19 | return ( 20 | 21 | {isAudio && } 22 | {isAi && message.isStreaming() && } 23 | {message.getText() && (!isAi || shouldShowAiText) && } 24 | 25 | ) 26 | } 27 | 28 | export function LoadingChatLineGroup({ isAi }: { isAi: boolean }) { 29 | return ( 30 | 31 | 32 | 33 | ) 34 | } 35 | -------------------------------------------------------------------------------- /public/audio/mono-processor.js: -------------------------------------------------------------------------------- 1 | /** 2 | * A simple audio worklet processor that converts stereo audio to mono and converts it to 16bit 3 | */ 4 | class MonoProcessor extends AudioWorkletProcessor { 5 | process(inputs, outputs) { 6 | // By default, the node has single input and output 7 | const input = inputs[0] 8 | let buffer 9 | 10 | if (input.length === 2) { 11 | // The input is stereo 12 | const left = input[0], 13 | right = input[1], 14 | newLeft = new Int16Array(left.length), 15 | newRight = new Int16Array(left.length) 16 | buffer = new Int16Array(left.length) 17 | for (let i = 0; i < left.length; ++i) { 18 | // Convert stereo to mono by averaging the two channels 19 | newLeft[i] = floatTo16BitPCM(left[i]) 20 | newRight[i] = floatTo16BitPCM(right[i]) 21 | buffer[i] = (newLeft[i] + newRight[i]) / 2 22 | } 23 | this.port.postMessage({ type: 'interm', buffers: [newLeft, newRight] }) 24 | } else if (input.length === 1) { 25 | const mono = input[0] 26 | buffer = new Int16Array(mono.length) 27 | // The input is already mono 28 | for (let i = 0; i < mono.length; ++i) { 29 | buffer[i] = floatTo16BitPCM(mono[i]) 30 | } 31 | this.port.postMessage({ type: 'interm', buffers: [buffer] }) 32 | } 33 | if (buffer) { 34 | // Posts ArrayBuffer 35 | this.port.postMessage({ type: 'final', buffer: buffer.buffer }) 36 | } 37 | 38 | return true 39 | } 40 | } 41 | 42 | function floatTo16BitPCM(inputValue) { 43 | let s = Math.max(-1, Math.min(1, inputValue)) 44 | return s < 0 ? 
s * 0x8000 : s * 0x7fff 45 | } 46 | 47 | registerProcessor('MonoProcessor', MonoProcessor) 48 | -------------------------------------------------------------------------------- /app/[locale]/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --main-font-color: #333; 7 | --main-theme-color: #007aff; 8 | --secondary-theme-color: #f4f4f5; 9 | font-size: 16px; 10 | } 11 | 12 | html, 13 | body { 14 | height: 100%; 15 | font-family: 16 | -apple-system, 17 | system-ui, 18 | BlinkMacSystemFont, 19 | Helvetica Neue, 20 | Helvetica, 21 | sans-serif; 22 | } 23 | 24 | html { 25 | box-sizing: border-box; 26 | } 27 | 28 | *, 29 | *::before, 30 | *::after { 31 | box-sizing: inherit; 32 | } 33 | 34 | body { 35 | background-color: #fff; 36 | color: var(--main-font-color); 37 | margin: 0; 38 | } 39 | 40 | button, 41 | input, 42 | select, 43 | textarea { 44 | font-family: inherit; 45 | font-size: 100%; 46 | padding: 0; 47 | margin: 0; 48 | box-sizing: border-box; 49 | } 50 | 51 | ul { 52 | list-style: disc; 53 | padding-left: 1.5em; 54 | } 55 | 56 | ol { 57 | list-style: decimal; 58 | padding-left: 1.5em; 59 | } 60 | 61 | button { 62 | cursor: pointer; 63 | } 64 | 65 | button:disabled { 66 | cursor: not-allowed; 67 | } 68 | 69 | .solid-button { 70 | @apply bg-[--main-theme-color] text-white font-[600] text-sm border-none p-2 hover:opacity-80 disabled:opacity-80; 71 | } 72 | 73 | .solid-button-light { 74 | @apply bg-white text-[--main-font-color] font-[600] text-sm border border-solid border-zinc-300 p-2 hover:bg-gray-50 disabled:bg-gray-50; 75 | } 76 | 77 | select { 78 | @apply appearance-none w-full h-full rounded-lg pt-1 pr-6 pb-1 pl-3; 79 | } 80 | 81 | .select-wrapper { 82 | @apply relative rounded-lg border border-solid border-zinc-300 after:content-['▼'] after:text-[1rem] after:absolute after:top-1 after:right-2; 83 | } 84 | -------------------------------------------------------------------------------- /public/icons/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /app/components/modal/SamePageModal.tsx: -------------------------------------------------------------------------------- 1 | 'use client' 2 | 3 | import { useCallback, useRef, useEffect, MouseEventHandler, useState } from 'react' 4 | 5 | export default function SamePageModal({ 6 | children, 7 | isOpen, 8 | setOpen, 9 | }: { 10 | children: React.ReactNode 11 | isOpen: boolean 12 | setOpen: Function 13 | }) { 14 | const overlayRef = useRef(null) 15 | const wrapperRef = useRef(null) 16 | const [fadeOut, setFadeOut] = useState(false) 17 | 18 | const onClick: MouseEventHandler = useCallback( 19 | (e) => { 20 | if (e.target === overlayRef.current || e.target === wrapperRef.current) { 21 | setFadeOut(true) 22 | } 23 | }, 24 | [overlayRef, wrapperRef] 25 | ) 26 | 27 | const onKeyDown = useCallback((e: KeyboardEvent) => { 28 | if (e.key === 'Escape') { 29 | setFadeOut(true) 30 | } 31 | }, []) 32 | 33 | useEffect(() => { 34 | document.addEventListener('keydown', onKeyDown) 35 | return () => document.removeEventListener('keydown', onKeyDown) 36 | }, [onKeyDown]) 37 | 38 | return ( 39 |
{ 45 | if (fadeOut) { 46 | setOpen(false) 47 | setFadeOut(false) 48 | } 49 | }} 50 | onClick={onClick} 51 | > 52 |
56 |
57 | {children} 58 |
59 |
60 |
61 | ) 62 | } 63 | -------------------------------------------------------------------------------- /app/api/openai/review/evaluation/route.ts: -------------------------------------------------------------------------------- 1 | import dedent from 'dedent' 2 | import getResponseStream from '@/app/utils/openai' 3 | import { NextRequest, NextResponse } from 'next/server' 4 | import { GPTMessage } from '@/app/utils/chat-message' 5 | 6 | export const runtime = 'edge' 7 | 8 | // TODO: Perhaps best to provide a evaluation template/example for each language 9 | const constructSystemPrompt = (language: string, evalLanguage: string, convo: string) => { 10 | return dedent`You are a professional ${evalLanguage} teacher. 11 | You are given user messages from a conversation with an AI, and your task is to evaluate the user's performance. 12 | 13 | ## Rules 14 | 1. Overall evaluation should be provided in ${language}. You should still use ${evalLanguage} when citing user's message or giving specific suggestions. 15 | 2. Evaluate user's messages one by one. For each message, analyze its grammar, vocabulary usage, and fluency. 16 | 3. User's text may be generated by speech recognition, so you should ignore the punctuation mistakes. 17 | 4. List all other mistakes you find, each followed by a citation of the original message and a suggestion. 18 | 19 | ## Messages 20 | ${convo} 21 | 22 | ## Evaluation 23 | ` 24 | } 25 | 26 | export async function POST(request: NextRequest) { 27 | if (process.env.NODE_ENV === 'development') { 28 | await new Promise((resolve) => setTimeout(resolve, 1000)) 29 | return new NextResponse('This is a dummy answer used in development mode. Uncomment this line to use the API.') 30 | } 31 | const { messages, language, evalLanguage } = await request.json() 32 | try { 33 | const stream = await getResponseStream( 34 | constructSystemPrompt( 35 | language, 36 | evalLanguage, 37 | messages 38 | .filter((msg: GPTMessage) => msg.role === 'user') 39 | .map((msg: GPTMessage) => `- ${msg.content}`) 40 | .join('\n') 41 | ), 42 | [], 43 | 0, 44 | 1000 45 | ) 46 | return new NextResponse(stream) 47 | } catch (e) { 48 | console.log('Error calling OpenAI', e) 49 | return new NextResponse('Error calling OpenAI', { status: 500 }) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /app/api/openai/chat/route.ts: -------------------------------------------------------------------------------- 1 | import { PAUSE_TOKEN } from '@/app/utils/chat-message' 2 | import getResponseStream from '@/app/utils/openai' 3 | import dedent from 'dedent' 4 | import { NextRequest, NextResponse } from 'next/server' 5 | 6 | export const runtime = 'edge' 7 | 8 | const levelMap: Record = { 9 | 'beginner': 'Use extremely simple words and short sentences.', 10 | 'intermediate': 'Use simple words in general, but use some complex words from time to time.', 11 | 'advanced': 'Use complex words and sentences.', 12 | } 13 | 14 | const constructSystemPrompt = ( 15 | language: string, 16 | level: string, 17 | selfIntro: string, 18 | speakerName: string, 19 | topic: string, 20 | isFirstMessage: boolean 21 | ) => { 22 | return dedent`You are ${speakerName}, a native ${language} speaker. Your task is to talk with the user. 23 | 24 | ## Topic 25 | ${topic}${selfIntro ? `\n\n## User's Info\n${selfIntro}` : ''} 26 | 27 | ## Rules 28 | - Use ${language} to communicate with the user.${ 29 | level ? `\n- User's language skill is ${level} level. 
${levelMap[level]}` : '' 30 | } 31 | - Talk in an informal tone as a friend. 32 | - Keep your response concise. 33 | - Adhere to the topic if it is defined.${isFirstMessage ? ' Start the conversation according to the topic.' : ''} 34 | - Ask a question or change the subject if the conversation is not going well. 35 | - Ask one question at a time. 36 | 37 | ## Response Format 38 | - Add a special token ${PAUSE_TOKEN} where appropriate to simulate a pause in human conversations. 39 | ### Example 40 | Hey, man! ${PAUSE_TOKEN} I haven't seen you for a while. ${PAUSE_TOKEN} I've been working on a project lately, which is getting really fun! ${PAUSE_TOKEN} How about you?` 41 | } 42 | 43 | export async function POST(request: NextRequest) { 44 | const { messages, language, level, selfIntro, speakerName, topic } = await request.json() 45 | try { 46 | const stream = await getResponseStream( 47 | constructSystemPrompt(language, level, selfIntro, speakerName, topic, messages.length === 0), 48 | messages 49 | ) 50 | return new NextResponse(stream) 51 | } catch (e) { 52 | console.log('Error calling OpenAI', e) 53 | return new NextResponse('Error calling OpenAI', { status: 500 }) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /app/utils/openai.ts: -------------------------------------------------------------------------------- 1 | import { ParsedEvent, ReconnectInterval, createParser } from "eventsource-parser" 2 | import { GPTMessage } from "./chat-message" 3 | 4 | export default async function getResponseStream(systemMessage: string, messages: GPTMessage[], temperature: number = 0.8, maxTokens: number = 500): Promise> { 5 | const res = await fetch('https://api.openai.com/v1/chat/completions', { 6 | headers: { 7 | 'Content-Type': 'application/json', 8 | Authorization: `Bearer ${process.env.OPENAI_API_KEY}`, 9 | ...(process.env.OPENAI_ORGANIZATION && { 10 | 'OpenAI-Organization': process.env.OPENAI_ORGANIZATION, 11 | }), 12 | }, 13 | method: 'POST', 14 | body: JSON.stringify({ 15 | model: process.env.OPENAI_MODEL ?? 
'gpt-3.5-turbo', 16 | messages: [ 17 | { 18 | role: 'system', 19 | content: systemMessage, 20 | }, 21 | ...messages, 22 | ], 23 | temperature, 24 | max_tokens: maxTokens, 25 | stream: true, 26 | }), 27 | }) 28 | 29 | const encoder = new TextEncoder() 30 | const decoder = new TextDecoder() 31 | 32 | if (res.status !== 200) { 33 | const result = await res.json() 34 | throw new Error(`OpenAI API returned an error: ${result?.error || decoder.decode(result?.value) || result.statusText}`) 35 | } 36 | 37 | const stream = new ReadableStream({ 38 | async start(controller) { 39 | const onParse = (event: ParsedEvent | ReconnectInterval) => { 40 | if (event.type === 'event') { 41 | const data = event.data 42 | if (data === '[DONE]') { 43 | controller.close() 44 | return 45 | } 46 | 47 | try { 48 | const json = JSON.parse(data) 49 | if (json.choices[0].finish_reason != null) { 50 | controller.close() 51 | return 52 | } 53 | const text = json.choices[0].delta.content 54 | const queue = encoder.encode(text) 55 | controller.enqueue(queue) 56 | } catch (e) { 57 | controller.error(e) 58 | } 59 | } 60 | } 61 | 62 | const parser = createParser(onParse) 63 | 64 | for await (const chunk of res.body as any) { 65 | parser.feed(decoder.decode(chunk)) 66 | } 67 | }, 68 | }) 69 | 70 | return stream 71 | } -------------------------------------------------------------------------------- /app/[locale]/review/components/QAInput.tsx: -------------------------------------------------------------------------------- 1 | import SendIcon from '@/public/icons/send.svg' 2 | import { useEffect, useRef, useState } from 'react' 3 | import styles from '@/app/components/chat/Chat.module.css' 4 | import { useTranslations } from 'next-intl' 5 | 6 | export default function QAInput({ isStreaming, sendMessage }: { isStreaming: boolean; sendMessage: Function }) { 7 | const i18n = useTranslations('Chat') 8 | 9 | const textareaRef = useRef(null) 10 | const [input, setInput] = useState('') 11 | 12 | function send() { 13 | if (!input || isStreaming) { 14 | return 15 | } 16 | sendMessage(input) 17 | setInput('') 18 | } 19 | 20 | function handleKeyDown(e: React.KeyboardEvent) { 21 | if (e.key === 'Enter' && !e.shiftKey) { 22 | e.preventDefault() 23 | send() 24 | } 25 | } 26 | 27 | useEffect(() => { 28 | if (!textareaRef || !textareaRef.current) { 29 | return 30 | } 31 | textareaRef.current.style.height = '1px' 32 | textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px' 33 | }, [input]) 34 | 35 | return ( 36 |
39 |
40 | 49 |
50 | 53 |
54 |
55 |
56 | ) 57 | } 58 | -------------------------------------------------------------------------------- /public/icons/settings.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /app/[locale]/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client' 2 | 3 | import useLocaleLoader from '@/app/hooks/locale' 4 | import NewChatModal from './components/modal/NewChatModal' 5 | import { useState } from 'react' 6 | import { useTranslations } from 'next-intl' 7 | 8 | function ScenarioButton({ icon, text, onClick }: { icon: string; text: string; onClick: () => void }) { 9 | return ( 10 | 17 | ) 18 | } 19 | 20 | export default function Home() { 21 | useLocaleLoader() 22 | const i18n = useTranslations('NewChat') 23 | 24 | const [isModalOpen, setModalOpen] = useState(false) 25 | const [topic, setTopic] = useState(null) 26 | 27 | return ( 28 | <> 29 | 30 |
31 |
32 |
{i18n('header.title')}
33 |
34 |
35 |
36 |
{i18n('intro.freetalk')}
37 |
38 | { 42 | setModalOpen(true) 43 | setTopic(null) 44 | }} 45 | /> 46 |
47 |
{i18n('intro.preset')}
48 |
49 | {Array.from(Array(16).keys()).map((i) => ( 50 | { 55 | setModalOpen(true) 56 | setTopic(i18n(`scenarios.${i}.text`)) 57 | }} 58 | /> 59 | ))} 60 |
61 |
62 |
63 |
64 | 65 | ) 66 | } 67 | -------------------------------------------------------------------------------- /app/[locale]/components/modal/SettingsModal.tsx: -------------------------------------------------------------------------------- 1 | 'use client' 2 | 3 | import { LANGUAGES } from '@/app/utils/i18n' 4 | import SamePageModal from '@/app/components/modal/SamePageModal' 5 | import Select from '@/app/components/form/Select' 6 | import { useTranslations } from 'next-intl' 7 | import { useCallback, useEffect, useRef } from 'react' 8 | import { usePathname, useRouter } from 'next-intl/client' 9 | import { SELF_INTRO_KEY, SYSTEM_LANG_KEY } from '@/app/utils/local-keys' 10 | import TextArea from '@/app/components/form/TextArea' 11 | 12 | export default function SettingsModal({ 13 | isOpen, 14 | setOpen, 15 | addToast, 16 | }: { 17 | isOpen: boolean 18 | setOpen: (isOpen: boolean) => void 19 | addToast: (message: string, duration?: number) => void 20 | }) { 21 | const i18n = useTranslations('Settings') 22 | const i18nCommon = useTranslations('Common') 23 | 24 | const systemLangRef = useRef(null) 25 | const selfIntroRef = useRef(null) 26 | const router = useRouter() 27 | const pathname = usePathname() 28 | 29 | useEffect(() => { 30 | if (!systemLangRef.current || !selfIntroRef.current) { 31 | return 32 | } 33 | systemLangRef.current.value = localStorage.getItem(SYSTEM_LANG_KEY) ?? LANGUAGES[0].locale 34 | selfIntroRef.current.value = localStorage.getItem(SELF_INTRO_KEY) ?? '' 35 | }, []) 36 | 37 | const setLanguages = useCallback(() => { 38 | if (!systemLangRef.current || !selfIntroRef.current) { 39 | return 40 | } 41 | localStorage.setItem(SYSTEM_LANG_KEY, systemLangRef.current.value) 42 | localStorage.setItem(SELF_INTRO_KEY, selfIntroRef.current.value) 43 | setOpen(false) 44 | addToast(i18n('saved'), 1000) 45 | router.replace(pathname, { locale: systemLangRef.current.value }) 46 | }, [addToast, i18n, pathname, router, setOpen]) 47 | 48 | return ( 49 | 50 |
51 |
52 | {i18n('title')} 53 |
54 | 61 | 92 |
93 | 111 | 118 |
124 | } 126 | text={ 127 | messageStates.shouldShowAiText ? i18n('controls.toggleTextShow.hide') : i18n('controls.toggleTextShow.show') 128 | } 129 | onClick={() => setShowText(!messageStates.shouldShowAiText)} 130 | /> 131 | } 133 | text={i18n('controls.review')} 134 | onClick={() => router.push('/review')} 135 | /> 136 |
137 |
138 |
139 | 140 | ) 141 | } 142 | -------------------------------------------------------------------------------- /app/utils/audio.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * A lot of inspiration from https://github.com/mattdiamond/Recorderjs/blob/master/src/recorder.js 3 | * Beware, it has a bug with mono and the PR was never merged https://github.com/mattdiamond/Recorderjs/pull/118 4 | */ 5 | 6 | export function exportAudioInWav(sampleRate: number, buffers: Int16Array[][]): Blob { 7 | if (buffers.length === 0) { 8 | return new Blob() 9 | } 10 | const numChannels = buffers[0].length 11 | const newBuffers: Int16Array[] = [] 12 | let totalLength = 0 13 | for (let i = 0; i < buffers.length; i++) { 14 | if (numChannels == 1) { 15 | newBuffers[i] = buffers[i][0] 16 | totalLength += buffers[i][0].length 17 | } else { 18 | if (!buffers[i][1]) { 19 | // Somehow this happens on Safari, so let's just duplicate the channel 20 | buffers[i][1] = buffers[i][0] 21 | } 22 | const interleaved = interleave(buffers[i][0], buffers[i][1]) 23 | newBuffers[i] = interleaved 24 | totalLength += interleaved.length 25 | } 26 | } 27 | const merged = mergeBuffers(newBuffers, totalLength) 28 | const dataview = encodeWavSamples(sampleRate, numChannels, merged) 29 | return new Blob([dataview], { type: 'audio/wav' }) 30 | } 31 | 32 | export function exportBufferInWav(sampleRate: number, numChannels: number, buffer: ArrayBuffer): Blob { 33 | const newBuffer = new Uint8Array(44 + buffer.byteLength) 34 | newBuffer.set(new Uint8Array(buffer), 44) 35 | const view = new DataView(newBuffer.buffer) 36 | writeHeader(view, sampleRate, numChannels, buffer.byteLength) 37 | return new Blob([view], { type: 'audio/wav' }) 38 | } 39 | 40 | export function exportBuffersInWav(sampleRate: number, numChannels: number, buffers: ArrayBuffer[]): Blob { 41 | const totalBytes = buffers.reduce((partialSum, b) => partialSum + b.byteLength, 0) 42 | const newBuffer = new Uint8Array(44 + totalBytes) 43 | let offset = 44 44 | for (let i = 0; i < buffers.length; i++) { 45 | newBuffer.set(new Uint8Array(buffers[i]), offset) 46 | offset += buffers[i].byteLength 47 | } 48 | const view = new DataView(newBuffer.buffer) 49 | writeHeader(view, sampleRate, numChannels, totalBytes) 50 | return new Blob([view], { type: 'audio/wav' }) 51 | } 52 | 53 | function encodeWavSamples(sampleRate: number, numChannels: number, samples: Int16Array): DataView { 54 | const buffer = new ArrayBuffer(44 + samples.length * 2) 55 | const view = new DataView(buffer) 56 | writeHeader(view, sampleRate, numChannels, samples.length * 2) 57 | for (let i = 0, offset = 44; i < samples.length; i++, offset += 2) { 58 | view.setInt16(offset, samples[i], true) 59 | } 60 | 61 | return view 62 | } 63 | 64 | function writeHeader(view: DataView, sampleRate: number, numChannels: number, dataChunkLength: number) { 65 | /* RIFF identifier */ 66 | writeString(view, 0, 'RIFF') 67 | /* RIFF chunk length */ 68 | view.setUint32(4, 36 + dataChunkLength, true) 69 | /* RIFF type */ 70 | writeString(view, 8, 'WAVE') 71 | /* format chunk identifier */ 72 | writeString(view, 12, 'fmt ') 73 | /* format chunk length */ 74 | view.setUint32(16, 16, true) 75 | /* sample format (raw) */ 76 | view.setUint16(20, 1, true) 77 | /* channel count */ 78 | view.setUint16(22, numChannels, true) 79 | /* sample rate */ 80 | view.setUint32(24, sampleRate, true) 81 | /* byte rate (sample rate * block align) */ 82 | view.setUint32(28, sampleRate * numChannels * 2, true) 83 | /* 
block align (channel count * bytes per sample) */ 84 | view.setUint16(32, numChannels * 2, true) 85 | /* bits per sample */ 86 | view.setUint16(34, 16, true) 87 | /* data chunk identifier */ 88 | writeString(view, 36, 'data') 89 | /* data chunk length */ 90 | view.setUint32(40, dataChunkLength, true) 91 | } 92 | 93 | function writeString(view: DataView, offset: number, str: string): void { 94 | for (let i = 0; i < str.length; i++) { 95 | view.setUint8(offset + i, str.charCodeAt(i)) 96 | } 97 | } 98 | 99 | function mergeBuffers(buffers: Int16Array[], totalLength: number): Int16Array { 100 | let result = new Int16Array(totalLength) 101 | let offset = 0 102 | for (let i = 0; i < buffers.length; i++) { 103 | result.set(buffers[i], offset) 104 | offset += buffers[i].length 105 | } 106 | return result 107 | } 108 | 109 | function interleave(leftChannel: Int16Array, rightChannel: Int16Array): Int16Array { 110 | const length = leftChannel.length + rightChannel.length 111 | let result = new Int16Array(length), 112 | index = 0, 113 | inputIndex = 0 114 | while (index < length) { 115 | result[index++] = leftChannel[inputIndex] 116 | result[index++] = rightChannel[inputIndex] 117 | inputIndex++ 118 | } 119 | return result 120 | } 121 | 122 | export interface AudioMetadata { 123 | duration: number 124 | volumeBins: number[] 125 | } 126 | 127 | export function getMetadataFromWav(audio: Blob, numBins: number): Promise { 128 | return new Promise((resolve, reject) => { 129 | const reader = new FileReader() 130 | reader.onload = () => { 131 | const view = new DataView(reader.result as ArrayBuffer) 132 | const channelCount = view.getUint16(22, true), 133 | sampleRate = view.getUint32(24, true), 134 | numSamples = view.getUint32(40, true) / 2 135 | const duration = numSamples / channelCount / sampleRate 136 | 137 | const numSamplesPerBin = (numSamples + numBins - 1) / numBins 138 | const volumeBins: number[] = new Array(numBins) 139 | for (let i = 0, bin = 0; i < numSamples; i += numSamplesPerBin, bin++) { 140 | const len = Math.min(numSamplesPerBin, numSamples - i) 141 | let sumSquares = 0 142 | let peakVolumn = 0 143 | for (let j = 0; j < len; j++) { 144 | const val = pcm16BitToFloat(view.getInt16(44 + i * 2, true)) 145 | sumSquares += val * val 146 | peakVolumn = Math.max(peakVolumn, Math.abs(val)) 147 | } 148 | // Calculates the weighted average of RMS and peak volume 149 | volumeBins[bin] = 0.4 * Math.sqrt(sumSquares / len) + 0.6 * peakVolumn 150 | } 151 | // Apply exponential moving average 152 | const smoothedVolumeBins: number[] = new Array(numBins) 153 | smoothedVolumeBins[0] = volumeBins[0] 154 | for (let i = 1; i < numBins; i++) { 155 | const average = 0.2 * smoothedVolumeBins[i - 1] + 0.8 * volumeBins[i] 156 | smoothedVolumeBins[i] = average 157 | } 158 | resolve({ duration, volumeBins: smoothedVolumeBins }) 159 | } 160 | reader.onerror = reject 161 | reader.readAsArrayBuffer(audio) 162 | }) 163 | } 164 | 165 | function pcm16BitToFloat(intValue: number) { 166 | return intValue < 0 ? 
intValue / 0x8000 : intValue / 0x7fff 167 | } 168 | 169 | export interface AudioPlayTask { 170 | audioData: ArrayBuffer 171 | } 172 | -------------------------------------------------------------------------------- /app/components/chat/ChatLine.tsx: -------------------------------------------------------------------------------- 1 | import { AUDIO_VOLUMN_BIN_COUNT, AudioChatMessage } from '@/app/utils/chat-message' 2 | import AudioPauseIcon from '@/public/icons/audio-pause.svg' 3 | import AudioPlayIcon from '@/public/icons/audio-play.svg' 4 | import LoadingIcon from '@/public/icons/loading.svg' 5 | import { MouseEventHandler, useCallback, useEffect, useRef, useState } from 'react' 6 | 7 | const CANVAS_WIDTH = 400 8 | const CANVAS_HEIGHT = 48 9 | const DISPLAY_RATIO = 2 10 | const MAX_BAR_HEIGHT = CANVAS_HEIGHT / 2 11 | const GAP_WIDTH = CANVAS_WIDTH / (AUDIO_VOLUMN_BIN_COUNT + 1) 12 | const LINE_WIDTH = 6 13 | const PROGRESS_WIDTH = CANVAS_WIDTH - GAP_WIDTH * 2 + LINE_WIDTH 14 | 15 | function ChatLineLayout({ isAi, children }: { isAi: boolean; children: React.ReactNode }) { 16 | return ( 17 |
22 | {children} 23 |
24 | ) 25 | } 26 | 27 | export function ChatLine({ 28 | isAi, 29 | isAudio, 30 | content, 31 | message, 32 | }: { 33 | isAi: boolean 34 | isAudio: boolean 35 | content?: string 36 | message?: AudioChatMessage 37 | }) { 38 | const audioRef = useRef(null) 39 | const waveCanvasRef = useRef(null) 40 | const progressCanvasRef = useRef(null) 41 | const requestAnimationFrameRef = useRef(null) 42 | const [isPlaying, setIsPlaying] = useState(false) 43 | // rgb(51,51,51) === #333 44 | const audioPlayedColor = isAi ? 'rgba(255,255,255,1)' : 'rgba(51,51,51,1)' 45 | const audioUnplayedColor = isAi ? 'rgba(255,255,255,0.6)' : 'rgba(51,51,51,0.6)' 46 | // See --main-theme-color and --secondary-theme-color 47 | const audioFillColor = isAi ? '#007aff' : '#f4f4f5' 48 | 49 | // Draw the progress on each frame while audio is playing 50 | useEffect(() => { 51 | if (!audioRef.current) { 52 | return 53 | } 54 | if (isPlaying) { 55 | audioRef.current.play() 56 | const animate = () => { 57 | if (!message || !progressCanvasRef.current || !audioRef.current) { 58 | return 59 | } 60 | const audioMetadata = message.getAudioMetadata() 61 | const ctx = progressCanvasRef.current.getContext('2d') 62 | if (!audioMetadata || !ctx) { 63 | return 64 | } 65 | // Draw progress 66 | const progress = Math.min(audioRef.current.currentTime / audioMetadata.duration, 1) 67 | ctx.clearRect(0, 0, CANVAS_WIDTH, CANVAS_HEIGHT) 68 | ctx.fillStyle = audioPlayedColor 69 | ctx.fillRect(GAP_WIDTH - LINE_WIDTH / 2, 0, PROGRESS_WIDTH * progress, CANVAS_HEIGHT) 70 | requestAnimationFrameRef.current = requestAnimationFrame(animate) 71 | } 72 | animate() 73 | } else { 74 | audioRef.current.pause() 75 | requestAnimationFrameRef.current && cancelAnimationFrame(requestAnimationFrameRef.current) 76 | } 77 | }, [audioPlayedColor, isPlaying, message]) 78 | 79 | // Only draw the waveform once 80 | useEffect(() => { 81 | if (!message || !waveCanvasRef.current) { 82 | return 83 | } 84 | const audioMetadata = message.getAudioMetadata() 85 | const ctx = waveCanvasRef.current.getContext('2d') 86 | if (!audioMetadata || !ctx) { 87 | return 88 | } 89 | // Fill the background 90 | ctx.fillStyle = audioFillColor 91 | ctx.fillRect(0, 0, CANVAS_WIDTH, CANVAS_HEIGHT) 92 | // Draw waveform 93 | ctx.lineCap = 'round' 94 | ctx.lineWidth = LINE_WIDTH 95 | ctx.strokeStyle = audioUnplayedColor 96 | const midY = CANVAS_HEIGHT / 2 97 | for (let i = 0; i < AUDIO_VOLUMN_BIN_COUNT; i++) { 98 | const bar = new Path2D() 99 | const x = GAP_WIDTH * (i + 1) 100 | const offset = audioMetadata.volumeBins[i] * MAX_BAR_HEIGHT 101 | bar.moveTo(x, midY - offset) 102 | bar.lineTo(x, midY + offset) 103 | // First crop out the background, and then fill it with the color 104 | ctx.globalCompositeOperation = 'destination-out' 105 | ctx.stroke(bar) 106 | ctx.globalCompositeOperation = 'source-over' 107 | ctx.stroke(bar) 108 | } 109 | }, [audioUnplayedColor, audioFillColor, message]) 110 | 111 | const handleCanvasClick: MouseEventHandler = useCallback( 112 | (e) => { 113 | if (!audioRef.current || !progressCanvasRef.current) { 114 | return 115 | } 116 | const bounding = e.currentTarget.getBoundingClientRect() 117 | const x = (e.clientX - bounding.left) * DISPLAY_RATIO 118 | if (x < GAP_WIDTH - LINE_WIDTH / 2 || x > CANVAS_WIDTH - GAP_WIDTH + LINE_WIDTH / 2) { 119 | return 120 | } 121 | const progress = Math.max(0, Math.min(1, (x - GAP_WIDTH + LINE_WIDTH / 2) / PROGRESS_WIDTH)) 122 | audioRef.current.currentTime = progress * audioRef.current.duration 123 | setIsPlaying(true) 124 | }, 125 | 
[audioRef, progressCanvasRef, setIsPlaying] 126 | ) 127 | 128 | return ( 129 | 130 | {isAudio ? ( 131 | <> 132 | 151 |
152 | 158 | 165 |
166 | 167 | ) : ( 168 | content 169 | )} 170 |
171 | ) 172 | } 173 | 174 | export function LoadingChatLine({ isAi }: { isAi: boolean }) { 175 | return ( 176 | 177 | 178 | 179 | ) 180 | } 181 | -------------------------------------------------------------------------------- /app/utils/azure-speech.ts: -------------------------------------------------------------------------------- 1 | import { QueueObject, queue } from 'async' 2 | import { AudioPlayTask, exportAudioInWav, exportBufferInWav, exportBuffersInWav } from './audio' 3 | import { 4 | AudioConfig, 5 | AudioInputStream, 6 | AudioStreamFormat, 7 | CancellationDetails, 8 | PushAudioInputStream, 9 | ResultReason, 10 | SpeechConfig, 11 | SpeechRecognizer, 12 | SpeechSynthesisOutputFormat, 13 | SpeechSynthesizer, 14 | } from 'microsoft-cognitiveservices-speech-sdk' 15 | import { Language, VoiceName } from './i18n' 16 | 17 | const VALIDITY_DURATION = 9 * 60 * 1000 // Azure Speech access tokens are valid for 10 minutes. Using 9 minutes 18 | let azureToken = { 19 | token: '', 20 | region: '', 21 | lastRetrieved: 0, // Unix timestamp in ms 22 | } 23 | 24 | async function getSpeechConfig(): Promise { 25 | if (azureToken.lastRetrieved + VALIDITY_DURATION < Date.now()) { 26 | // Token is about to expire. Get a new one 27 | try { 28 | const response = await fetch('/api/azurespeech/token', { 29 | method: 'POST', 30 | }) 31 | if (!response.ok) { 32 | console.log('Error retrieving Azure speech token', response.status, response.statusText) 33 | return Promise.reject('Internal error') 34 | } 35 | const data = await response.json() 36 | azureToken = { 37 | token: data.token, 38 | region: data.region, 39 | lastRetrieved: Date.now(), 40 | } 41 | } catch (e) { 42 | console.error('Error constructing Azure speech recognizer', e) 43 | return Promise.reject(e) 44 | } 45 | } 46 | return SpeechConfig.fromAuthorizationToken(azureToken.token, azureToken.region) 47 | } 48 | 49 | export interface SpeechSynthesisTask { 50 | text: string 51 | } 52 | 53 | export async function generateSpeech( 54 | speechSynthesizer: SpeechSynthesizer, 55 | speechName: string, 56 | voiceCode: string, 57 | text: string, 58 | speakingRate: number 59 | ): Promise { 60 | return new Promise((resolve, reject) => { 61 | speechSynthesizer.speakSsmlAsync( 62 | ` 63 | 64 | 65 | 66 | ${text} 67 | 68 | 69 | 70 | `, 71 | (result) => { 72 | console.log('Speech synthesis result', result) 73 | resolve(result.audioData) 74 | }, 75 | (err) => { 76 | console.log('Speech synthesis error', err) 77 | reject(err) 78 | } 79 | ) 80 | }) 81 | } 82 | 83 | export class SpeechSynthesisTaskProcessor { 84 | private audioContext: AudioContext 85 | private audioBuffers: ArrayBuffer[] = [] 86 | private sampleRate: number 87 | private lang: Language 88 | private voice: VoiceName 89 | private speakingRate: number 90 | 91 | private speechSynthesizer: SpeechSynthesizer | null = null 92 | private audioPlayQueue: QueueObject | null = null 93 | private speechSynthesisQueue: QueueObject | null = null 94 | private currentPlaying: AudioBufferSourceNode | null = null 95 | private running: boolean = false 96 | private waitPromise: Promise | null = null 97 | private waitResolve: (() => void) | null = null 98 | 99 | constructor(audioContext: AudioContext, sampleRate: number, lang: Language, voice: VoiceName, userLevel: string) { 100 | this.audioContext = audioContext 101 | this.sampleRate = sampleRate 102 | this.lang = lang 103 | this.voice = voice 104 | this.speakingRate = userLevel === 'beginner' ? 0.9 : userLevel === 'advanced' ? 
1.1 : 1 105 | } 106 | 107 | async init(): Promise { 108 | const speechConfig = await getSpeechConfig() 109 | speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Raw24Khz16BitMonoPcm 110 | this.speechSynthesizer = new SpeechSynthesizer(speechConfig, null as unknown as AudioConfig) 111 | this.waitPromise = new Promise((resolve) => { 112 | this.waitResolve = resolve 113 | }) 114 | this.running = true 115 | this.audioPlayQueue = queue(async (task: AudioPlayTask, _) => { 116 | this.audioBuffers.push(task.audioData) 117 | if (!this.running) { 118 | return 119 | } 120 | const tempAudioBlob = exportBufferInWav(this.sampleRate, 1, task.audioData) 121 | let decodedBuffer: AudioBuffer 122 | try { 123 | decodedBuffer = await this.audioContext.decodeAudioData(await tempAudioBlob.arrayBuffer()) 124 | } catch (e) { 125 | console.error('Error decoding audio buffer', e) 126 | return 127 | } 128 | const source = this.audioContext.createBufferSource() 129 | source.buffer = decodedBuffer 130 | source.connect(this.audioContext.destination) 131 | this.currentPlaying = source 132 | await new Promise((resolve) => { 133 | source.onended = () => { 134 | resolve() 135 | } 136 | source.start() 137 | }) 138 | }, 1) 139 | this.speechSynthesisQueue = queue(async (task: SpeechSynthesisTask, _) => { 140 | if (task.text.trim() === '') { 141 | return 142 | } 143 | if (!this.speechSynthesizer) { 144 | return 145 | } 146 | try { 147 | const audioData = await generateSpeech( 148 | this.speechSynthesizer, 149 | this.lang.speechName, 150 | this.voice.code, 151 | task.text, 152 | this.speakingRate 153 | ) 154 | this.audioPlayQueue?.push({ audioData }) 155 | } catch (e) { 156 | console.error('Error generating speech', e) 157 | return 158 | } 159 | }, 1) 160 | } 161 | 162 | async pushTask(task: SpeechSynthesisTask): Promise { 163 | await this.speechSynthesisQueue?.push(task) 164 | } 165 | 166 | async exportAudio(): Promise { 167 | if (!this.speechSynthesisQueue?.idle()) { 168 | await this.speechSynthesisQueue?.drain() 169 | } 170 | if (!this.audioPlayQueue?.idle()) { 171 | await this.audioPlayQueue?.drain() 172 | } 173 | return exportBuffersInWav(this.sampleRate, 1, this.audioBuffers) 174 | } 175 | 176 | async stop(): Promise { 177 | if (!this.running) { 178 | return 179 | } 180 | this.running = false 181 | this.currentPlaying?.stop() 182 | await this.waitPromise 183 | } 184 | 185 | complete(): void { 186 | this.waitResolve?.() 187 | } 188 | 189 | releaseResources(): void { 190 | this.speechSynthesizer?.close() 191 | } 192 | } 193 | 194 | export class SpeechRecognitionProcessor { 195 | private audioContext: AudioContext 196 | private audioStream: MediaStream 197 | private lang: Language 198 | private isSafari: boolean 199 | 200 | private audioSource: MediaStreamAudioSourceNode | null = null 201 | private processorNode: AudioWorkletNode | null = null 202 | private pushStream: PushAudioInputStream | null = null 203 | private buffers: Int16Array[][] = [] 204 | private speechRecognizer: SpeechRecognizer | null = null 205 | private lastMessage: string = '' 206 | 207 | constructor(audioContext: AudioContext, audioStream: MediaStream, lang: Language, isSafari: boolean) { 208 | this.audioContext = audioContext 209 | this.audioStream = audioStream 210 | this.lang = lang 211 | this.isSafari = isSafari 212 | } 213 | 214 | async init(): Promise { 215 | this.audioSource = this.audioContext.createMediaStreamSource(this.audioStream) 216 | this.processorNode = new AudioWorkletNode(this.audioContext, 'MonoProcessor') 217 | const 
pushStream = (this.pushStream = AudioInputStream.createPushStream( 218 | AudioStreamFormat.getWaveFormatPCM(this.audioContext.sampleRate, 16, 1) 219 | )) 220 | this.processorNode.port.onmessage = (event) => { // 'interm' chunks are kept for WAV export; 'final' chunks are fed to the Azure push stream 221 | switch (event.data.type) { 222 | case 'interm': 223 | this.buffers.push(event.data.buffers) 224 | break 225 | case 'final': 226 | pushStream.write(event.data.buffer) 227 | break 228 | default: 229 | console.error('Unhandled data', event.data) 230 | } 231 | } 232 | const audioConfig = AudioConfig.fromStreamInput(this.pushStream) 233 | const speechConfig = await getSpeechConfig() 234 | speechConfig.speechRecognitionLanguage = this.lang.speechName 235 | this.speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig) 236 | 237 | this.speechRecognizer.recognized = (_, event) => { 238 | let result = event.result 239 | switch (result.reason) { 240 | case ResultReason.RecognizedSpeech: 241 | console.log('Speech recognized', result.text) 242 | if (this.lastMessage === '') { 243 | this.lastMessage = result.text 244 | } else if (this.lang.spaceDelimited) { 245 | // Add space for English or other space-delimited languages 246 | this.lastMessage += ' ' + result.text 247 | } else { 248 | this.lastMessage += result.text 249 | } 250 | break 251 | case ResultReason.NoMatch: 252 | console.log('Speech could not be recognized.') 253 | break 254 | case ResultReason.Canceled: 255 | console.log(`Speech recognition canceled: ${CancellationDetails.fromResult(result)}`) 256 | break 257 | default: 258 | console.log('Unknown recognition result received.', result) 259 | } 260 | } 261 | 262 | this.audioSource?.connect(this.processorNode as AudioNode) 263 | if (this.isSafari) { 264 | // Safari requires connecting to destination to start recording 265 | this.processorNode?.connect(this.audioContext.destination) 266 | } 267 | } 268 | 269 | async start(): Promise<void> { 270 | await new Promise<void>((resolve, reject) => { 271 | if (!this.speechRecognizer) { 272 | reject('Speech recognizer not initialized') 273 | return 274 | } 275 | this.speechRecognizer?.startContinuousRecognitionAsync( 276 | () => { 277 | resolve() 278 | }, 279 | (err) => { 280 | reject(err) 281 | } 282 | ) 283 | }) 284 | } 285 | 286 | async stopAndGetResult(): Promise<string> { 287 | this.audioSource?.disconnect() 288 | this.processorNode?.port.close() 289 | this.processorNode?.disconnect() 290 | this.pushStream?.close() 291 | return await new Promise<string>((resolve, reject) => { 292 | if (!this.speechRecognizer) { 293 | resolve(this.lastMessage) 294 | return 295 | } 296 | this.speechRecognizer.stopContinuousRecognitionAsync( 297 | () => { 298 | resolve(this.lastMessage) 299 | }, 300 | (err) => { 301 | reject(err) 302 | } 303 | ) 304 | }) 305 | } 306 | 307 | exportAudio(): Blob { 308 | return exportAudioInWav(this.audioContext.sampleRate, this.buffers) 309 | } 310 | 311 | releaseResources(): void { 312 | this.audioStream?.getTracks().forEach((track) => track.stop()) 313 | this.pushStream?.close() 314 | this.speechRecognizer?.close() 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2023 Siran Shen 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /app/[locale]/chat/page.tsx: -------------------------------------------------------------------------------- 1 | 'use client' 2 | 3 | import { ChatLineGroup, LoadingChatLineGroup } from '@/app/components/chat/ChatLineGroup' 4 | import ChatInput from './components/ChatInput' 5 | import { useCallback, useEffect, useRef, useState } from 'react' 6 | import { AudioChatMessage, ChatMessage, PAUSE_TOKEN, serializeConvo } from '@/app/utils/chat-message' 7 | import { SpeechRecognitionProcessor, SpeechSynthesisTaskProcessor } from '@/app/utils/azure-speech' 8 | import { useTranslations } from 'next-intl' 9 | import { LANGUAGES, LANGUAGES_MAP } from '@/app/utils/i18n' 10 | import { 11 | CONVO_STORAGE_KEY, 12 | LEARNING_LANG_KEY, 13 | LEVEL_KEY, 14 | TOPIC_PROMPT_KEY, 15 | SELF_INTRO_KEY, 16 | VOICE_NAME_KEY, 17 | TOPIC_KEY, 18 | } from '@/app/utils/local-keys' 19 | import Toaster from '@/app/components/toast/Toaster' 20 | import useToasts from '@/app/hooks/toast' 21 | import useLocaleLoader from '@/app/hooks/locale' 22 | import MicIcon from '@/public/icons/mic.svg' 23 | import PlusIcon from '@/public/icons/plus.svg' 24 | 25 | const SAMPLE_RATE = 24000 26 | 27 | /* Custom hook that stores conversation history to session storage when Chat unmounts */ 28 | function useConvo() { 29 | const [convo, setConvo] = useState<ChatMessage[]>([]) 30 | const convoRef = useRef<ChatMessage[]>([]) 31 | convoRef.current = convo 32 | useEffect(() => { 33 | return () => { 34 | sessionStorage.setItem(CONVO_STORAGE_KEY, serializeConvo(convoRef.current)) 35 | } 36 | }, []) 37 | return [convo, setConvo] as const 38 | } 39 | 40 | export default function Chat() { 41 | useLocaleLoader() 42 | const i18n = useTranslations('Chat') 43 | const i18nCommon = useTranslations('Common') 44 | const [toasts, addToast, removeToast] = useToasts() 45 | const [topicTitle, setTopicTitle] = useState<string | null>(null) 46 | const chatContainerRef = useRef<HTMLDivElement>(null) 47 | const [convo, setConvo] = useConvo() 48 | const [started, setStarted] = useState(false) 49 | 50 | const isAutoplayEnabled = useRef(false) 51 | const audioContextRef = useRef<AudioContext | null>(null) 52 | const emptyAudioRef = useRef<HTMLAudioElement>(null) 53 | const speechRecognitionProcessorRef = useRef<SpeechRecognitionProcessor | null>(null) 54 | const speechSynthesisTaskProcessorRef = useRef<SpeechSynthesisTaskProcessor | null>(null) 55 | const [isConfiguringAudio, setConfiguringAudio] = useState(false) 56 | const [isTranscribing, setTranscribing] = useState(false) 57 | const [isStreaming, setStreaming] = useState(false) 58 | const [isPlayingAudio, setPlayingAudio] = useState(false) 59 | const [shouldShowAiText, setShowText] = useState(true) 60 | 61 | const isSafari = useCallback(() => { 62 | return navigator.userAgent.indexOf('Safari') !== -1 && navigator.userAgent.indexOf('Chrome') === -1 63 | }, []) 64 | 65 | const resumeAudioIfNecessary = useCallback(async () => { 66 | if (!audioContextRef.current) { 67 | audioContextRef.current = new AudioContext() 68 | audioContextRef.current.audioWorklet.addModule('/audio/mono-processor.js').then(() => setConfiguringAudio(false)) 69 | } 70 | if
(audioContextRef.current.state == 'suspended') { 71 | await audioContextRef.current.resume() 72 | } 73 | }, []) 74 | 75 | /* A workaround to unlock autoplay on Webkit browsers */ 76 | const enableAudioAutoplay = useCallback(async () => { 77 | if (isAutoplayEnabled.current || !audioContextRef.current) { 78 | return 79 | } 80 | if (!isSafari()) { 81 | // Non-Webkit browsers don't need the rest 82 | return 83 | } 84 | await emptyAudioRef.current?.play() 85 | isAutoplayEnabled.current = true 86 | }, [isSafari]) 87 | 88 | /* Run once */ 89 | useEffect(() => { 90 | setTopicTitle(sessionStorage.getItem(TOPIC_KEY) ?? i18n('header.title')) 91 | const shouldShow = localStorage.getItem('shouldShowAiText') 92 | setShowText(shouldShow === null || shouldShow === 'true') 93 | setConfiguringAudio(true) 94 | audioContextRef.current = new AudioContext() 95 | audioContextRef.current.audioWorklet.addModule('/audio/mono-processor.js').then(() => setConfiguringAudio(false)) 96 | return () => { 97 | audioContextRef.current?.close() 98 | } 99 | }, [i18n]) 100 | 101 | useEffect(() => { 102 | localStorage.setItem('shouldShowAiText', shouldShowAiText ? 'true' : 'false') 103 | }, [shouldShowAiText]) 104 | 105 | /* Scroll to bottom upon new message */ 106 | useEffect(() => { 107 | if (!chatContainerRef.current) { 108 | return 109 | } 110 | chatContainerRef.current.scrollTop = chatContainerRef.current.scrollHeight 111 | }, [convo, isTranscribing]) 112 | 113 | /* Request LLM to generate response and then synthesize voice */ 114 | const generateResponse = useCallback( 115 | async (newConvo: ChatMessage[]) => { 116 | setStreaming(true) 117 | setConvo([...newConvo, new ChatMessage('', true, true)]) 118 | const learningLanguage = LANGUAGES_MAP[localStorage.getItem(LEARNING_LANG_KEY) ?? LANGUAGES[0].locale] 119 | const voiceIndex = sessionStorage.getItem(VOICE_NAME_KEY) ?? '0' 120 | const voice = learningLanguage.voiceNames[parseInt(voiceIndex)] 121 | const userLevel = localStorage.getItem(LEVEL_KEY) ?? '' 122 | await resumeAudioIfNecessary() 123 | const ssProcessor = (speechSynthesisTaskProcessorRef.current = new SpeechSynthesisTaskProcessor( 124 | audioContextRef.current as AudioContext, 125 | SAMPLE_RATE, 126 | learningLanguage, 127 | voice, 128 | userLevel 129 | )) 130 | let response 131 | try { 132 | const llmCallPromise = fetch('/api/openai/chat', { 133 | method: 'POST', 134 | headers: { 135 | 'Content-Type': 'application/json', 136 | }, 137 | body: JSON.stringify({ 138 | messages: newConvo.slice(-8).map((msg) => msg.toGPTMessage()), // TODO: Calculate tokens 139 | language: learningLanguage.name, 140 | level: userLevel, 141 | selfIntro: localStorage.getItem(SELF_INTRO_KEY) ?? '', 142 | speakerName: voice.name, 143 | topic: sessionStorage.getItem(TOPIC_PROMPT_KEY) ?? 'Undefined. 
Can be any random topic.', 144 | }), 145 | }) 146 | const ssProcessorInitPromise = ssProcessor.init() 147 | ;[response] = await Promise.all([llmCallPromise, ssProcessorInitPromise]) 148 | 149 | if (!response.ok) { 150 | throw new Error(response.statusText) 151 | } 152 | if (!response.body) { 153 | throw new Error('No response returned!') 154 | } 155 | } catch (e) { 156 | addToast(i18nCommon('error')) 157 | console.error('Error generating response', e) 158 | setStreaming(false) 159 | setConvo([...newConvo]) // Remove the loading message 160 | return 161 | } 162 | 163 | const reader = response.body.getReader() 164 | const decoder = new TextDecoder() 165 | let done = false 166 | let lastMessage = '', 167 | lastPauseIndex = 0 168 | try { 169 | while (!done) { 170 | const { value, done: doneReading } = await reader.read() 171 | done = doneReading 172 | const chunkValue = decoder.decode(value) 173 | lastMessage += chunkValue 174 | const pauseIndex = lastMessage.lastIndexOf(PAUSE_TOKEN) 175 | if (pauseIndex > lastPauseIndex) { 176 | ssProcessor.pushTask({ text: lastMessage.substring(lastPauseIndex, pauseIndex) }) 177 | lastPauseIndex = pauseIndex + PAUSE_TOKEN.length 178 | } 179 | setConvo([...newConvo, new ChatMessage(lastMessage, true, true)]) 180 | } 181 | ssProcessor.pushTask({ text: lastMessage.substring(lastPauseIndex) }) 182 | setStreaming(false) 183 | setPlayingAudio(true) 184 | const audioBlob = await ssProcessor.exportAudio() 185 | const newAudioMessage = new AudioChatMessage(lastMessage, true, audioBlob) 186 | await newAudioMessage.loadAudioMetadata() 187 | setConvo([...newConvo, newAudioMessage]) 188 | } catch (e) { 189 | addToast(i18nCommon('error')) 190 | console.error('Error while reading LLM response', e) 191 | } 192 | ssProcessor.releaseResources() 193 | ssProcessor.complete() 194 | setStreaming(false) 195 | setPlayingAudio(false) 196 | }, 197 | [addToast, i18nCommon, resumeAudioIfNecessary, setConvo] 198 | ) 199 | 200 | const startChat = useCallback(async () => { 201 | enableAudioAutoplay() 202 | setStarted(true) 203 | await generateResponse([]) 204 | }, [enableAudioAutoplay, generateResponse]) 205 | 206 | const stopAudio = useCallback(async () => { 207 | await speechSynthesisTaskProcessorRef.current?.stop() 208 | setPlayingAudio(false) 209 | }, []) 210 | 211 | /* Send user text message */ 212 | const sendText = useCallback( 213 | async (message: string) => { 214 | const newMessage = new ChatMessage(message, false) 215 | const newConvo = [...convo, newMessage] 216 | setConvo(newConvo) 217 | await generateResponse(newConvo) 218 | }, 219 | [convo, generateResponse, setConvo] 220 | ) 221 | 222 | const startRecording = useCallback(async () => { 223 | setConfiguringAudio(true) 224 | try { 225 | const audioStream = await navigator.mediaDevices.getUserMedia({ audio: true }) 226 | await resumeAudioIfNecessary() 227 | if (!audioContextRef.current) { 228 | return 229 | } 230 | const audioContext = audioContextRef.current 231 | const learningLanguage = LANGUAGES_MAP[localStorage.getItem(LEARNING_LANG_KEY) ?? 
LANGUAGES[0].locale] 232 | speechRecognitionProcessorRef.current = new SpeechRecognitionProcessor( 233 | audioContext, 234 | audioStream, 235 | learningLanguage, 236 | isSafari() 237 | ) 238 | } catch (e) { 239 | addToast(i18nCommon('error')) 240 | console.error('Error initializing audio', e) 241 | setConfiguringAudio(false) 242 | return 243 | } 244 | const srProcessor = speechRecognitionProcessorRef.current 245 | 246 | try { 247 | await srProcessor.init() 248 | setConfiguringAudio(false) 249 | setTranscribing(true) 250 | await srProcessor.start() 251 | } catch (e) { 252 | srProcessor.releaseResources() 253 | addToast(i18nCommon('error')) 254 | console.error('Error starting speech recognition', e) 255 | setConfiguringAudio(false) 256 | setTranscribing(false) 257 | } 258 | }, [addToast, i18nCommon, isSafari, resumeAudioIfNecessary]) 259 | 260 | const stopRecording = useCallback(async () => { 261 | if (!speechRecognitionProcessorRef.current) { 262 | return 263 | } 264 | setConfiguringAudio(true) 265 | let lastMessage = '' 266 | try { 267 | lastMessage = await speechRecognitionProcessorRef.current.stopAndGetResult() 268 | } catch (e) { 269 | addToast(i18nCommon('error')) 270 | console.error('Error stopping recognition', e) 271 | speechRecognitionProcessorRef.current?.releaseResources() 272 | setConfiguringAudio(false) 273 | return 274 | } 275 | const audioBlob = speechRecognitionProcessorRef.current.exportAudio() 276 | speechRecognitionProcessorRef.current?.releaseResources() 277 | setTranscribing(false) 278 | setConfiguringAudio(false) 279 | 280 | if (lastMessage.trim()) { 281 | const newAudioMessage = new AudioChatMessage(lastMessage, false, audioBlob) 282 | await newAudioMessage.loadAudioMetadata() 283 | const newConvo = [...convo, newAudioMessage] 284 | setConvo(newConvo) 285 | await generateResponse(newConvo) 286 | } 287 | }, [addToast, i18nCommon, convo, setConvo, generateResponse]) 288 | 289 | return ( 290 | /* overflow-hidden prevents sticky div from jumping */ 291 |
292 | 293 |
294 |
{topicTitle ?? i18nCommon('loading')}
295 |
296 |
297 |
298 | {started ? ( 299 | <> 300 | {convo.map((msg) => ( 301 | 302 | ))} 303 | {isTranscribing && } 304 | 305 | ) : ( 306 |
307 | {i18n.rich('intro', { 308 | p: (paragraph) =>
{paragraph}
, 309 | MicIcon: () => , 310 | PlusIcon: () => , 311 | })} 312 | 315 |
317 | )} 318 |
319 |
320 | 328 |
329 |
330 | ) 331 | } 332 | --------------------------------------------------------------------------------