├── img ├── talk2gpt.jpg ├── android-icon-16x16.png ├── android-icon-32x32.png ├── android-icon-36x36.png ├── android-icon-48x48.png ├── android-icon-57x57.png ├── android-icon-60x60.png ├── android-icon-72x72.png ├── android-icon-76x76.png ├── android-icon-96x96.png ├── android-icon-114x114.png ├── android-icon-120x120.png ├── android-icon-144x144.png ├── android-icon-152x152.png ├── android-icon-180x180.png └── android-icon-192x192.png ├── js ├── storage.js ├── voices.js ├── incremental-text.js ├── $.js ├── listener.js ├── openai.js ├── ok.js ├── listen.js ├── events.js ├── interim.js └── index.js ├── README.md ├── events └── index.html ├── listener └── index.html ├── interim └── index.html ├── ok └── index.html ├── LICENSE ├── sw.js ├── manifest.json ├── css ├── index.css └── settings.css └── index.html /img/talk2gpt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/talk2gpt.jpg -------------------------------------------------------------------------------- /img/android-icon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-16x16.png -------------------------------------------------------------------------------- /img/android-icon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-32x32.png -------------------------------------------------------------------------------- /img/android-icon-36x36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-36x36.png -------------------------------------------------------------------------------- /img/android-icon-48x48.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-48x48.png -------------------------------------------------------------------------------- /img/android-icon-57x57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-57x57.png -------------------------------------------------------------------------------- /img/android-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-60x60.png -------------------------------------------------------------------------------- /img/android-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-72x72.png -------------------------------------------------------------------------------- /img/android-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-76x76.png -------------------------------------------------------------------------------- /img/android-icon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-96x96.png -------------------------------------------------------------------------------- /img/android-icon-114x114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-114x114.png -------------------------------------------------------------------------------- /img/android-icon-120x120.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-120x120.png -------------------------------------------------------------------------------- /img/android-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-144x144.png -------------------------------------------------------------------------------- /img/android-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-152x152.png -------------------------------------------------------------------------------- /img/android-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-180x180.png -------------------------------------------------------------------------------- /img/android-icon-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-192x192.png -------------------------------------------------------------------------------- /js/storage.js: -------------------------------------------------------------------------------- 1 | /*! 
(c) Andrea Giammarchi */ 2 | 3 | const { 4 | JSON, 5 | localStorage, 6 | sessionStorage, 7 | } = globalThis; 8 | 9 | const {parse, stringify} = JSON; 10 | 11 | const JSONStorage = storage => ({ 12 | get: key => parse(storage.getItem(key) || 'null'), 13 | set: (key, value) => storage.setItem(key, stringify(value)) 14 | }); 15 | 16 | export const local = JSONStorage(localStorage); 17 | export const session = JSONStorage(sessionStorage); 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Talk2GPT 2 | 3 | **Social Media Photo by [Rock'n Roll Monkey](https://unsplash.com/@rocknrollmonkey) on [Unsplash](https://unsplash.com/)** 4 | 5 | A 100% client-side PoC of the OpenAI Chat GPT API, driven via the Web Speech API. 6 | 7 | It requires your OpenAI secret, but it never stores it anywhere other than your own browser's storage (session or local, as you choose). 8 | 9 | [Live Here](https://webreflection.github.io/talk2gpt/) 10 | 11 | - - - 12 | 13 | The other folders (`events`, `interim`, `listener`, `ok`) are live pages to test and demo the Web Speech API. 14 | -------------------------------------------------------------------------------- /events/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Web Speech API - Speech Recognition Events 9 | 10 | 11 |
12 |
13 | 14 | -------------------------------------------------------------------------------- /listener/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Web Speech API - Speech Recognition Listener 9 | 10 | 11 |
12 |
13 | 14 | -------------------------------------------------------------------------------- /interim/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Web Speech API - Speech Recognition via interimResults 9 | 10 | 11 |
12 |
13 | 14 | -------------------------------------------------------------------------------- /js/voices.js: -------------------------------------------------------------------------------- 1 | /*! (c) Andrea Giammarchi - ISC */ 2 | 3 | let {Promise, setTimeout, speechSynthesis} = globalThis; 4 | 5 | export default (timeout = 3000) => new Promise($ => { 6 | // must be assigned before trying to access voices 7 | speechSynthesis.addEventListener( 8 | 'voiceschanged', 9 | () => { $(speechSynthesis.getVoices()) }, 10 | {once: true} 11 | ); 12 | // kinda trigger the voices recognition 13 | const voices = speechSynthesis.getVoices(); 14 | // if already populated, just resolve with it 15 | if (voices.length) $(voices); 16 | setTimeout($, timeout, []); 17 | }); 18 | -------------------------------------------------------------------------------- /ok/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Web Speech API - OK Web 9 | 10 | 11 |
12 |
13 |
Start listening. Say "OK Web" for a reaction. Say "Stop listening" to stop.
14 |
15 | 16 | -------------------------------------------------------------------------------- /js/incremental-text.js: -------------------------------------------------------------------------------- 1 | const { 2 | document, 3 | cancelAnimationFrame, 4 | requestAnimationFrame 5 | } = globalThis; 6 | 7 | export default class IncrementalText { 8 | #raf = 0; 9 | #target = null; 10 | constructor(target) { 11 | this.#target = target; 12 | } 13 | show(text) { 14 | cancelAnimationFrame(this.#raf); 15 | this.#target.textContent = ''; 16 | const node = this.#target.appendChild(document.createTextNode('')); 17 | const chars = [...text]; 18 | let i = 0; 19 | const show = () => { 20 | if (i < chars.length) { 21 | node.data += chars[i++]; 22 | this.#raf = requestAnimationFrame(show); 23 | } 24 | }; 25 | this.#raf = requestAnimationFrame(show); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2023, Andrea Giammarchi, @WebReflection 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 10 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 12 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 14 | OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 | PERFORMANCE OF THIS SOFTWARE. 
16 | -------------------------------------------------------------------------------- /js/$.js: -------------------------------------------------------------------------------- 1 | /* @see https://github.com/WebReflection/handy-wrap#readme */ 2 | /*! (c) Andrea Giammarchi - ISC */ 3 | const{iterator:e}=Symbol,t=new Map,r=(e,t)=>e?t.at(0):t,n={get({_:n,$:l},o,u){if(t.has(o))return t.get(o)(r(n,l),o,u);switch(o){case e:return l[e].bind(l);case"emit":return(e,...t)=>{for(let r=0;rr(n,l);case"on":o="addEventListener";default:{let e;for(let t=0;t{for(let t=0;t"string"==typeof e?r[t](e):e.valueOf(),o=(e,t)=>new Proxy({_:1,$:[l(e,"querySelector",t)]},n),u=(e,t)=>new Proxy({_:0,$:l(e,"querySelectorAll",t)},n);export{o as $,u as $$,t as plugins}; -------------------------------------------------------------------------------- /js/listener.js: -------------------------------------------------------------------------------- 1 | import {$} from './$.js'; 2 | import listen from './listen.js'; 3 | 4 | // create a div and show some text 5 | const log = text => { 6 | const div = document.createElement('div'); 7 | div.textContent = text; 8 | $('#content').appendChild(div); 9 | }; 10 | 11 | // activate the listening 12 | $('#mic').on('click', ({currentTarget}) => { 13 | // avoid clicks while listenings 14 | currentTarget.disabled = true; 15 | 16 | // log passed time 17 | log(0); 18 | const time = new Date; 19 | const i = setInterval(node => { 20 | node.textContent = ((new Date - time) / 1000).toFixed(1); 21 | }, 100, $('#content').lastChild); 22 | 23 | // listen to something 24 | listen().then( 25 | transcript => { 26 | clearInterval(i); 27 | log('You said: ' + (transcript || 'nothing')); 28 | currentTarget.disabled = false; 29 | }, 30 | console.error 31 | ); 32 | }); 33 | -------------------------------------------------------------------------------- /js/openai.js: -------------------------------------------------------------------------------- 1 | const API = 
'https://api.openai.com/v1/'; 2 | 3 | const {JSON, fetch} = globalThis; 4 | 5 | const {stringify} = JSON; 6 | 7 | export default class OpenAI { 8 | #headers = null; 9 | #models = null; 10 | constructor(bearer, options) { 11 | this.#headers = { 12 | 'Content-Type': 'application/json', 13 | 'Authorization': 'Bearer ' + bearer 14 | }; 15 | this.options = options; 16 | } 17 | get models() { 18 | return this.#models || (this.#models = fetch(API + 'models', { 19 | headers: this.#headers, 20 | method: 'GET' 21 | }).then(res => res.json())); 22 | } 23 | complete(transcript) { 24 | return fetch(API + 'completions', { 25 | headers: this.#headers, 26 | method: 'POST', 27 | body: stringify({ 28 | ...this.options, 29 | prompt: transcript 30 | }) 31 | }).then(res => res.json()); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /sw.js: -------------------------------------------------------------------------------- 1 | addEventListener('install', event => { 2 | event.waitUntil( 3 | caches.open('talk2gpt').then(db => db.addAll([ 4 | './', 5 | './js/$.js', 6 | './js/incremental-text.js', 7 | './js/index.js', 8 | './js/listen.js', 9 | './js/openai.js', 10 | './js/storage.js', 11 | './js/voices.js', 12 | './css/index.css', 13 | './css/settings.css' 14 | ])) 15 | ); 16 | }); 17 | 18 | addEventListener('fetch', event => { 19 | const {request} = event; 20 | event.respondWith( 21 | caches.open('talk2gpt').then(db => db.match(request).then(response => { 22 | const fallback = fetch(request).then( 23 | response => { 24 | if(response.ok && request.method === 'GET') 25 | db.put(request, response.clone()); 26 | return response; 27 | }, 28 | () => new Response('Not Found', { 29 | status: 404, 30 | type: 'plain/text' 31 | }) 32 | ); 33 | return response || fallback; 34 | })) 35 | ); 36 | }); 37 | -------------------------------------------------------------------------------- /manifest.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "background_color":"#ffffff", 3 | "description":"Talk to GPT", 4 | "display":"standalone", 5 | "name":"Talk2GPT", 6 | "orientation":"any", 7 | "short_name":"Talk2GPT", 8 | "start_url":"./", 9 | "theme_color":"#ffffff", 10 | "icons": [ 11 | { 12 | "src": "img/android-icon-36x36.png", 13 | "sizes": "36x36", 14 | "type": "image/png", 15 | "density": "0.75" 16 | }, 17 | { 18 | "src": "img/android-icon-48x48.png", 19 | "sizes": "48x48", 20 | "type": "image/png", 21 | "density": "1.0" 22 | }, 23 | { 24 | "src": "img/android-icon-72x72.png", 25 | "sizes": "72x72", 26 | "type": "image/png", 27 | "density": "1.5" 28 | }, 29 | { 30 | "src": "img/android-icon-96x96.png", 31 | "sizes": "96x96", 32 | "type": "image/png", 33 | "density": "2.0" 34 | }, 35 | { 36 | "src": "img/android-icon-144x144.png", 37 | "sizes": "144x144", 38 | "type": "image/png", 39 | "density": "3.0" 40 | }, 41 | { 42 | "src": "img/android-icon-192x192.png", 43 | "sizes": "192x192", 44 | "type": "image/png", 45 | "density": "4.0" 46 | } 47 | ] 48 | } -------------------------------------------------------------------------------- /js/ok.js: -------------------------------------------------------------------------------- 1 | import {$} from './$.js'; 2 | import listen from './listen.js'; 3 | 4 | // create a div and show some text 5 | const log = text => { 6 | const div = document.createElement('div'); 7 | div.textContent = text; 8 | $('#content').appendChild(div); 9 | }; 10 | 11 | // say something in the default language 12 | const say = text => { 13 | const ssu = new SpeechSynthesisUtterance(text); 14 | // cancel any previous text before starting this one 15 | speechSynthesis.cancel(); 16 | speechSynthesis.speak(ssu); 17 | }; 18 | 19 | // activate the listening 20 | $('#mic').on('click', ({currentTarget}) => { 21 | currentTarget.disabled = true; 22 | const check = transcript => { 23 | switch 
(transcript.toLowerCase()) { 24 | case 'stop listening': 25 | currentTarget.disabled = false; 26 | say('just stopped'); 27 | log('Just stopped 👍'); 28 | break; 29 | case 'ok web': 30 | case 'okay web': 31 | say('I am ready'); 32 | log('I am ready 🤖'); 33 | default: 34 | console.log(transcript); 35 | listen().then(check); 36 | break; 37 | } 38 | }; 39 | // grant SpeechSynthesisUtterance usage 40 | say(''); 41 | // listen and check 42 | listen().then(check); 43 | }); 44 | -------------------------------------------------------------------------------- /css/index.css: -------------------------------------------------------------------------------- 1 | * { 2 | box-sizing: border-box; 3 | } 4 | 5 | html, body { 6 | overflow: hidden; 7 | } 8 | 9 | html, body, section, #mic { 10 | padding: 0; 11 | margin: 0; 12 | } 13 | 14 | body { 15 | font-family: Arial, Helvetica, sans-serif; 16 | width: 100vw; 17 | width: 100svw; 18 | height: 100vh; 19 | height: 100svh; 20 | } 21 | 22 | body::after { 23 | width: 100vw; 24 | width: 100svw; 25 | display: block; 26 | position: fixed; 27 | bottom: 0; 28 | font-size: xx-small; 29 | text-align: center; 30 | content: 'You need a browser compatible with Web Speech API' 31 | } 32 | 33 | body > section:first-of-type { 34 | height: 20svh; 35 | } 36 | 37 | body > section.content { 38 | height: 80svh; 39 | overflow: auto; 40 | scrollbar-gutter: stable both-edges; 41 | } 42 | 43 | #mic { 44 | width: 100%; 45 | height: 100%; 46 | line-height: 100%; 47 | font-size: xx-large; 48 | border: 0; 49 | transition: opacity ease-in 250ms; 50 | } 51 | 52 | #mic:disabled { 53 | opacity: .5; 54 | } 55 | 56 | #content { 57 | padding: 1rem; 58 | white-space: pre-wrap; 59 | transition: opacity ease-in 250ms; 60 | } 61 | 62 | #content label { 63 | display: flex; 64 | flex-direction: column; 65 | min-height: 60px; 66 | justify-content: space-around; 67 | } 68 | 69 | #content label span { 70 | opacity: .7; 71 | } 72 | 73 | #content label span, center { 74 | font-size: 
small; 75 | } 76 | 77 | #content img { 78 | max-width: 100%; 79 | } -------------------------------------------------------------------------------- /js/listen.js: -------------------------------------------------------------------------------- 1 | /*! (c) Andrea Giammarchi - ISC */ 2 | 3 | let {Object, Promise, SpeechRecognition, clearTimeout, setTimeout} = globalThis; 4 | if (!SpeechRecognition) 5 | SpeechRecognition = webkitSpeechRecognition; 6 | 7 | const {assign} = Object; 8 | const interimResults = {interimResults: true}; 9 | const once = {once: true}; 10 | 11 | export default (options = void 0) => new Promise((resolve, reject) => { 12 | let t = 0, ended = false; 13 | const stop = event => { 14 | clearTimeout(t); 15 | ended = true; 16 | sr.stop(); 17 | if (event) { 18 | if (event.type === 'nomatch' || event.error === 'no-speech') 19 | resolve(''); 20 | else 21 | reject(event.type === 'end' ? {error: 'unable to understand'} : event); 22 | } 23 | }; 24 | const result = ({results}) => { 25 | stop(); 26 | for (const result of results) { 27 | if (result.isFinal) { 28 | for (const {transcript} of result) { 29 | resolve(transcript); 30 | return; 31 | } 32 | } 33 | } 34 | }; 35 | const sr = assign(new SpeechRecognition, options, interimResults); 36 | sr.addEventListener('error', stop, once); 37 | sr.addEventListener('nomatch', stop, once); 38 | sr.addEventListener('end', stop, once); 39 | sr.addEventListener('audioend', () => stop(), once); 40 | sr.addEventListener('result', event => { 41 | if (ended) 42 | result(event); 43 | else { 44 | clearTimeout(t); 45 | t = setTimeout(result, 750, event); 46 | } 47 | }); 48 | sr.start(); 49 | }); 50 | -------------------------------------------------------------------------------- /js/events.js: -------------------------------------------------------------------------------- 1 | import {$} from './$.js'; 2 | 3 | // normalize SpeechRecognition 4 | let {SpeechRecognition} = globalThis; 5 | if (!SpeechRecognition) 6 | 
SpeechRecognition = webkitSpeechRecognition; 7 | 8 | // create a div and show the event name 9 | const logEvent = ({type}) => { 10 | const div = document.createElement('div'); 11 | div.textContent = type; 12 | $('#content').appendChild(div); 13 | }; 14 | 15 | // activate the listening 16 | $('#mic').on('click', ({currentTarget}) => { 17 | // avoid clicks while listenings 18 | currentTarget.disabled = true; 19 | 20 | // log passed time 21 | logEvent({type: 0}); 22 | const time = new Date; 23 | const i = setInterval(node => { 24 | node.textContent = ((new Date - time) / 1000).toFixed(1); 25 | }, 100, $('#content').lastChild); 26 | 27 | // start listening to all events *and* 28 | // avoid iOS listening forever (it stops in 10 seconds) 29 | setTimeout( 30 | $(new SpeechRecognition) 31 | .on('start', logEvent) 32 | .on('audiostart', logEvent) 33 | .on('soundstart', logEvent) 34 | .on('speechstart', logEvent) 35 | .on('speechend', logEvent) 36 | .on('soundend', logEvent) 37 | .on('audioend', logEvent) 38 | .on('result', logEvent) 39 | .on('end', event => { 40 | logEvent(event); 41 | // cleanup and stop listening 42 | clearInterval(i); 43 | event.currentTarget.stop(); 44 | currentTarget.disabled = false; 45 | }) 46 | // extra events 47 | .on('error', logEvent) 48 | .on('nomatch', logEvent) 49 | .start() 50 | // forward the stop 51 | .stop, 52 | 10000 53 | ); 54 | }); 55 | -------------------------------------------------------------------------------- /css/settings.css: -------------------------------------------------------------------------------- 1 | body::after { 2 | content: attr(data-usage); 3 | } 4 | 5 | #settings { 6 | display: none; 7 | opacity: 0; 8 | position: absolute; 9 | top: 0; 10 | left: 0; 11 | width: 100vw; 12 | width: 100svw; 13 | height: 100vh; 14 | height: 100svh; 15 | padding: 1rem; 16 | transform: translateX(100vw); 17 | transform: translateX(100svw); 18 | } 19 | 20 | #settings, #settings > button { 21 | transition: all ease-in 250ms; 22 | } 23 | 24 
| body.settings #settings { 25 | transform: translateX(0); 26 | } 27 | 28 | body > *:not(#settings) { 29 | opacity: 1; 30 | transition: opacity ease-in 125ms; 31 | } 32 | 33 | body.settings > *:not(#settings) { 34 | opacity: 0; 35 | } 36 | 37 | #settings fieldset { 38 | display: flex; 39 | flex-direction: column; 40 | } 41 | 42 | #settings fieldset > legend { 43 | font-weight: bold; 44 | font-size: small; 45 | padding: 8px; 46 | } 47 | 48 | #settings label { 49 | margin: .5rem; 50 | width: 100%; 51 | } 52 | 53 | #settings label > span:first-child { 54 | display: inline-block; 55 | width: 40%; 56 | } 57 | #settings label > span:first-child + * { 58 | width: 40%; 59 | } 60 | #settings label > span:last-child { 61 | display: inline-block; 62 | width: calc(20% - 1rem - 8px); 63 | text-align: right; 64 | } 65 | 66 | #settings > button { 67 | margin: 0; 68 | padding: 4px; 69 | font-size: 1rem; 70 | line-height: 1rem; 71 | position: fixed; 72 | bottom: 1rem; 73 | right: 1rem; 74 | transform: translateX(-100vw); 75 | transform: translateX(-100svw); 76 | } 77 | 78 | body.settings #settings > button { 79 | transform: translateX(0); 80 | } 81 | 82 | #fallback { 83 | position: relative; 84 | } 85 | 86 | #fallback, #fallback > textarea { 87 | width: 100%; 88 | height: 100%; 89 | resize: none; 90 | } 91 | 92 | #fallback > button { 93 | position: absolute; 94 | right: 0; 95 | bottom: 0; 96 | margin: 0; 97 | padding: 4px; 98 | font-size: 1rem; 99 | line-height: 1rem; 100 | bottom: 1rem; 101 | right: 1rem; 102 | } 103 | -------------------------------------------------------------------------------- /js/interim.js: -------------------------------------------------------------------------------- 1 | import {$} from './$.js'; 2 | 3 | // normalize SpeechRecognition 4 | let {SpeechRecognition} = globalThis; 5 | if (!SpeechRecognition) 6 | SpeechRecognition = webkitSpeechRecognition; 7 | 8 | // create a div and show some text 9 | const log = text => { 10 | const div = 
document.createElement('div'); 11 | div.textContent = text; 12 | $('#content').appendChild(div); 13 | }; 14 | 15 | // activate the listening 16 | $('#mic').on('click', ({currentTarget}) => { 17 | // avoid clicks while listenings 18 | currentTarget.disabled = true; 19 | 20 | // log passed time 21 | log(0); 22 | const time = new Date; 23 | const i = setInterval(node => { 24 | node.textContent = ((new Date - time) / 1000).toFixed(1); 25 | }, 100, $('#content').lastChild); 26 | 27 | // start listening with interimResults 28 | const sr = new SpeechRecognition; 29 | sr.interimResults = true; 30 | let t = 0, ended = false; 31 | $(sr) 32 | // works both on Chrome and Safari 33 | .on('result', event => { 34 | // prevent multiple showResult calls 35 | clearTimeout(t); 36 | // but if audioend fired already 37 | if (ended) 38 | // show results right away (if any final is present) 39 | showResult(event); 40 | // otherwise wait 750ms (or more, or less) 41 | else 42 | t = setTimeout(showResult, 750, event); 43 | }) 44 | // works on Chrome, maybe on Safari too 45 | .on('audioend', () => { 46 | ended = true; 47 | }) 48 | .start() 49 | ; 50 | 51 | // stop listening (collects the final result) 52 | // and show the result. This could get called 53 | // multiple times. 
54 | function showResult({results}) { 55 | ended = true; // speed up iOS 56 | sr.stop(); 57 | for (const result of results) { 58 | // consider only the final result 59 | if (result.isFinal) { 60 | // loop the first alternative returned 61 | for (const {transcript} of result) { 62 | // clean up and show result + enable button 63 | clearInterval(i); 64 | console.log(result); 65 | log('You said: ' + transcript); 66 | currentTarget.disabled = false; 67 | return; 68 | } 69 | } 70 | } 71 | } 72 | }); 73 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Talk2GPT 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 |
44 |
45 |
46 |
47 |
48 | Generic Settings 49 | 53 | 58 |
59 |
60 | Completion Settings 61 | 65 | 70 | 75 |
76 |
77 | 78 |
79 | 80 | 81 | -------------------------------------------------------------------------------- /js/index.js: -------------------------------------------------------------------------------- 1 | import IncrementalText from './incremental-text.js'; 2 | import OpenAI from './openai.js'; 3 | import {local, session} from './storage.js'; 4 | import listen from './listen.js'; 5 | import whenVoices from './voices.js'; 6 | import {$, $$} from './$.js'; 7 | 8 | const { 9 | SpeechSynthesisUtterance, 10 | requestAnimationFrame, 11 | speechSynthesis 12 | } = globalThis; 13 | 14 | let chosenVoice; 15 | let bearer = session.get('bearer') || local.get('bearer'); 16 | let voice = local.get('voice'); 17 | let language = local.get('language'); 18 | let volume = local.get('volume'); 19 | let options = local.get('options') || { 20 | model: 'text-davinci-003', 21 | temperature: 0.2, 22 | max_tokens: 64 23 | }; 24 | 25 | if (bearer) 26 | prepareListening(new OpenAI(bearer, options)); 27 | else { 28 | $('#content').innerHTML = ` 29 |
30 | 34 | 42 | 43 |
44 | `.trim(); 45 | $('#content > form').on('submit', event => { 46 | event.preventDefault(); 47 | const {currentTarget: form} = event; 48 | const fields = $$('input, select', form); 49 | fields.disabled = true; 50 | bearer = $('input[name="api-key"]', form).value.trim(); 51 | let gpt = new OpenAI(bearer, options); 52 | gpt.models.then(json => { 53 | if (json.error) { 54 | fields.disabled = false; 55 | alert(json.error.message); 56 | } 57 | else { 58 | switch ($('select[name="save-key"]', form).value) { 59 | case 'session': session.set('bearer', bearer); break; 60 | case 'local': local.set('bearer', bearer); break; 61 | } 62 | $('#content') 63 | .on('transitionend', () => prepareListening(gpt), {once: true}) 64 | .style.opacity = 0; 65 | } 66 | }); 67 | }); 68 | } 69 | 70 | const say = (something, voice) => { 71 | const ssu = new SpeechSynthesisUtterance(something); 72 | if (voice) { 73 | ssu.lang = voice.lang; 74 | ssu.voice = voice; 75 | } 76 | if (volume != null) 77 | ssu.volume = volume; 78 | ssu.rate = 1.2; 79 | speechSynthesis.cancel(); 80 | speechSynthesis.speak(ssu); 81 | }; 82 | 83 | const showUsage = ({usage}) => { 84 | const { 85 | prompt_tokens: prompt, 86 | completion_tokens: completition, 87 | total_tokens: total 88 | } = usage; 89 | $('body').dataset.usage = ` 90 | Tokens: prompt ${prompt} - completition ${completition} - total ${total} 91 | `.trim(); 92 | }; 93 | 94 | const byNameAndLang = ({name, lang}) => 95 | name === voice && lang === language; 96 | 97 | async function prepareListening(gpt) { 98 | const voices = await whenVoices(); 99 | const it = new IncrementalText($('#content').valueOf()); 100 | const error = ({error, message}) => { 101 | $('#mic').disabled = false; 102 | $('#mic').focus(); 103 | it.show(`⚠️ ${error || message || 'something is wrong'}`); 104 | }; 105 | chosenVoice = voices.find(byNameAndLang); 106 | settings(gpt, voices); 107 | it.show('🎙️ click the mic to ask anything'); 108 | $('#content').replaceChildren().style.opacity = 
1; 109 | $('#mic') 110 | .on('click', ({currentTarget: button}) => { 111 | button.disabled = true; 112 | it.show('🧑 ...'); 113 | say('', chosenVoice); 114 | listen(chosenVoice ? {lang: language} : void 0).then(complete, fallback); 115 | function complete(transcript) { 116 | if (transcript) { 117 | it.show(`🧑 “${transcript}”`); 118 | gpt.complete(transcript).then( 119 | result => { 120 | button.disabled = false; 121 | button.focus(); 122 | if (result.error) 123 | error(result.error); 124 | else { 125 | showUsage(result); 126 | for (const choice of result.choices) { 127 | const images = []; 128 | const text = choice.text.trim() 129 | .replace(/^[?!]\s*/, '') 130 | .replace( 131 | /!\[(.+?)\]\((.+?)\)/, 132 | (_, alt, src) => { 133 | return `[${images.push({alt, src})}]`; 134 | } 135 | ); 136 | say(text, chosenVoice); 137 | it.show('🤖 ' + text); 138 | if (images.length) { 139 | $('#content').append(document.createElement('hr')); 140 | for (const details of images) { 141 | const image = Object.assign(new Image, details); 142 | $('#content').append(image); 143 | } 144 | } 145 | break; 146 | } 147 | } 148 | }, 149 | error 150 | ); 151 | } 152 | else { 153 | button.disabled = false; 154 | button.focus(); 155 | it.show('🤷'); 156 | } 157 | } 158 | 159 | function fallback({error, message}) { 160 | if ('service-not-allowed' !== (error || message) || !$$('#mic').length) 161 | error({error, message}); 162 | else { 163 | const div = document.createElement('div'); 164 | const textarea = div.appendChild(document.createElement('textarea')); 165 | textarea.placeholder = 'Microphone placeholder.\nClick the robot to ask.'; 166 | button = div.appendChild(document.createElement('button')); 167 | div.id = 'fallback'; 168 | $('#mic').replaceWith(div); 169 | $(button) 170 | .on('click', () => { 171 | const value = textarea.value.trim(); 172 | if (value) { 173 | button.disabled = true; 174 | textarea.value = ''; 175 | complete(value); 176 | } 177 | }) 178 | .textContent = '🤖'; 179 | } 180 | 
} 181 | }) 182 | .disabled = false; 183 | } 184 | 185 | async function settings(gpt, voices) { 186 | $('#settings > button').on('click', () => { 187 | $('body').classList.toggle('settings'); 188 | }); 189 | 190 | // voice & volume 191 | let opts = [document.createElement('option')]; 192 | opts[0].value = 'default\x00'; 193 | opts[0].textContent = 'OS Default'; 194 | opts[0].selected = !voice; 195 | for (const {name, lang} of voices) { 196 | const option = document.createElement('option'); 197 | option.value = `${name}\x00${lang}`; 198 | option.textContent = `${name} - ${lang}`; 199 | if (name === voice && lang === language) 200 | option.selected = true; 201 | opts.push(option); 202 | } 203 | 204 | $('#settings select[name="voice"]') 205 | .on('change', ({currentTarget: {value}}) => { 206 | const [name, lang] = value.split('\x00'); 207 | if (name === 'default') { 208 | voice = null; 209 | language = null; 210 | chosenVoice = null; 211 | } 212 | else { 213 | voice = name; 214 | language = lang; 215 | chosenVoice = voices.find(byNameAndLang); 216 | } 217 | local.set('voice', voice); 218 | local.set('language', language); 219 | }) 220 | .append(...opts) 221 | ; 222 | 223 | const $volumeBar = $('#settings input[name="volume"]') 224 | .on('pointermove', ({currentTarget: bar}) => { 225 | volume = Math.min(bar.max, Math.max(bar.min, bar.value)); 226 | bar.value = volume; 227 | bar.nextElementSibling.textContent = volume; 228 | }) 229 | .on('change', ({currentTarget: bar}) => { 230 | $(bar).emit('pointermove'); 231 | local.set('volume', volume); 232 | }) 233 | ; 234 | $volumeBar.value = volume; 235 | $volumeBar.emit('pointermove'); 236 | 237 | // models & completition & temperature 238 | opts = []; 239 | for (const model of (await gpt.models).data) { 240 | const option = document.createElement('option'); 241 | if (model.id.includes('deprecated')) 242 | continue; 243 | option.textContent = option.value = model.id; 244 | if (model.id === options.model) 245 | option.selected = 
true; 246 | opts.push(option); 247 | } 248 | 249 | $('#settings select[name="model"]') 250 | .on('change', ({currentTarget: {value}}) => { 251 | options.model = value; 252 | local.set('options', options); 253 | }) 254 | .append(...opts) 255 | ; 256 | 257 | const $options = $$('#settings input[name="max_tokens"], #settings input[name="temperature"]') 258 | .on('pointermove', ({currentTarget: {nextElementSibling, value}}) => { 259 | nextElementSibling.textContent = value; 260 | }) 261 | .on('change', ({currentTarget}) => { 262 | const {name, value} = currentTarget; 263 | if (value) { 264 | $(currentTarget).emit('pointermove'); 265 | options[name] = parseFloat(value); 266 | local.set('options', options); 267 | } 268 | }) 269 | ; 270 | for (const input of $options) { 271 | input.value = options[input.name]; 272 | $(input).emit('pointermove'); 273 | } 274 | 275 | const {style} = $('#settings'); 276 | style.display = 'block'; 277 | requestAnimationFrame(() => { 278 | requestAnimationFrame(() => { 279 | style.opacity = 1; 280 | }); 281 | }); 282 | } 283 | --------------------------------------------------------------------------------