├── img
    ├── talk2gpt.jpg
    ├── android-icon-16x16.png
    ├── android-icon-32x32.png
    ├── android-icon-36x36.png
    ├── android-icon-48x48.png
    ├── android-icon-57x57.png
    ├── android-icon-60x60.png
    ├── android-icon-72x72.png
    ├── android-icon-76x76.png
    ├── android-icon-96x96.png
    ├── android-icon-114x114.png
    ├── android-icon-120x120.png
    ├── android-icon-144x144.png
    ├── android-icon-152x152.png
    ├── android-icon-180x180.png
    └── android-icon-192x192.png
├── js
    ├── storage.js
    ├── voices.js
    ├── incremental-text.js
    ├── $.js
    ├── listener.js
    ├── openai.js
    ├── ok.js
    ├── listen.js
    ├── events.js
    ├── interim.js
    └── index.js
├── README.md
├── events
    └── index.html
├── listener
    └── index.html
├── interim
    └── index.html
├── ok
    └── index.html
├── LICENSE
├── sw.js
├── manifest.json
├── css
    ├── index.css
    └── settings.css
└── index.html


/img/talk2gpt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/talk2gpt.jpg


--------------------------------------------------------------------------------
/img/android-icon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-16x16.png


--------------------------------------------------------------------------------
/img/android-icon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-32x32.png


--------------------------------------------------------------------------------
/img/android-icon-36x36.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-36x36.png


--------------------------------------------------------------------------------
/img/android-icon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-48x48.png


--------------------------------------------------------------------------------
/img/android-icon-57x57.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-57x57.png


--------------------------------------------------------------------------------
/img/android-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-60x60.png


--------------------------------------------------------------------------------
/img/android-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-72x72.png


--------------------------------------------------------------------------------
/img/android-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-76x76.png


--------------------------------------------------------------------------------
/img/android-icon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-96x96.png


--------------------------------------------------------------------------------
/img/android-icon-114x114.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-114x114.png


--------------------------------------------------------------------------------
/img/android-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-120x120.png


--------------------------------------------------------------------------------
/img/android-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-144x144.png


--------------------------------------------------------------------------------
/img/android-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-152x152.png


--------------------------------------------------------------------------------
/img/android-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-180x180.png


--------------------------------------------------------------------------------
/img/android-icon-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WebReflection/talk2gpt/HEAD/img/android-icon-192x192.png


--------------------------------------------------------------------------------
/js/storage.js:
--------------------------------------------------------------------------------
 1 | /*! (c) Andrea Giammarchi */
 2 | 
 3 | const {
 4 |   JSON,
 5 |   localStorage,
 6 |   sessionStorage,
 7 | } = globalThis;
 8 | 
 9 | const {parse, stringify} = JSON;
10 | 
11 | const JSONStorage = storage => ({
12 |   get: key => parse(storage.getItem(key) || 'null'),
13 |   set: (key, value) => storage.setItem(key, stringify(value))
14 | });
15 | 
16 | export const local = JSONStorage(localStorage);
17 | export const session = JSONStorage(sessionStorage);
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Talk2GPT
 2 | 
 3 | <sup>**Social Media Photo by [Rock'n Roll Monkey](https://unsplash.com/@rocknrollmonkey) on [Unsplash](https://unsplash.com/)**</sup>
 4 | 
 5 | A 100% client-side PoC of OpenAI Chat GPT API via Web Speech API.
 6 | 
 7 | It requires your OpenAI secret, which is never stored anywhere except in your browser's session storage.
 8 | 
 9 | [Live Here](https://webreflection.github.io/talk2gpt/)
10 | 
11 | - - -
12 | 
13 | The remaining folders (`events`, `listener`, `interim`, `ok`) are live demos used to test and showcase the Web Speech API.
14 | 


--------------------------------------------------------------------------------
/events/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1.0">
 6 |   <script type="module" src="../js/events.js"></script> <!-- demo logic: logs the name of every SpeechRecognition event -->
 7 |   <link rel="stylesheet" href="../css/index.css">
 8 |   <title>Web Speech API - Speech Recognition Events</title>
 9 | </head>
10 | <body>
11 |   <section><button id="mic" title="click to talk">🎙️</button></section> <!-- #mic: its click handler is registered by events.js -->
12 |   <section class="content"><div id="content"></div></section> <!-- #content: events.js appends one div per logged entry -->
13 | </body>
14 | </html>


--------------------------------------------------------------------------------
/listener/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1.0">
 6 |   <script type="module" src="../js/listener.js"></script> <!-- demo logic: shows the transcript returned by js/listen.js -->
 7 |   <link rel="stylesheet" href="../css/index.css">
 8 |   <title>Web Speech API - Speech Recognition Listener</title>
 9 | </head>
10 | <body>
11 |   <section><button id="mic" title="click to talk">🎙️</button></section> <!-- #mic: its click handler is registered by listener.js -->
12 |   <section class="content"><div id="content"></div></section> <!-- #content: listener.js appends one div per logged entry -->
13 | </body>
14 | </html>


--------------------------------------------------------------------------------
/interim/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1.0">
 6 |   <script type="module" src="../js/interim.js"></script> <!-- demo logic: recognition driven by interimResults -->
 7 |   <link rel="stylesheet" href="../css/index.css">
 8 |   <title>Web Speech API - Speech Recognition via interimResults</title>
 9 | </head>
10 | <body>
11 |   <section><button id="mic" title="click to talk">🎙️</button></section> <!-- #mic: its click handler is registered by interim.js -->
12 |   <section class="content"><div id="content"></div></section> <!-- #content: interim.js appends one div per logged entry -->
13 | </body>
14 | </html>


--------------------------------------------------------------------------------
/js/voices.js:
--------------------------------------------------------------------------------
 1 | /*! (c) Andrea Giammarchi - ISC */
 2 | 
 3 | let {Promise, setTimeout, speechSynthesis} = globalThis;
 4 | 
 5 | export default (timeout = 3000) => new Promise($ => {
 6 |   // must be assigned before trying to access voices
 7 |   speechSynthesis.addEventListener(
 8 |     'voiceschanged',
 9 |     () => { $(speechSynthesis.getVoices()) },
10 |     {once: true}
11 |   );
12 |   // kinda trigger the voices recognition
13 |   const voices = speechSynthesis.getVoices();
14 |   // if already populated, just resolve with it
15 |   if (voices.length) $(voices);
16 |   setTimeout($, timeout, []);
17 | });
18 | 


--------------------------------------------------------------------------------
/ok/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1.0">
 6 |   <script type="module" src="../js/ok.js"></script> <!-- demo logic: reacts to "OK Web" and "Stop listening" -->
 7 |   <link rel="stylesheet" href="../css/index.css">
 8 |   <title>Web Speech API - OK Web</title>
 9 | </head>
10 | <body>
11 |   <section><button id="mic" title="click to talk">🎙️</button></section> <!-- #mic: its click handler is registered by ok.js -->
12 |   <section class="content"><div id="content">
13 |     <center>Start listening. Say "<em>OK Web</em>" for reaction. Say "<em>Stop listening</em>" to stop.</center>
14 |   </div></section>
15 | </body>
16 | </html>


--------------------------------------------------------------------------------
/js/incremental-text.js:
--------------------------------------------------------------------------------
 1 | const {
 2 |   document,
 3 |   cancelAnimationFrame,
 4 |   requestAnimationFrame
 5 | } = globalThis;
 6 | 
 7 | export default class IncrementalText {
 8 |   #raf = 0;
 9 |   #target = null;
10 |   constructor(target) {
11 |     this.#target = target;
12 |   }
13 |   show(text) {
14 |     cancelAnimationFrame(this.#raf);
15 |     this.#target.textContent = '';
16 |     const node = this.#target.appendChild(document.createTextNode(''));
17 |     const chars = [...text];
18 |     let i = 0;
19 |     const show = () => {
20 |       if (i < chars.length) {
21 |         node.data += chars[i++];
22 |         this.#raf = requestAnimationFrame(show);
23 |       }
24 |     };
25 |     this.#raf = requestAnimationFrame(show);
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2023, Andrea Giammarchi, @WebReflection
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software for any
 6 | purpose with or without fee is hereby granted, provided that the above
 7 | copyright notice and this permission notice appear in all copies.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 | OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 | PERFORMANCE OF THIS SOFTWARE.
16 | 


--------------------------------------------------------------------------------
/js/$.js:
--------------------------------------------------------------------------------
1 | /* @see https://github.com/WebReflection/handy-wrap#readme */
2 | /*! (c) Andrea Giammarchi - ISC */ /* Minified build. Exports `$` (Proxy around the single `querySelector` match), `$$` (Proxy around the `querySelectorAll` list) and `plugins` (a Map of property name to handler, consulted first by the `get` trap). A non-string target is wrapped through its `valueOf()`; the root defaults to `globalThis.document`. In the `get` trap `on` aliases `addEventListener`, `emit(type, ...init)` dispatches a new Event on every wrapped node, and function-valued properties are invoked on every wrapped node and return the proxy, which allows chaining. The `set` trap assigns the value to every wrapped node. */
3 | const{iterator:e}=Symbol,t=new Map,r=(e,t)=>e?t.at(0):t,n={get({_:n,$:l},o,u){if(t.has(o))return t.get(o)(r(n,l),o,u);switch(o){case e:return l[e].bind(l);case"emit":return(e,...t)=>{for(let r=0;r<l.length;r++)l[r].dispatchEvent(new Event(e,...t));return u};case"length":return r(n,l)?.length;case"valueOf":return()=>r(n,l);case"on":o="addEventListener";default:{let e;for(let t=0;t<l.length;t++)if(t)e[t]=l[t][o];else{if(e=l[t][o],"function"==typeof e)return(...e)=>{for(let t=0;t<l.length;t++)l[t][o](...e);return u};if(n)return e;e=[e]}return e}}},set({$:e},t,r){for(let n=0;n<e.length;n++)e[n][t]=r;return!0}},l=(e,t,r=globalThis.document)=>"string"==typeof e?r[t](e):e.valueOf(),o=(e,t)=>new Proxy({_:1,$:[l(e,"querySelector",t)]},n),u=(e,t)=>new Proxy({_:0,$:l(e,"querySelectorAll",t)},n);export{o as $,u as $$,t as plugins};


--------------------------------------------------------------------------------
/js/listener.js:
--------------------------------------------------------------------------------
 1 | import {$} from './$.js';
 2 | import listen from './listen.js';
 3 | 
 4 | // create a div and show some text
 5 | const log = text => {
 6 |   const div = document.createElement('div');
 7 |   div.textContent = text;
 8 |   $('#content').appendChild(div);
 9 | };
10 | 
11 | // activate the listening
12 | $('#mic').on('click', ({currentTarget}) => {
13 |   // avoid clicks while listenings
14 |   currentTarget.disabled = true;
15 | 
16 |   // log passed time
17 |   log(0);
18 |   const time = new Date;
19 |   const i = setInterval(node => {
20 |     node.textContent = ((new Date - time) / 1000).toFixed(1);
21 |   }, 100, $('#content').lastChild);
22 | 
23 |   // listen to something
24 |   listen().then(
25 |     transcript => {
26 |       clearInterval(i);
27 |       log('You said: ' + (transcript || 'nothing'));
28 |       currentTarget.disabled = false;
29 |     },
30 |     console.error
31 |   );
32 | });
33 | 


--------------------------------------------------------------------------------
/js/openai.js:
--------------------------------------------------------------------------------
 1 | const API = 'https://api.openai.com/v1/';
 2 | 
 3 | const {JSON, fetch} = globalThis;
 4 | 
 5 | const {stringify} = JSON;
 6 | 
 7 | export default class OpenAI {
 8 |   #headers = null;
 9 |   #models = null;
10 |   constructor(bearer, options) {
11 |     this.#headers = {
12 |       'Content-Type': 'application/json',
13 |       'Authorization': 'Bearer ' + bearer
14 |     };
15 |     this.options = options;
16 |   }
17 |   get models() {
18 |     return this.#models || (this.#models = fetch(API + 'models', {
19 |       headers: this.#headers,
20 |       method: 'GET'
21 |     }).then(res => res.json()));
22 |   }
23 |   complete(transcript) {
24 |     return fetch(API + 'completions', {
25 |       headers: this.#headers,
26 |       method: 'POST',
27 |       body: stringify({
28 |         ...this.options,
29 |         prompt: transcript
30 |       })
31 |     }).then(res => res.json());
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/sw.js:
--------------------------------------------------------------------------------
 1 | addEventListener('install', event => {
 2 |   event.waitUntil(
 3 |     caches.open('talk2gpt').then(db => db.addAll([
 4 |       './',
 5 |       './js/$.js',
 6 |       './js/incremental-text.js',
 7 |       './js/index.js',
 8 |       './js/listen.js',
 9 |       './js/openai.js',
10 |       './js/storage.js',
11 |       './js/voices.js',
12 |       './css/index.css',
13 |       './css/settings.css'
14 |     ]))
15 |   );
16 | });
17 | 
18 | addEventListener('fetch', event => {
19 |   const {request} = event;
20 |   event.respondWith(
21 |     caches.open('talk2gpt').then(db => db.match(request).then(response => {
22 |       const fallback = fetch(request).then(
23 |         response => {
24 |           if(response.ok && request.method === 'GET')
25 |             db.put(request, response.clone());
26 |           return response;
27 |         },
28 |         () => new Response('Not Found', {
29 |           status: 404,
30 |           type: 'plain/text'
31 |         })
32 |       );
33 |       return response || fallback;
34 |     }))
35 |   );
36 | });
37 | 


--------------------------------------------------------------------------------
/manifest.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "background_color":"#ffffff",
 3 |   "description":"Talk to GPT",
 4 |   "display":"standalone",
 5 |   "name":"Talk2GPT",
 6 |   "orientation":"any",
 7 |   "short_name":"Talk2GPT",
 8 |   "start_url":"./",
 9 |   "theme_color":"#ffffff",
10 |   "icons": [
11 |     {
12 |      "src": "img/android-icon-36x36.png",
13 |      "sizes": "36x36",
14 |      "type": "image/png",
15 |      "density": "0.75"
16 |     },
17 |     {
18 |      "src": "img/android-icon-48x48.png",
19 |      "sizes": "48x48",
20 |      "type": "image/png",
21 |      "density": "1.0"
22 |     },
23 |     {
24 |      "src": "img/android-icon-72x72.png",
25 |      "sizes": "72x72",
26 |      "type": "image/png",
27 |      "density": "1.5"
28 |     },
29 |     {
30 |      "src": "img/android-icon-96x96.png",
31 |      "sizes": "96x96",
32 |      "type": "image/png",
33 |      "density": "2.0"
34 |     },
35 |     {
36 |      "src": "img/android-icon-144x144.png",
37 |      "sizes": "144x144",
38 |      "type": "image/png",
39 |      "density": "3.0"
40 |     },
41 |     {
42 |      "src": "img/android-icon-192x192.png",
43 |      "sizes": "192x192",
44 |      "type": "image/png",
45 |      "density": "4.0"
46 |     }
47 |   ]
48 | }


--------------------------------------------------------------------------------
/js/ok.js:
--------------------------------------------------------------------------------
 1 | import {$} from './$.js';
 2 | import listen from './listen.js';
 3 | 
 4 | // create a div and show some text
 5 | const log = text => {
 6 |   const div = document.createElement('div');
 7 |   div.textContent = text;
 8 |   $('#content').appendChild(div);
 9 | };
10 | 
11 | // say something in the default language
12 | const say = text => {
13 |   const ssu = new SpeechSynthesisUtterance(text);
14 |   // cancel any previous text before starting this one
15 |   speechSynthesis.cancel();
16 |   speechSynthesis.speak(ssu);
17 | };
18 | 
19 | // activate the listening
20 | $('#mic').on('click', ({currentTarget}) => {
21 |   currentTarget.disabled = true;
22 |   const check = transcript => {
23 |     switch (transcript.toLowerCase()) {
24 |       case 'stop listening':
25 |         currentTarget.disabled = false;
26 |         say('just stopped');
27 |         log('Just stopped 👍');
28 |         break;
29 |       case 'ok web':
30 |       case 'okay web':
31 |         say('I am ready');
32 |         log('I am ready 🤖');
33 |       default:
34 |         console.log(transcript);
35 |         listen().then(check);
36 |         break;
37 |     }
38 |   };
39 |   // grant SpeechSynthesisUtterance usage
40 |   say('');
41 |   // listen and check
42 |   listen().then(check);
43 | });
44 | 


--------------------------------------------------------------------------------
/css/index.css:
--------------------------------------------------------------------------------
 1 | * {
 2 |   box-sizing: border-box;
 3 | }
 4 | 
 5 | html, body {
 6 |   overflow: hidden;
 7 | }
 8 | 
 9 | html, body, section, #mic {
10 |   padding: 0;
11 |   margin: 0;
12 | }
13 | 
14 | /* viewport sizes are declared twice: `vw`/`vh` as fallback first, */
15 | /* then the `svw`/`svh` small-viewport units where supported */
16 | body {
17 |   font-family: Arial, Helvetica, sans-serif;
18 |   width: 100vw;
19 |   width: 100svw;
20 |   height: 100vh;
21 |   height: 100svh;
22 | }
23 | 
24 | /* notice pinned to the bottom of the page via generated content */
25 | body::after {
26 |   width: 100vw;
27 |   width: 100svw;
28 |   display: block;
29 |   position: fixed;
30 |   bottom: 0;
31 |   font-size: xx-small;
32 |   text-align: center;
33 |   content: 'You need a browser compatible with Web Speech API';
34 | }
35 | 
36 | body > section:first-of-type {
37 |   height: 20vh;
38 |   height: 20svh;
39 | }
40 | 
41 | body > section.content {
42 |   height: 80vh;
43 |   height: 80svh;
44 |   overflow: auto;
45 |   scrollbar-gutter: stable both-edges;
46 | }
47 | 
48 | #mic {
49 |   width: 100%;
50 |   height: 100%;
51 |   line-height: 100%;
52 |   font-size: xx-large;
53 |   border: 0;
54 |   transition: opacity ease-in 250ms;
55 | }
56 | 
57 | #mic:disabled {
58 |   opacity: .5;
59 | }
60 | 
61 | #content {
62 |   padding: 1rem;
63 |   white-space: pre-wrap;
64 |   transition: opacity ease-in 250ms;
65 | }
66 | 
67 | #content label {
68 |   display: flex;
69 |   flex-direction: column;
70 |   min-height: 60px;
71 |   justify-content: space-around;
72 | }
73 | 
74 | #content label span {
75 |   opacity: .7;
76 | }
77 | 
78 | #content label span, center {
79 |   font-size: small;
80 | }
81 | 
82 | #content img {
83 |   max-width: 100%;
84 | }


--------------------------------------------------------------------------------
/js/listen.js:
--------------------------------------------------------------------------------
 1 | /*! (c) Andrea Giammarchi - ISC */
 2 | 
 3 | let {Object, Promise, SpeechRecognition, clearTimeout, setTimeout} = globalThis;
 4 | if (!SpeechRecognition) // no standard constructor: use the webkit-prefixed one
 5 |   SpeechRecognition = webkitSpeechRecognition;
 6 | 
 7 | const {assign} = Object;
 8 | const interimResults = {interimResults: true}; // assigned after `options`, so it always wins
 9 | const once = {once: true};
10 | 
11 | export default (options = void 0) => new Promise((resolve, reject) => { // one recognition session per call: resolves with a transcript ('' when nothing was recognized), rejects with the event or an {error} object otherwise
12 |   let t = 0, ended = false; // t: pending `result` timer, ended: stop() already ran
13 |   const stop = event => { // stops the session; settles the promise only when an event is passed
14 |     clearTimeout(t);
15 |     ended = true;
16 |     sr.stop();
17 |     if (event) {
18 |       if (event.type === 'nomatch' || event.error === 'no-speech')
19 |         resolve(''); // nothing recognized is not treated as a failure
20 |       else
21 |         reject(event.type === 'end' ? {error: 'unable to understand'} : event); // has no effect when a transcript was already resolved
22 |     }
23 |   };
24 |   const result = ({results}) => { // resolves with the first alternative of the first final result, if any
25 |     stop();
26 |     for (const result of results) {
27 |       if (result.isFinal) {
28 |         for (const {transcript} of result) {
29 |           resolve(transcript);
30 |           return;
31 |         }
32 |       }
33 |     }
34 |   };
35 |   const sr = assign(new SpeechRecognition, options, interimResults);
36 |   sr.addEventListener('error', stop, once);
37 |   sr.addEventListener('nomatch', stop, once);
38 |   sr.addEventListener('end', stop, once);
39 |   sr.addEventListener('audioend', () => stop(), once); // no event forwarded: stop without settling
40 |   sr.addEventListener('result', event => {
41 |     if (ended)
42 |       result(event); // already stopping: handle the result right away
43 |     else {
44 |       clearTimeout(t);
45 |       t = setTimeout(result, 750, event); // debounce: handled only if no further result arrives within 750ms
46 |     }
47 |   });
48 |   sr.start();
49 | });
50 | 


--------------------------------------------------------------------------------
/js/events.js:
--------------------------------------------------------------------------------
 1 | import {$} from './$.js';
 2 | 
 3 | // normalize SpeechRecognition
 4 | let {SpeechRecognition} = globalThis;
 5 | if (!SpeechRecognition)
 6 |   SpeechRecognition = webkitSpeechRecognition;
 7 | 
 8 | // create a div and show the event name
 9 | const logEvent = ({type}) => {
10 |   const div = document.createElement('div');
11 |   div.textContent = type;
12 |   $('#content').appendChild(div);
13 | };
14 | 
15 | // activate the listening
16 | $('#mic').on('click', ({currentTarget}) => {
17 |   // avoid clicks while listenings
18 |   currentTarget.disabled = true;
19 | 
20 |   // log passed time
21 |   logEvent({type: 0});
22 |   const time = new Date;
23 |   const i = setInterval(node => {
24 |     node.textContent = ((new Date - time) / 1000).toFixed(1);
25 |   }, 100, $('#content').lastChild);
26 | 
27 |   // start listening to all events *and*
28 |   // avoid iOS listening forever (it stops in 10 seconds)
29 |   setTimeout(
30 |     $(new SpeechRecognition)
31 |       .on('start', logEvent)
32 |       .on('audiostart', logEvent)
33 |       .on('soundstart', logEvent)
34 |       .on('speechstart', logEvent)
35 |       .on('speechend', logEvent)
36 |       .on('soundend', logEvent)
37 |       .on('audioend', logEvent)
38 |       .on('result', logEvent)
39 |       .on('end', event => {
40 |         logEvent(event);
41 |         // cleanup and stop listening
42 |         clearInterval(i);
43 |         event.currentTarget.stop();
44 |         currentTarget.disabled = false;
45 |       })
46 |       // extra events
47 |       .on('error', logEvent)
48 |       .on('nomatch', logEvent)
49 |       .start()
50 |       // forward the stop
51 |       .stop,
52 |     10000
53 |   );
54 | });
55 | 


--------------------------------------------------------------------------------
/css/settings.css:
--------------------------------------------------------------------------------
  1 | /* the footer text is read from the `data-usage` attribute of <body> */
  2 | body::after {
  3 |   content: attr(data-usage);
  4 | }
  5 | 
  6 | /* settings panel, translated off-screen to the right by default; */
  7 | /* `vw`/`vh` declarations are fallbacks for the `svw`/`svh` ones. */
  8 | /* NOTE(review): `display` and `opacity` are never changed by this */
  9 | /* stylesheet, presumably a script toggles them: verify in index.js */
 10 | #settings {
 11 |   display: none;
 12 |   opacity: 0;
 13 |   position: absolute;
 14 |   top: 0;
 15 |   left: 0;
 16 |   width: 100vw;
 17 |   width: 100svw;
 18 |   height: 100vh;
 19 |   height: 100svh;
 20 |   padding: 1rem;
 21 |   transform: translateX(100vw);
 22 |   transform: translateX(100svw);
 23 | }
 24 | 
 25 | #settings, #settings > button {
 26 |   transition: all ease-in 250ms;
 27 | }
 28 | 
 29 | /* `body.settings` moves the panel back into place ... */
 30 | body.settings #settings {
 31 |   transform: translateX(0);
 32 | }
 33 | 
 34 | /* ... while every other child of <body> fades out */
 35 | body > *:not(#settings) {
 36 |   opacity: 1;
 37 |   transition: opacity ease-in 125ms;
 38 | }
 39 | 
 40 | body.settings > *:not(#settings) {
 41 |   opacity: 0;
 42 | }
 43 | 
 44 | #settings fieldset {
 45 |   display: flex;
 46 |   flex-direction: column;
 47 | }
 48 | 
 49 | #settings fieldset > legend {
 50 |   font-weight: bold;
 51 |   font-size: small;
 52 |   padding: 8px;
 53 | }
 54 | 
 55 | #settings label {
 56 |   margin: .5rem;
 57 |   width: 100%;
 58 | }
 59 | 
 60 | /* label row: 40% for the leading span, 40% for the element right */
 61 | /* after it, the remainder for the trailing span */
 62 | #settings label > span:first-child {
 63 |   display: inline-block;
 64 |   width: 40%;
 65 | }
 66 | #settings label > span:first-child + * {
 67 |   width: 40%;
 68 | }
 69 | #settings label > span:last-child {
 70 |   display: inline-block;
 71 |   width: calc(20% - 1rem - 8px);
 72 |   text-align: right;
 73 | }
 74 | 
 75 | /* panel button: pinned bottom-right, translated off-screen to the left */
 76 | /* until `body.settings` is set */
 77 | #settings > button {
 78 |   margin: 0;
 79 |   padding: 4px;
 80 |   font-size: 1rem;
 81 |   line-height: 1rem;
 82 |   position: fixed;
 83 |   bottom: 1rem;
 84 |   right: 1rem;
 85 |   transform: translateX(-100vw);
 86 |   transform: translateX(-100svw);
 87 | }
 88 | 
 89 | body.settings #settings > button {
 90 |   transform: translateX(0);
 91 | }
 92 | 
 93 | #fallback {
 94 |   position: relative;
 95 | }
 96 | 
 97 | #fallback, #fallback > textarea {
 98 |   width: 100%;
 99 |   height: 100%;
100 |   resize: none;
101 | }
102 | 
103 | /* button anchored 1rem away from the bottom-right corner of #fallback */
104 | #fallback > button {
105 |   position: absolute;
106 |   margin: 0;
107 |   padding: 4px;
108 |   font-size: 1rem;
109 |   line-height: 1rem;
110 |   bottom: 1rem;
111 |   right: 1rem;
112 | }
113 | 


--------------------------------------------------------------------------------
/js/interim.js:
--------------------------------------------------------------------------------
 1 | import {$} from './$.js';
 2 | 
 3 | // normalize SpeechRecognition
 4 | let {SpeechRecognition} = globalThis;
 5 | if (!SpeechRecognition)
 6 |   SpeechRecognition = webkitSpeechRecognition;
 7 | 
 8 | // create a div and show some text
 9 | const log = text => {
10 |   const div = document.createElement('div');
11 |   div.textContent = text;
12 |   $('#content').appendChild(div);
13 | };
14 | 
15 | // activate the listening
16 | $('#mic').on('click', ({currentTarget}) => {
17 |   // avoid clicks while listenings
18 |   currentTarget.disabled = true;
19 | 
20 |   // log passed time
21 |   log(0);
22 |   const time = new Date;
23 |   const i = setInterval(node => {
24 |     node.textContent = ((new Date - time) / 1000).toFixed(1);
25 |   }, 100, $('#content').lastChild);
26 | 
27 |   // start listening with interimResults
28 |   const sr = new SpeechRecognition;
29 |   sr.interimResults = true;
30 |   let t = 0, ended = false;
31 |   $(sr)
32 |     // works both on Chrome and Safari
33 |     .on('result', event => {
34 |       // prevent multiple showResult calls
35 |       clearTimeout(t);
36 |       // but if audioend fired already
37 |       if (ended)
38 |         // show results right away (if any final is present)
39 |         showResult(event);
40 |       // otherwise wait 750ms (or more, or less)
41 |       else
42 |         t = setTimeout(showResult, 750, event);
43 |     })
44 |     // works on Chrome, maybe on Safari too
45 |     .on('audioend', () => {
46 |       ended = true;
47 |     })
48 |     .start()
49 |   ;
50 | 
51 |   // stop listening (collects the final result)
52 |   // and show the result. This could get called
53 |   // multiple times.
54 |   function showResult({results}) {
55 |     ended = true; // speed up iOS
56 |     sr.stop();
57 |     for (const result of results) {
58 |       // consider only the final result
59 |       if (result.isFinal) {
60 |         // loop the first alternative returned
61 |         for (const {transcript} of result) {
62 |           // clean up and show result + enable button
63 |           clearInterval(i);
64 |           console.log(result);
65 |           log('You said: ' + transcript);
66 |           currentTarget.disabled = false;
67 |           return;
68 |         }
69 |       }
70 |     }
71 |   }
72 | });
73 | 


--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <meta name="viewport" content="width=device-width,initial-scale=1.0,user-scalable=0">
 6 |   <meta name=author content="Andrea Giammarchi, @webreflection">
 7 |   <meta name=description content="Web Speech API and Chat GPT">
 8 |   <meta name=apple-mobile-web-app-capable content=yes>
 9 |   <meta name=mobile-web-app-capable content=yes>
10 |   <meta name="msapplication-TileColor" content="#ffffff">
11 |   <meta name="msapplication-TileImage" content="img/android-icon-144x144.png">
12 |   <meta name="theme-color" content="#ffffff">
13 |   <meta property="og:title" content="A 100% client-side PoC of OpenAI Chat GPT API via Web Speech API">
14 |   <meta property="og:type" content="application" />
15 |   <meta property="og:image" content="img/talk2gpt.jpg">
16 |   <meta property="og:url" content="https://webreflection.github.io/talk2gpt/">
17 |   <meta name="twitter:card" content="summary_large_image">
18 |   <meta property="og:description" content="A 100% client-side PoC of OpenAI Chat GPT API via Web Speech API">
19 |   <meta property="og:site_name" content="Talk2GPT">
20 |   <meta name="twitter:image:alt" content="A robot waiting to interact">
21 |   <meta name="twitter:site" content="@webreflection">
22 |   <title>Talk2GPT</title>
23 |   <link rel="apple-touch-icon" sizes="57x57" href="img/android-icon-57x57.png">
24 |   <link rel="apple-touch-icon" sizes="60x60" href="img/android-icon-60x60.png">
25 |   <link rel="apple-touch-icon" sizes="72x72" href="img/android-icon-72x72.png">
26 |   <link rel="apple-touch-icon" sizes="76x76" href="img/android-icon-76x76.png">
27 |   <link rel="apple-touch-icon" sizes="114x114" href="img/android-icon-114x114.png">
28 |   <link rel="apple-touch-icon" sizes="120x120" href="img/android-icon-120x120.png">
29 |   <link rel="apple-touch-icon" sizes="144x144" href="img/android-icon-144x144.png">
30 |   <link rel="apple-touch-icon" sizes="152x152" href="img/android-icon-152x152.png">
31 |   <link rel="apple-touch-icon" sizes="180x180" href="img/android-icon-180x180.png">
32 |   <link rel="icon" type="image/png" sizes="192x192"  href="img/android-icon-192x192.png">
33 |   <link rel="icon" type="image/png" sizes="32x32" href="img/android-icon-32x32.png">
34 |   <link rel="icon" type="image/png" sizes="96x96" href="img/android-icon-96x96.png">
35 |   <link rel="icon" type="image/png" sizes="16x16" href="img/android-icon-16x16.png">
36 |   <link rel="manifest" href="manifest.json">
37 |   <script>if ('serviceWorker' in navigator) navigator.serviceWorker.register('./sw.js');</script>
38 |   <script type="module" src="./js/index.js"></script>
39 |   <link rel="stylesheet" href="./css/index.css">
40 |   <link rel="stylesheet" href="./css/settings.css">
41 | </head>
42 | <body>
43 |   <section><button id="mic" title="click to talk" disabled>🎙️</button></section>
44 |   <section class="content"><div id="content"></div></section>
45 |   <section id="settings">
46 |     <form>
47 |       <fieldset>
48 |         <legend>Generic Settings</legend>
49 |         <label>
50 |           <span>Preferred voice</span>
51 |           <select name="voice"></select>
52 |         </label>
53 |         <label>
54 |           <span>Voice volume</span>
55 |           <input type="range" name="volume" value="1" min="0.0" max="1.0" step="0.1">
56 |           <span>1</span>
57 |         </label>
58 |       </fieldset>
59 |       <fieldset>
60 |         <legend>Completion Settings</legend>
61 |         <label>
62 |           <span>Model</span>
63 |           <select name="model"></select>
64 |         </label>
65 |         <label>
66 |           <span>Max tokens</span>
67 |           <input type="number" name="max_tokens" value="140" min="1" max="4096" step="1" placeholder="1">
68 |           <span>140</span>
69 |         </label>
70 |         <label>
71 |           <span>temperature</span>
72 |           <input type="range" name="temperature" value="0.5" min="0.0" max="1.0" step="0.1">
73 |           <span>0.5</span>
74 |         </label>
75 |       </fieldset>
76 |     </form>
77 |     <button title="change settings">⚙</button>
78 |   </section>
79 | </body>
80 | </html>
81 | 


--------------------------------------------------------------------------------
/js/index.js:
--------------------------------------------------------------------------------
  1 | import IncrementalText from './incremental-text.js';
  2 | import OpenAI from './openai.js';
  3 | import {local, session} from './storage.js';
  4 | import listen from './listen.js';
  5 | import whenVoices from './voices.js';
  6 | import {$, $$} from './$.js';
  7 | 
  8 | const {
  9 |   SpeechSynthesisUtterance,
 10 |   requestAnimationFrame,
 11 |   speechSynthesis
 12 | } = globalThis;
 13 | 
 14 | let chosenVoice;
 15 | let bearer = session.get('bearer') || local.get('bearer');
 16 | let voice = local.get('voice');
 17 | let language = local.get('language');
 18 | let volume = local.get('volume');
 19 | let options = local.get('options') || {
 20 |   model: 'text-davinci-003',
 21 |   temperature: 0.2,
 22 |   max_tokens: 64
 23 | };
 24 | 
 25 | if (bearer)
 26 |   prepareListening(new OpenAI(bearer, options));
 27 | else {
 28 |   $('#content').innerHTML = `
 29 |   <form>
 30 |     <label>
 31 |       <span>OpenAI API Key</span>
 32 |       <input name="api-key" required>
 33 |     </label>
 34 |     <label>
 35 |       <span>Save API Key</span>
 36 |       <select name="save-key">
 37 |       <option value="no" selected>nope</option>
 38 |       <option value="session">in sessionStorage</option>
 39 |       <option value="local">in localStorage</option>
 40 |       </select>
 41 |     </label>
 42 |     <input type="submit">
 43 |   </form>
 44 |   `.trim();
 45 |   $('#content > form').on('submit', event => {
 46 |     event.preventDefault();
 47 |     const {currentTarget: form} = event;
 48 |     const fields = $$('input, select', form);
 49 |     fields.disabled = true;
 50 |     bearer = $('input[name="api-key"]', form).value.trim();
 51 |     let gpt = new OpenAI(bearer, options);
 52 |     gpt.models.then(json => {
 53 |       if (json.error) {
 54 |         fields.disabled = false;
 55 |         alert(json.error.message);
 56 |       }
 57 |       else {
 58 |         switch ($('select[name="save-key"]', form).value) {
 59 |           case 'session': session.set('bearer', bearer); break;
 60 |           case 'local': local.set('bearer', bearer); break;
 61 |         }
 62 |         $('#content')
 63 |           .on('transitionend', () => prepareListening(gpt), {once: true})
 64 |           .style.opacity = 0;
 65 |       }
 66 |     });
 67 |   });
 68 | }
 69 | 
 70 | const say = (something, voice) => {
 71 |   const ssu = new SpeechSynthesisUtterance(something);
 72 |   if (voice) {
 73 |     ssu.lang = voice.lang;
 74 |     ssu.voice = voice;
 75 |   }
 76 |   if (volume != null)
 77 |     ssu.volume = volume;
 78 |   ssu.rate = 1.2;
 79 |   speechSynthesis.cancel();
 80 |   speechSynthesis.speak(ssu);
 81 | };
 82 | 
 83 | const showUsage = ({usage}) => {
 84 |   const {
 85 |     prompt_tokens: prompt,
 86 |     completion_tokens: completition,
 87 |     total_tokens: total
 88 |   } = usage;
 89 |   $('body').dataset.usage = `
 90 |     Tokens: prompt ${prompt} - completition ${completition} - total ${total}
 91 |   `.trim();
 92 | };
 93 | 
 94 | const byNameAndLang = ({name, lang}) =>
 95 |                         name === voice && lang === language;
 96 | 
 97 | async function prepareListening(gpt) {
 98 |   const voices = await whenVoices();
 99 |   const it = new IncrementalText($('#content').valueOf());
100 |   const error = ({error, message}) => {
101 |     $('#mic').disabled = false;
102 |     $('#mic').focus();
103 |     it.show(`⚠️ ${error || message || 'something is wrong'}`);
104 |   };
105 |   chosenVoice = voices.find(byNameAndLang);
106 |   settings(gpt, voices);
107 |   it.show('🎙️ click the mic to ask anything');
108 |   $('#content').replaceChildren().style.opacity = 1;
109 |   $('#mic')
110 |     .on('click', ({currentTarget: button}) => {
111 |       button.disabled = true;
112 |       it.show('🧑 ...');
113 |       say('', chosenVoice);
114 |       listen(chosenVoice ? {lang: language} : void 0).then(complete, fallback);
115 |       function complete(transcript) {
116 |         if (transcript) {
117 |           it.show(`🧑 “${transcript}”`);
118 |           gpt.complete(transcript).then(
119 |             result => {
120 |               button.disabled = false;
121 |               button.focus();
122 |               if (result.error)
123 |                 error(result.error);
124 |               else {
125 |                 showUsage(result);
126 |                 for (const choice of result.choices) {
127 |                   const images = [];
128 |                   const text = choice.text.trim()
129 |                     .replace(/^[?!]\s*/, '')
130 |                     .replace(
131 |                       /!\[(.+?)\]\((.+?)\)/,
132 |                       (_, alt, src) => {
133 |                         return `[${images.push({alt, src})}]`;
134 |                       }
135 |                     );
136 |                   say(text, chosenVoice);
137 |                   it.show('🤖 ' + text);
138 |                   if (images.length) {
139 |                     $('#content').append(document.createElement('hr'));
140 |                     for (const details of images) {
141 |                       const image = Object.assign(new Image, details);
142 |                       $('#content').append(image);
143 |                     }
144 |                   }
145 |                   break;
146 |                 }
147 |               }
148 |             },
149 |             error
150 |           );
151 |         }
152 |         else {
153 |           button.disabled = false;
154 |           button.focus();
155 |           it.show('🤷');
156 |         }
157 |       }
158 | 
159 |       function fallback({error, message}) {
160 |         if ('service-not-allowed' !== (error || message) || !$$('#mic').length)
161 |           error({error, message});
162 |         else {
163 |           const div = document.createElement('div');
164 |           const textarea = div.appendChild(document.createElement('textarea'));
165 |           textarea.placeholder = 'Microphone placeholder.\nClick the robot to ask.';
166 |           button = div.appendChild(document.createElement('button'));
167 |           div.id = 'fallback';
168 |           $('#mic').replaceWith(div);
169 |           $(button)
170 |             .on('click', () => {
171 |               const value = textarea.value.trim();
172 |               if (value) {
173 |                 button.disabled = true;
174 |                 textarea.value = '';
175 |                 complete(value);
176 |               }
177 |             })
178 |             .textContent = '🤖';
179 |         }
180 |       }
181 |     })
182 |     .disabled = false;
183 | }
184 | 
185 | async function settings(gpt, voices) {
186 |   $('#settings > button').on('click', () => {
187 |     $('body').classList.toggle('settings');
188 |   });
189 | 
190 |   // voice & volume
191 |   let opts = [document.createElement('option')];
192 |   opts[0].value = 'default\x00';
193 |   opts[0].textContent = 'OS Default';
194 |   opts[0].selected = !voice;
195 |   for (const {name, lang} of voices) {
196 |     const option = document.createElement('option');
197 |     option.value = `${name}\x00${lang}`;
198 |     option.textContent = `${name} - ${lang}`;
199 |     if (name === voice && lang === language)
200 |       option.selected = true;
201 |     opts.push(option);
202 |   }
203 | 
204 |   $('#settings select[name="voice"]')
205 |     .on('change', ({currentTarget: {value}}) => {
206 |       const [name, lang] = value.split('\x00');
207 |       if (name === 'default') {
208 |         voice = null;
209 |         language = null;
210 |         chosenVoice = null;
211 |       }
212 |       else {
213 |         voice = name;
214 |         language = lang;
215 |         chosenVoice = voices.find(byNameAndLang);
216 |       }
217 |       local.set('voice', voice);
218 |       local.set('language', language);
219 |     })
220 |     .append(...opts)
221 |   ;
222 | 
223 |   const $volumeBar = $('#settings input[name="volume"]')
224 |     .on('pointermove', ({currentTarget: bar}) => {
225 |       volume = Math.min(bar.max, Math.max(bar.min, bar.value));
226 |       bar.value = volume;
227 |       bar.nextElementSibling.textContent = volume;
228 |     })
229 |     .on('change', ({currentTarget: bar}) => {
230 |       $(bar).emit('pointermove');
231 |       local.set('volume', volume);
232 |     })
233 |   ;
234 |   $volumeBar.value = volume;
235 |   $volumeBar.emit('pointermove');
236 | 
237 |   // models & completition & temperature
238 |   opts = [];
239 |   for (const model of (await gpt.models).data) {
240 |     const option = document.createElement('option');
241 |     if (model.id.includes('deprecated'))
242 |       continue;
243 |     option.textContent = option.value = model.id;
244 |     if (model.id === options.model)
245 |       option.selected = true;
246 |     opts.push(option);
247 |   }
248 | 
249 |   $('#settings select[name="model"]')
250 |     .on('change', ({currentTarget: {value}}) => {
251 |       options.model = value;
252 |       local.set('options', options);
253 |     })
254 |     .append(...opts)
255 |   ;
256 | 
257 |   const $options = $$('#settings input[name="max_tokens"], #settings input[name="temperature"]')
258 |     .on('pointermove', ({currentTarget: {nextElementSibling, value}}) => {
259 |       nextElementSibling.textContent = value;
260 |     })
261 |     .on('change', ({currentTarget}) => {
262 |       const {name, value} = currentTarget;
263 |       if (value) {
264 |         $(currentTarget).emit('pointermove');
265 |         options[name] = parseFloat(value);
266 |         local.set('options', options);
267 |       }
268 |     })
269 |   ;
270 |   for (const input of $options) {
271 |     input.value = options[input.name];
272 |     $(input).emit('pointermove');
273 |   }
274 | 
275 |   const {style} = $('#settings');
276 |   style.display = 'block';
277 |   requestAnimationFrame(() => {
278 |     requestAnimationFrame(() => {
279 |       style.opacity = 1;
280 |     });
281 |   });
282 | }
283 | 


--------------------------------------------------------------------------------