├── .npmrc
├── src
│   ├── preview
│   │   ├── index.css
│   │   ├── index.html
│   │   └── app.tsx
│   ├── vanilla.ts
│   └── wake-me.tsx
├── public
│   └── snap
│       ├── weights.bin
│       ├── metadata.json
│       ├── model.json
│       └── speech-commands.min.js
├── postcss.config.js
├── vite.config.ts
├── tailwind.config.js
├── tsup.config.ts
├── tsconfig.json
├── LICENSE
├── CHANGELOG.md
├── package.json
├── .gitignore
├── README.ja.md
├── README.ko.md
└── README.md

--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
legacy-peer-deps=true

--------------------------------------------------------------------------------
/src/preview/index.css:
--------------------------------------------------------------------------------
@tailwind base;
@tailwind components;
@tailwind utilities;

--------------------------------------------------------------------------------
/public/snap/weights.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/llami-team/wake-me/HEAD/public/snap/weights.bin

--------------------------------------------------------------------------------
/postcss.config.js:
--------------------------------------------------------------------------------
module.exports = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
};

--------------------------------------------------------------------------------
/public/snap/metadata.json:
--------------------------------------------------------------------------------
{"tfjsSpeechCommandsVersion":"0.4.0","modelName":"TMv2","timeStamp":"2025-01-24T00:48:14.848Z","wordLabels":["FingerSnap","Snap","배경 소음"]}

--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";

export default defineConfig({
  plugins: [react()],
  root: "./src/preview",
});

--------------------------------------------------------------------------------
/tailwind.config.js:
--------------------------------------------------------------------------------
/** @type {import('tailwindcss').Config} */
module.exports = {
  content: ["./src/**/*.{js,jsx,ts,tsx}"],
  theme: {
    extend: {},
  },
  plugins: [],
};

--------------------------------------------------------------------------------
/src/preview/index.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Wake Me Components</title>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="./app.tsx"></script>
  </body>
</html>

--------------------------------------------------------------------------------
/tsup.config.ts:
--------------------------------------------------------------------------------
import { defineConfig } from "tsup";

export default defineConfig([
  {
    entry: ["src/wake-me.tsx"],
    format: ["cjs", "esm"],
    dts: true,
    clean: true,
    external: ["react", "react-dom"],
  },
  {
    entry: ["src/vanilla.ts"],
    format: ["iife"],
    globalName: "WakeMe",
    outDir: "dist/vanilla",
    minify: true,
    clean: true,
  },
]);

--------------------------------------------------------------------------------
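The two tsup targets above map onto the package entry points declared in `package.json` (`main`/`module`/`types` for the library build, `dist/vanilla` for the browser bundle). A minimal consumer-side sketch, assuming tsup's default output naming (`wake-me.js`/`wake-me.mjs` for CJS/ESM, `vanilla.global.js` for the IIFE build):

```ts
// ESM / CJS consumers resolve through package.json "module" / "main":
import { WakeMe } from "wake-me"; // -> dist/wake-me.mjs or dist/wake-me.js

// Browser consumers load the IIFE bundle instead, e.g.
//   <script src="https://cdn.jsdelivr.net/npm/wake-me@latest/dist/vanilla/vanilla.global.js"></script>
// after which the WakeMe class is available as a global (see src/vanilla.ts).
```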
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "es2016",
    "module": "commonjs",
    "jsx": "react",
    "declaration": true,
    "declarationDir": "dist",
    "sourceMap": true,
    "outDir": "dist",
    "strict": true,
    "moduleResolution": "node",
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true
  },
  "include": ["src"],
  "exclude": ["node_modules", "dist"]
}

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 LLAMI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
# Changelog

## [1.4.3](https://github.com/llami-team/wake-me/compare/v1.4.2...v1.4.3) (2025-02-03)

## [1.4.2](https://github.com/llami-team/wake-me/compare/v1.4.1...v1.4.2) (2025-01-31)

## [1.4.1](https://github.com/llami-team/wake-me/compare/v1.4.0...v1.4.1) (2025-01-31)

# [1.4.0](https://github.com/llami-team/wake-me/compare/v1.3.0...v1.4.0) (2025-01-31)

### Features

* If the script path is intentionally provided as null, the script will not be loaded.
([1bd9d68](https://github.com/llami-team/wake-me/commit/1bd9d68f7d7ccc37948a0fb2423b8f50911a5a71)) 15 | 16 | # [1.3.0](https://github.com/llami-team/wake-me/compare/v1.2.1...v1.3.0) (2025-01-31) 17 | 18 | 19 | ### Features 20 | 21 | * Add vanilla usage examples ([c6489fe](https://github.com/llami-team/wake-me/commit/c6489fe318ca7eb9d0d1918e35b4c70d47aabf8c)) 22 | * Add vanilla usage examples ([534ef9d](https://github.com/llami-team/wake-me/commit/534ef9dee86f059ba406ff6602f136df5b92c2b7)) 23 | 24 | ## [1.2.1](https://github.com/llami-team/wake-me/compare/v1.2.0...v1.2.1) (2025-01-31) 25 | 26 | 27 | ### Bug Fixes 28 | 29 | * Fixed misleading import statements in descriptions ([71e4541](https://github.com/llami-team/wake-me/commit/71e45416eaac90c8f6934a368f91eabec3be2578)) 30 | 31 | # [1.2.0](https://github.com/llami-team/wake-me/compare/v1.1.0...v1.2.0) (2025-01-31) 32 | 33 | 34 | ### Features 35 | 36 | * Lower snapThreshold to 0.95 (for ease of demonstration) ([a4df4f3](https://github.com/llami-team/wake-me/commit/a4df4f39eae7faba26b2b143f111b227083a74f3)) 37 | 38 | # [1.1.0](https://github.com/llami-team/wake-me/compare/v1.0.3...v1.1.0) (2025-01-31) 39 | 40 | 41 | ### Features 42 | 43 | * Added snapThreshold and improved documentation ([ee8fe78](https://github.com/llami-team/wake-me/commit/ee8fe78a067ea2e1f9240192c5eb6d171c7727ed)) 44 | 45 | ## [1.0.3](https://github.com/llami-team/wake-me/compare/v1.0.2...v1.0.3) (2025-01-31) 46 | 47 | ## 1.0.2 (2025-01-31) 48 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wake-me", 3 | "version": "1.4.3", 4 | "description": "Browser component library powered by TensorFlow.js for AI-based sound detection and recognition", 5 | "main": "dist/wake-me.js", 6 | "module": "dist/wake-me.mjs", 7 | "types": "dist/wake-me.d.ts", 8 | "files": [ 9 | "dist", 10 | "dist/vanilla", 11 | "public" 12 | ], 13 | "scripts": { 14 | "build": "tsup", 15 | "dev": "VITE_CJS_IGNORE_WARNING=true vite", 16 | "release": "release-it" 17 | }, 18 | "keywords": [ 19 | "react", 20 | "component", 21 | "library", 22 | "tensorflow", 23 | "ai", 24 | "machine-learning", 25 | "sound-detection", 26 | "wake-me", 27 | "wake-snap", 28 | "wake-word", 29 | "wake-finger-snap" 30 | ], 31 | "author": "llami-team", 32 | "repository": { 33 | "type": "git", 34 | "url": "https://github.com/llami-team/wake-me.git" 35 | }, 36 | "homepage": "https://llami.net", 37 | "license": "MIT", 38 | "peerDependencies": { 39 | "react": ">=16.8.0", 40 | "react-dom": ">=16.8.0" 41 | }, 42 | "devDependencies": { 43 | "@release-it/conventional-changelog": "^10.0.0", 44 | "@types/react": "^18.2.0", 45 | "@types/react-dom": "^18.2.0", 46 | "@vitejs/plugin-react": "^4.3.4", 47 | "autoprefixer": "^10.4.20", 48 | "postcss": "^8.5.1", 49 | "react": "^18.2.0", 50 | "react-dom": "^18.2.0", 51 | "release-it": "^17.0.3", 52 | "tailwindcss": "^3.4.17", 53 | "tsup": "^8.0.1", 54 | "typescript": "^5.3.3", 55 | "vite": "^6.0.11" 56 | }, 57 | "release-it": { 58 | "hooks": { 59 | "beforeStart": "npm run build" 60 | }, 61 | "git": { 62 | "commitMessage": "chore: release v${version}", 63 | "tagName": "v${version}" 64 | }, 65 | "npm": { 66 | "publish": true 67 | }, 68 | "github": { 69 | "release": true 70 | }, 71 | "plugins": { 72 | "@release-it/conventional-changelog": { 73 | "preset": "angular", 74 | "infile": "CHANGELOG.md" 75 | } 76 | } 77 | } 78 | } 79 | 
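One feature from the changelog above deserves a concrete example: since v1.4.0, passing `null` as a script URL intentionally skips dynamic script loading. A minimal sketch, assuming TensorFlow.js and speech-commands are already bundled into the page by other means:

```tsx
import { WakeMe } from "wake-me";

// Both script URLs are explicitly null: the component injects no <script>
// tags and expects window.speechCommands to already exist on the page.
function App() {
  return (
    <WakeMe
      tfScriptUrl={null}
      speechCommandsScriptUrl={null}
      onSnap={() => console.log("Snap detected!")}
    />
  );
}
```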
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | 132 | *storybook.log 133 | .DS_Store 134 | -------------------------------------------------------------------------------- /src/vanilla.ts: -------------------------------------------------------------------------------- 1 | interface WakeMeOptions { 2 | onSnap?: () => void; 3 | onNoise?: (matchScore: number) => void; 4 | tfScriptUrl?: string; 5 | speechCommandsScriptUrl?: string; 6 | modelBaseUrl?: string; 7 | snapThreshold?: number; 8 | } 9 | 10 | class WakeMeVanilla { 11 | private cleanupFn: (() => void) | null = null; 12 | private options: WakeMeOptions; 13 | 14 | constructor(options: WakeMeOptions = {}) { 15 | this.options = { 16 | tfScriptUrl: 
"https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js", 17 | speechCommandsScriptUrl: "https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands/dist/speech-commands.min.js", 18 | modelBaseUrl: "https://cdn.jsdelivr.net/npm/wake-me@latest/public/snap/", 19 | snapThreshold: 0.95, 20 | ...options 21 | }; 22 | } 23 | 24 | private loadScript(src: string): Promise { 25 | return new Promise((resolve, reject) => { 26 | const script = document.createElement("script"); 27 | script.src = src; 28 | script.onload = () => resolve(); 29 | script.onerror = () => reject(new Error(`Failed to load script: ${src}`)); 30 | document.head.appendChild(script); 31 | }); 32 | } 33 | 34 | private async detectSnap(): Promise<() => void> { 35 | const url = this.options.modelBaseUrl!.endsWith("/") 36 | ? this.options.modelBaseUrl 37 | : this.options.modelBaseUrl + "/"; 38 | 39 | async function createModel(this: WakeMeVanilla) { 40 | const checkpointURL = url + "model.json"; 41 | const metadataURL = url + "metadata.json"; 42 | 43 | const recognizer = (window as any).speechCommands.create( 44 | "BROWSER_FFT", 45 | undefined, 46 | checkpointURL, 47 | metadataURL 48 | ); 49 | 50 | await recognizer.ensureModelLoaded(); 51 | return recognizer; 52 | } 53 | 54 | const recognizer = await createModel.call(this); 55 | await recognizer.listen( 56 | (result: { scores: number[] }) => { 57 | const score = result.scores[0]; 58 | if (score > (this.options.snapThreshold ?? 0.95)) { 59 | this.options.onSnap?.(); 60 | } 61 | this.options.onNoise?.(score); 62 | }, 63 | { 64 | includeSpectrogram: true, 65 | probabilityThreshold: 0.75, 66 | overlapFactor: 0.5, 67 | } 68 | ); 69 | 70 | return () => { 71 | recognizer.stopListening(); 72 | }; 73 | } 74 | 75 | async init(): Promise { 76 | try { 77 | await this.loadScript(this.options.tfScriptUrl!); 78 | await this.loadScript(this.options.speechCommandsScriptUrl!); 79 | this.cleanupFn = await this.detectSnap(); 80 | } catch (error) { 81 | console.error("Failed to initialize WakeMe:", error); 82 | throw error; 83 | } 84 | } 85 | 86 | destroy(): void { 87 | if (this.cleanupFn) { 88 | this.cleanupFn(); 89 | this.cleanupFn = null; 90 | } 91 | } 92 | } 93 | 94 | // Export for use in vanilla JavaScript 95 | (window as any).WakeMe = WakeMeVanilla; 96 | -------------------------------------------------------------------------------- /README.ja.md: -------------------------------------------------------------------------------- 1 | # Wake Me 2 | 3 |
/README.ja.md:
--------------------------------------------------------------------------------
# Wake Me

<div align="center">
  LLAMI ロゴ
</div>
手拍子やフィンガースナップを検出する AI ベースのブラウザコンポーネントライブラリです。TensorFlow.js ベースの機械学習モデルを使用して、リアルタイムで正確に音を分析します。

> このプロジェクトは [LLAMI Team](https://llami.net) が提供しています。

_他の言語で読む: [English](README.md), [한국어](README.ko.md)_

## オンラインデモ

ブラウザで直接体験してみましょう: [オンラインデモ](https://codepen.io/hmmhmmhm/full/RNbdjeV)

⚠️ **オーディオ環境の推奨事項**:

- 最良の結果を得るために、外部スピーカーとマイクの使用を推奨します
- イヤホン/イヤポッドの使用時は正常に動作しない可能性があります
- デスクトップとモバイル端末の両方で利用可能です!

## ユースケース

- 🎙️ **音声認識 AI システム**: 手拍子やスナップで AI を起動するウェイクワードの代替として
- 🎮 **ゲームコントロール**: ハンズフリーゲームシナリオでの音声インタラクション
- 🎭 **インタラクティブプレゼンテーション**: プレゼンテーション中のスライド遷移やアニメーションのトリガー
- 🖥️ **ビデオ会議アシスタント**: 発言順番や注目を集めるための合図
- 🎨 **デジタルアートインスタレーション**: 観客とアート作品のインタラクションインターフェース
- 👥 **アクセシビリティ向上**: 身体的制限のあるユーザーのための代替入力方法
- 🤖 **スマートホームコントロール**: IoT デバイス制御のためのシンプルなトリガー方法
- 📱 **モバイルアプリコントロール**: 手が使えない状況でのアプリ制御

## 主な機能

- 🤖 TensorFlow.js を活用した高性能 AI モデル
- ⚡ リアルタイムの音声検出と分析
- 🎯 高精度な手拍子/スナップ認識
- 🪶 軽量で使いやすい

## インストール

```bash
npm install wake-me
# または
yarn add wake-me
```

## 使用方法

```tsx
import { WakeMe } from "wake-me";

function App() {
  return <WakeMe onSnap={() => console.log("スナップを検出しました!")} />;
}
```

## Next.js での使用

Next.js プロジェクトで Wake Me を使用する場合、必要なスクリプトとモデルファイルを直接組み込む必要があります:

1. `public` ディレクトリにモデルファイルをコピーします。
2. スクリプト URL を CDN から指定します:

```tsx
import { WakeMe } from "wake-me";

function App() {
  return (
    <WakeMe
      tfScriptUrl="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js"
      speechCommandsScriptUrl="https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands/dist/speech-commands.min.js"
      modelBaseUrl="/snap/"
      onSnap={() => console.log("スナップを検出しました!")}
    />
  );
}
```

## フレームワークサポート

WakeMe はフレームワークに依存しないように設計されています!React コンポーネントとして提供されていますが、以下の環境でも使用できます:

- React
- バニラ JavaScript
- Angular
- Vue.js
- その他すべての JavaScript フレームワーク

### React での使用方法

```tsx
import { WakeMe } from "wake-me";

function App() {
  return <WakeMe onSnap={() => console.log("スナップを検出しました!")} />;
}
```

### バニラ JavaScript での使用方法

```html
<!-- dist/vanilla の IIFE バンドルを読み込みます(パスはビルド成果物に合わせて調整してください) -->
<script src="https://cdn.jsdelivr.net/npm/wake-me@latest/dist/vanilla/vanilla.global.js"></script>
<script>
  const wakeMe = new WakeMe({
    onSnap: () => console.log("スナップを検出しました!"),
    onNoise: (score) => console.log("ノイズスコア:", score),
  });

  // マイクの使用許可を求めて検出を開始します
  wakeMe.init();

  // 検出を停止する場合
  // wakeMe.destroy();
</script>
```

## プロパティ

| プロパティ名            | 型                        | 必須   | 説明                                                                                     |
| ----------------------- | ------------------------- | ------ | ---------------------------------------------------------------------------------------- |
| onSnap                  | `() => void`              | いいえ | スナップ音が検出された時に呼び出されるコールバック関数                                   |
| onNoise                 | `(score: number) => void` | いいえ | ノイズが検出された時に呼び出されるコールバック関数                                       |
| tfScriptUrl             | `string`                  | いいえ | TensorFlow.js スクリプトの URL                                                           |
| speechCommandsScriptUrl | `string`                  | いいえ | Speech Commands スクリプトの URL                                                         |
| modelBaseUrl            | `string`                  | いいえ | カスタムモデルのベース URL                                                               |
| snapThreshold           | `number`                  | いいえ | 検出感度のしきい値(デフォルト: 0.95)。検出が不十分な場合は 0.9 に下げることができます |
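以下は `onNoise` と `snapThreshold` を組み合わせた簡単な例です(しきい値 0.9 は例示用の値です):

```tsx
import { WakeMe } from "wake-me";

function App() {
  return (
    <WakeMe
      onSnap={() => console.log("スナップを検出しました!")}
      onNoise={(score) => console.log(`ノイズスコア: ${score.toFixed(2)}`)}
      snapThreshold={0.9} // 検出されにくい場合は下げてください
    />
  );
}
```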
## ライセンス

MIT ライセンスの下で公開されています。詳細は[LICENSE](LICENSE)ファイルをご覧ください。

--------------------------------------------------------------------------------
/src/preview/app.tsx:
--------------------------------------------------------------------------------
import React, { useState, useEffect } from "react";
import ReactDOM from "react-dom/client";
import WakeMe from "../wake-me";

import "./index.css";

const App = () => {
  const [snapCount, setSnapCount] = useState(0);
  const [noiseLevel, setNoiseLevel] = useState(0);
  const [isSnapped, setIsSnapped] = useState(false);

  const handleSnap = () => {
    setSnapCount((prev) => prev + 1);
    setIsSnapped(true);
    setTimeout(() => setIsSnapped(false), 500);
  };

  const handleNoise = (score: number) => {
    setNoiseLevel(score);
  };

  return (
    <div className="min-h-screen bg-gray-900 text-white flex flex-col items-center justify-center gap-8 p-8">
      <div className="text-center">
        <img alt="LLAMI Logo" className="mx-auto mb-4 h-16" />
        <h1 className="text-3xl font-bold">Wake Me Test</h1>
        <p className="mt-2 text-gray-300">
          Snap your fingers or clap to test the detection!
        </p>
        <p className="mt-1 text-sm text-gray-400">
          Powered by{" "}
          <a
            href="https://llami.net"
            target="_blank"
            rel="noreferrer"
            className="underline"
          >
            LLAMI Team
          </a>
        </p>
      </div>

      <div className="grid gap-6 sm:grid-cols-2">
        <div className="rounded-xl bg-gray-800 p-6 text-center">
          <h2 className="text-lg font-semibold">Snap Counter</h2>
          <p className="text-sm text-gray-400">Total snaps detected</p>
          <div className="mt-4 text-5xl font-bold">{snapCount}</div>
        </div>

        <div className="rounded-xl bg-gray-800 p-6">
          <p className="text-sm text-gray-400">
            Current noise level: {(noiseLevel * 100).toFixed(1)}%
          </p>
          <div className="mt-4 h-2 w-full rounded bg-gray-700">
            <div
              className="h-2 rounded bg-green-500 transition-all"
              style={{ width: `${Math.min(noiseLevel * 100, 100)}%` }}
            />
          </div>
        </div>
      </div>

      <WakeMe onSnap={handleSnap} onNoise={handleNoise} />

      <div
        className={`rounded-full px-6 py-2 font-semibold ${
          isSnapped ? "bg-green-500 text-gray-900" : "bg-gray-800 text-gray-400"
        }`}
      >
        {isSnapped ? "Snap Detected!" : "Waiting for snap..."}
      </div>
    </div>
  );
};

ReactDOM.createRoot(document.getElementById("root")!).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);

--------------------------------------------------------------------------------
/README.ko.md:
--------------------------------------------------------------------------------
# Wake Me

<div align="center">
  LLAMI 로고
</div>
손뼉 소리나 핑거스냅을 감지하는 AI 기반 브라우저 컴포넌트 라이브러리입니다. TensorFlow.js를 활용한 머신러닝 모델을 통해 정확하고 실시간으로 소리를 감지하고 분석합니다.

> 이 프로젝트는 [LLAMI Team](https://llami.net)에서 제공합니다.

_다른 언어로 읽기: [English](README.md), [日本語](README.ja.md)_

## 온라인 데모

브라우저에서 직접 체험해보세요: [온라인 데모](https://codepen.io/hmmhmmhm/full/RNbdjeV)

⚠️ **오디오 환경 권장사항**:

- 최상의 결과를 위해 외부 스피커와 마이크 사용을 권장합니다
- 이어폰/이어팟 사용 시 정상적으로 동작하지 않을 수 있습니다
- 데스크톱과 모바일 기기 모두에서 사용 가능합니다!

## 활용 사례

- 🎙️ **음성 인식 AI 시스템**: 음성 명령을 시작하기 전에 손뼉이나 스냅으로 AI를 깨우는 웨이크 워드 대체 용도
- 🎮 **게임 컨트롤**: 핸즈프리 게임 컨트롤이 필요한 상황에서 소리로 상호작용
- 🎭 **인터랙티브 프레젠테이션**: 발표 중 슬라이드 전환이나 애니메이션 트리거
- 🖥️ **화상 회의 보조**: 발언권 요청이나 주의 집중이 필요할 때 소리로 신호
- 🎨 **디지털 아트 설치**: 관객과 작품 간의 상호작용을 위한 인터페이스
- 👥 **접근성 향상**: 신체적 제약이 있는 사용자를 위한 대체 입력 방식
- 🤖 **스마트홈 제어**: IoT 기기 제어를 위한 간단한 트리거 방식
- 📱 **모바일 앱 제어**: 손이 자유롭지 않은 상황에서의 앱 제어

## 주요 특징

- 🤖 TensorFlow.js 기반의 고성능 AI 모델 사용
- ⚡ 실시간 소리 감지 및 분석
- 🎯 높은 정확도의 손뼉/핑거스냅 인식
- 🪶 가벼운 크기와 쉬운 사용성

## 설치

```bash
npm install wake-me
# 또는
yarn add wake-me
```

## 사용법

```tsx
import { WakeMe } from "wake-me";

function App() {
  return <WakeMe onSnap={() => console.log("스냅이 감지되었습니다!")} />;
}
```

## 프레임워크 지원

WakeMe는 프레임워크에 구애받지 않도록 설계되었습니다! React 컴포넌트로 제공되지만, 다음과 같은 환경에서도 사용할 수 있습니다:

- React
- 바닐라 JavaScript
- Angular
- Vue.js
- 기타 모든 JavaScript 프레임워크

### React 사용법

```tsx
import { WakeMe } from "wake-me";

function App() {
  return <WakeMe onSnap={() => console.log("스냅이 감지되었습니다!")} />;
}
```

### 바닐라 JavaScript 사용법

```html
<!-- dist/vanilla의 IIFE 번들을 불러옵니다 (경로는 빌드 결과물에 맞게 조정하세요) -->
<script src="https://cdn.jsdelivr.net/npm/wake-me@latest/dist/vanilla/vanilla.global.js"></script>
<script>
  const wakeMe = new WakeMe({
    onSnap: () => console.log("스냅이 감지되었습니다!"),
    onNoise: (score) => console.log("노이즈 점수:", score),
  });

  // 마이크 권한을 요청하고 감지를 시작합니다
  wakeMe.init();

  // 감지를 중지하려면
  // wakeMe.destroy();
</script>
```

## Props

| Prop                    | 타입                    | 필수 여부 | 설명                                                                                           |
| ----------------------- | ----------------------- | --------- | ---------------------------------------------------------------------------------------------- |
| onSnap                  | () => void              | 선택      | 스냅이 감지되었을 때 호출되는 콜백                                                             |
| onNoise                 | (score: number) => void | 선택      | 노이즈가 감지되었을 때 호출되는 콜백                                                           |
| modelBaseUrl            | string                  | 선택      | 모델 파일이 위치한 기본 URL (기본값: https://cdn.jsdelivr.net/npm/wake-me@latest/public/snap/) |
| tfScriptUrl             | string                  | 선택      | TensorFlow.js 스크립트 URL                                                                     |
| speechCommandsScriptUrl | string                  | 선택      | Speech Commands 스크립트 URL                                                                   |
| snapThreshold           | number                  | 선택      | 감지 민감도 임계값 (기본값: 0.95). 감지가 잘 되지 않는 경우 0.9로 낮출 수 있음                 |
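다음은 `onNoise`와 `snapThreshold`를 함께 사용하는 간단한 예시입니다(0.9는 예시용 값입니다):

```tsx
import { WakeMe } from "wake-me";

function App() {
  return (
    <WakeMe
      onSnap={() => console.log("스냅이 감지되었습니다!")}
      onNoise={(score) => console.log("노이즈 점수:", score.toFixed(2))}
      snapThreshold={0.9} // 감지가 잘 되지 않으면 낮춰보세요
    />
  );
}
```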
## 모델 파일 설정

컴포넌트가 정상적으로 동작하기 위해서는 다음 파일들이 `modelBaseUrl`에 위치해야 합니다:

- model.json
- metadata.json
- \*.bin 파일들

## 스크립트와 모델 파일 내장하기 (Next.js 예시)

프로젝트에 필요한 스크립트와 모델 파일을 직접 내장하여 사용할 수 있습니다. Next.js 프로젝트를 예시로 설명하겠습니다.

### 1. 필요한 파일 다운로드

다음 파일들을 다운로드 받아야 합니다:

- TensorFlow.js: https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js
- Speech Commands: https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands/dist/speech-commands.min.js
- 모델 파일들:
  - model.json
  - metadata.json
  - \*.bin 파일들

### 2. 파일 위치 설정

Next.js 프로젝트에서는 `public` 디렉토리에 정적 파일들을 위치시킵니다:

```
your-nextjs-project/
├── public/
│   ├── scripts/
│   │   ├── tf.min.js
│   │   └── speech-commands.min.js
│   └── models/
│       ├── model.json
│       ├── metadata.json
│       └── *.bin 파일들
```

### 3. 컴포넌트에서 사용하기

```tsx
import { WakeMe } from "wake-me";

function App() {
  return (
    <WakeMe
      onSnap={() => console.log("스냅이 감지되었습니다!")}
      // 스크립트 URL 설정
      tfScriptUrl="/scripts/tf.min.js"
      speechCommandsScriptUrl="/scripts/speech-commands.min.js"
      // 모델 파일 경로 설정
      modelBaseUrl="/models/"
    />
  );
}
```

Next.js의 경우 `public` 디렉토리의 파일들은 루트 URL('/')에서부터 접근 가능합니다.

## 라이선스

MIT

--------------------------------------------------------------------------------
/src/wake-me.tsx:
--------------------------------------------------------------------------------
import { useEffect, useRef } from "react";

export interface WakeMeProps {
  /** Callback function triggered when a snap or clap is detected */
  onSnap?: () => void;
  /** Callback function triggered when noise is detected, provides a match score */
  onNoise?: (matchScore: number) => void;
  /** TensorFlow.js script URL (default: https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js) */
  tfScriptUrl?: string | null;
  /** Speech Commands script URL (default: https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands/dist/speech-commands.min.js) */
  speechCommandsScriptUrl?: string | null;
  /** Base URL where model files are located (default: https://cdn.jsdelivr.net/npm/wake-me@latest/public/snap/) */
  modelBaseUrl?: string;
  /** Threshold for snap detection (default: 0.95) */
  snapThreshold?: number;
}

/**
 * A React component for TensorFlow-based clap or finger snap detection
 *
 * This component loads TensorFlow.js and a pre-trained audio recognition model
 * to detect claps and finger snaps in real-time using the device's microphone.
 */
export const WakeMe = ({
  onSnap,
  onNoise,
  tfScriptUrl = "https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js",
  speechCommandsScriptUrl = "https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands/dist/speech-commands.min.js",
  modelBaseUrl = "https://cdn.jsdelivr.net/npm/wake-me@latest/public/snap/",
  snapThreshold = 0.95,
}: WakeMeProps) => {
  // Reference to store cleanup function for the audio recognition
  const cleanupRef = useRef<(() => void) | null>(null);

  /**
   * Dynamically loads a JavaScript script into the document
   * @param src - URL of the script to load
   * @returns Promise that resolves when the script is loaded
   */
  const loadScript = (src: string): Promise<void> => {
    return new Promise((resolve, reject) => {
      const script = document.createElement("script");
      script.src = src;
      script.async = true;
      script.onload = () => resolve();
      script.onerror = () => reject(new Error(`Failed to load script: ${src}`));
      document.body.appendChild(script);
    });
  };

  /**
   * Initializes and configures the audio recognition model
   * Sets up continuous listening for claps and finger snaps
   * @returns Cleanup function to stop listening
   */
  const detectSnap = async () => {
    const url = modelBaseUrl.endsWith("/") ?
modelBaseUrl : modelBaseUrl + "/"; 58 | 59 | /** 60 | * Creates and loads the audio recognition model 61 | * @returns Initialized recognizer instance 62 | */ 63 | async function createModel() { 64 | const checkpointURL = url + "model.json"; 65 | const metadataURL = url + "metadata.json"; 66 | 67 | const recognizer = (window as any).speechCommands.create( 68 | "BROWSER_FFT", 69 | undefined, 70 | checkpointURL, 71 | metadataURL 72 | ); 73 | 74 | await recognizer.ensureModelLoaded(); 75 | return recognizer; 76 | } 77 | 78 | const recognizer = await createModel(); 79 | const classLabels = recognizer.wordLabels(); 80 | // Get indices for snap and finger snap labels in the model's output 81 | const snapLabelIndex = classLabels.indexOf("Snap"); 82 | const fingerSnapLabelIndex = classLabels.indexOf("FingerSnap"); 83 | 84 | // Start continuous audio recognition with specified parameters 85 | recognizer.listen( 86 | (result: any) => { 87 | const scores = result.scores; 88 | const snapScore = scores[snapLabelIndex]; 89 | const fingerSnapScore = scores[fingerSnapLabelIndex]; 90 | // Trigger callbacks based on detection confidence 91 | if (snapScore >= snapThreshold || fingerSnapScore >= snapThreshold) 92 | onSnap?.(); 93 | else onNoise?.(snapScore); 94 | }, 95 | { 96 | includeSpectrogram: true, // Include audio spectrogram in analysis 97 | probabilityThreshold: 0.75, // Minimum confidence threshold 98 | invokeCallbackOnNoiseAndUnknown: true, // Trigger callback for non-matching sounds 99 | overlapFactor: 0.5, // Overlap between audio analysis windows 100 | } 101 | ); 102 | 103 | return () => recognizer.stopListening(); 104 | }; 105 | 106 | /** 107 | * Effect hook to initialize the component 108 | * Loads required scripts and sets up audio recognition 109 | */ 110 | useEffect(() => { 111 | const init = async () => { 112 | try { 113 | if (tfScriptUrl) await loadScript(tfScriptUrl); 114 | if (speechCommandsScriptUrl) await loadScript(speechCommandsScriptUrl); 115 | const cleanup = await detectSnap(); 116 | cleanupRef.current = cleanup; 117 | } catch (error) { 118 | console.error("Failed to initialize WakeMe:", error); 119 | } 120 | }; 121 | 122 | init(); 123 | 124 | return () => { 125 | cleanupRef.current?.(); 126 | cleanupRef.current = null; 127 | }; 128 | }, [tfScriptUrl, speechCommandsScriptUrl, modelBaseUrl]); 129 | 130 | return null; 131 | }; 132 | 133 | export default WakeMe; 134 | -------------------------------------------------------------------------------- /public/snap/model.json: -------------------------------------------------------------------------------- 1 | 
{"modelTopology":{"class_name":"Model","config":{"name":"model4","layers":[{"name":"conv2d_1_input","class_name":"InputLayer","config":{"batch_input_shape":[null,43,232,1],"dtype":"float32","sparse":false,"name":"conv2d_1_input"},"inbound_nodes":[]},{"name":"conv2d_1","class_name":"Conv2D","config":{"filters":8,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"uniform","seed":null}},"kernel_regularizer":null,"kernel_constraint":null,"kernel_size":[2,8],"strides":[1,1],"padding":"valid","data_format":"channels_last","dilation_rate":[1,1],"activation":"relu","use_bias":true,"bias_initializer":{"class_name":"Zeros","config":{}},"bias_regularizer":null,"activity_regularizer":null,"bias_constraint":null,"name":"conv2d_1","trainable":false,"batch_input_shape":[null,43,232,1],"dtype":"float32"},"inbound_nodes":[[["conv2d_1_input",0,0,{}]]]},{"name":"max_pooling2d_1","class_name":"MaxPooling2D","config":{"pool_size":[2,2],"padding":"valid","strides":[2,2],"data_format":"channels_last","name":"max_pooling2d_1","trainable":false},"inbound_nodes":[[["conv2d_1",0,0,{}]]]},{"name":"conv2d_2","class_name":"Conv2D","config":{"filters":32,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"uniform","seed":null}},"kernel_regularizer":null,"kernel_constraint":null,"kernel_size":[2,4],"strides":[1,1],"padding":"valid","data_format":"channels_last","dilation_rate":[1,1],"activation":"relu","use_bias":true,"bias_initializer":{"class_name":"Zeros","config":{}},"bias_regularizer":null,"activity_regularizer":null,"bias_constraint":null,"name":"conv2d_2","trainable":false},"inbound_nodes":[[["max_pooling2d_1",0,0,{}]]]},{"name":"max_pooling2d_2","class_name":"MaxPooling2D","config":{"pool_size":[2,2],"padding":"valid","strides":[2,2],"data_format":"channels_last","name":"max_pooling2d_2","trainable":false},"inbound_nodes":[[["conv2d_2",0,0,{}]]]},{"name":"conv2d_3","class_name":"Conv2D","config":{"filters":32,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"uniform","seed":null}},"kernel_regularizer":null,"kernel_constraint":null,"kernel_size":[2,4],"strides":[1,1],"padding":"valid","data_format":"channels_last","dilation_rate":[1,1],"activation":"relu","use_bias":true,"bias_initializer":{"class_name":"Zeros","config":{}},"bias_regularizer":null,"activity_regularizer":null,"bias_constraint":null,"name":"conv2d_3","trainable":false},"inbound_nodes":[[["max_pooling2d_2",0,0,{}]]]},{"name":"max_pooling2d_3","class_name":"MaxPooling2D","config":{"pool_size":[2,2],"padding":"valid","strides":[2,2],"data_format":"channels_last","name":"max_pooling2d_3","trainable":false},"inbound_nodes":[[["conv2d_3",0,0,{}]]]},{"name":"conv2d_4","class_name":"Conv2D","config":{"filters":32,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"uniform","seed":null}},"kernel_regularizer":null,"kernel_constraint":null,"kernel_size":[2,4],"strides":[1,1],"padding":"valid","data_format":"channels_last","dilation_rate":[1,1],"activation":"relu","use_bias":true,"bias_initializer":{"class_name":"Zeros","config":{}},"bias_regularizer":null,"activity_regularizer":null,"bias_constraint":null,"name":"conv2d_4","trainable":false},"inbound_nodes":[[["max_pooling2d_3",0,0,{}]]]},{"name":"max_pooling2d_4","class_name":"MaxPooling2D","config":{"pool_size":[2,2],"padding":"valid","strides":[1,2],"data_format":"channels_last","name":"max_po
oling2d_4","trainable":false},"inbound_nodes":[[["conv2d_4",0,0,{}]]]},{"name":"flatten_1","class_name":"Flatten","config":{"name":"flatten_1","trainable":false},"inbound_nodes":[[["max_pooling2d_4",0,0,{}]]]},{"name":"dropout_1","class_name":"Dropout","config":{"rate":0.25,"noise_shape":null,"seed":null,"name":"dropout_1","trainable":false},"inbound_nodes":[[["flatten_1",0,0,{}]]]},{"name":"dense_1","class_name":"Dense","config":{"units":2000,"activation":"relu","use_bias":true,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"uniform","seed":null}},"bias_initializer":{"class_name":"Zeros","config":{}},"kernel_regularizer":null,"bias_regularizer":null,"activity_regularizer":null,"kernel_constraint":null,"bias_constraint":null,"name":"dense_1","trainable":false},"inbound_nodes":[[["dropout_1",0,0,{}]]]},{"name":"sequential_9","class_name":"Sequential","config":{"name":"sequential_9","layers":[{"class_name":"Dense","config":{"units":3,"activation":"softmax","use_bias":true,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"normal","seed":null}},"bias_initializer":{"class_name":"Zeros","config":{}},"kernel_regularizer":null,"bias_regularizer":null,"activity_regularizer":null,"kernel_constraint":null,"bias_constraint":null,"name":"NewHeadDense","trainable":true,"batch_input_shape":[null,2000],"dtype":"float32"}}]},"inbound_nodes":[[["dense_1",0,0,{}]]]}],"input_layers":[["conv2d_1_input",0,0]],"output_layers":[["sequential_9",1,0]]},"keras_version":"tfjs-layers 1.3.1","backend":"tensor_flow.js"},"weightsManifest":[{"paths":["weights.bin"],"weights":[{"name":"NewHeadDense/kernel","shape":[2000,3],"dtype":"float32"},{"name":"NewHeadDense/bias","shape":[3],"dtype":"float32"},{"name":"conv2d_1/kernel","shape":[2,8,1,8],"dtype":"float32"},{"name":"conv2d_1/bias","shape":[8],"dtype":"float32"},{"name":"conv2d_2/kernel","shape":[2,4,8,32],"dtype":"float32"},{"name":"conv2d_2/bias","shape":[32],"dtype":"float32"},{"name":"conv2d_3/kernel","shape":[2,4,32,32],"dtype":"float32"},{"name":"conv2d_3/bias","shape":[32],"dtype":"float32"},{"name":"conv2d_4/kernel","shape":[2,4,32,32],"dtype":"float32"},{"name":"conv2d_4/bias","shape":[32],"dtype":"float32"},{"name":"dense_1/kernel","shape":[704,2000],"dtype":"float32"},{"name":"dense_1/bias","shape":[2000],"dtype":"float32"}]}]} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Wake Me 2 | 3 |
<div align="center">
  LLAMI Logo
</div>

AI-based browser component library that detects clapping sounds or finger snaps. Using a TensorFlow.js-based machine learning model, it accurately analyzes sounds in real time.

> This project is provided by [LLAMI Team](https://llami.net)

_Read this in other languages: [English](README.md), [日本語](README.ja.md), [한국어](README.ko.md)_

## Online Demo

Try out Wake Me in your browser: [Online Demo](https://codepen.io/hmmhmmhm/full/RNbdjeV)

⚠️ **Audio Setup Recommendation**:

- For best results, please use external speakers and a microphone
- The detection may not work properly with earphones/earbuds
- Works on both desktop and mobile devices!

## Use Cases

- 🎙️ **Voice Recognition AI Systems**: As a wake word alternative to activate AI using claps or snaps
- 🎮 **Game Control**: Sound-based interaction for hands-free gaming scenarios
- 🎭 **Interactive Presentations**: Trigger slide transitions or animations during presentations
- 🖥️ **Video Conference Assistant**: Signal for speaking turns or attention
- 🎨 **Digital Art Installations**: Interface for audience-artwork interaction
- 👥 **Accessibility Enhancement**: Alternative input method for users with physical limitations
- 🤖 **Smart Home Control**: Simple trigger method for IoT device control
- 📱 **Mobile App Control**: App control in hands-busy situations

## Key Features

- 🤖 High-performance AI model powered by TensorFlow.js
- ⚡ Real-time sound detection and analysis
- 🎯 High-accuracy clap/snap recognition
- 🪶 Lightweight and easy to use

## Framework Support

WakeMe is designed to be framework-agnostic! While it's primarily a React component, you can use it with:

- React
- Vanilla JavaScript
- Angular
- Vue.js
- Any other JavaScript framework

### React Usage

```tsx
import { WakeMe } from "wake-me";

function App() {
  return <WakeMe onSnap={() => console.log("Snap detected!")} />;
}
```

### Vanilla JavaScript Usage

```html
<!-- Load the IIFE bundle from dist/vanilla (adjust the path to your build or CDN) -->
<script src="https://cdn.jsdelivr.net/npm/wake-me@latest/dist/vanilla/vanilla.global.js"></script>
<script>
  const wakeMe = new WakeMe({
    onSnap: () => console.log("Snap detected!"),
    onNoise: (score) => console.log("Noise score:", score),
  });

  // Ask for microphone permission and start detection
  wakeMe.init();

  // Stop detection when it is no longer needed
  // wakeMe.destroy();
</script>
```

## Installation

```bash
npm install wake-me
# or
yarn add wake-me
```

## Usage

```tsx
import { WakeMe } from "wake-me";

function App() {
  return <WakeMe onSnap={() => console.log("Snap detected!")} />;
}
```

## Props

| Prop                    | Type                    | Required | Description                                                                                             |
| ----------------------- | ----------------------- | -------- | ------------------------------------------------------------------------------------------------------- |
| onSnap                  | () => void              | Optional | Callback when snap is detected                                                                           |
| onNoise                 | (score: number) => void | Optional | Callback when noise is detected                                                                          |
| modelBaseUrl            | string                  | Optional | Base URL for model files (default: https://cdn.jsdelivr.net/npm/wake-me@latest/public/snap/)             |
| tfScriptUrl             | string                  | Optional | TensorFlow.js script URL                                                                                 |
| speechCommandsScriptUrl | string                  | Optional | Speech Commands script URL                                                                               |
| snapThreshold           | number                  | Optional | Detection sensitivity threshold (default: 0.95). Can be lowered to 0.9 if detection is not sensitive enough |
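A short sketch combining these props into a live noise meter (the 0.9 threshold is illustrative):

```tsx
import { useState } from "react";
import { WakeMe } from "wake-me";

function SnapMeter() {
  const [level, setLevel] = useState(0);

  return (
    <>
      <WakeMe
        onSnap={() => console.log("Snap detected!")}
        onNoise={(score) => setLevel(score)}
        snapThreshold={0.9} // lower = more sensitive
      />
      <progress value={level} max={1} />
    </>
  );
}
```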
## Model File Setup

The following files must be present in the `modelBaseUrl` for the component to work properly:

- model.json
- metadata.json
- \*.bin files

## Embedding Scripts and Model Files (Next.js Example)

You can embed the required scripts and model files directly in your project. Here's an example using Next.js:

### 1. Download Required Files

Download the following files:

- TensorFlow.js: https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js
- Speech Commands: https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands/dist/speech-commands.min.js
- Model files:
  - model.json
  - metadata.json
  - \*.bin files

### 2. File Placement

In a Next.js project, place static files in the `public` directory:

```
your-nextjs-project/
├── public/
│   ├── scripts/
│   │   ├── tf.min.js
│   │   └── speech-commands.min.js
│   └── models/
│       ├── model.json
│       ├── metadata.json
│       └── *.bin files
```

### 3. Using in Component

```tsx
import { WakeMe } from "wake-me";

function App() {
  return (
    <WakeMe
      onSnap={() => console.log("Snap detected!")}
      // Set script URLs
      tfScriptUrl="/scripts/tf.min.js"
      speechCommandsScriptUrl="/scripts/speech-commands.min.js"
      // Set model files path
      modelBaseUrl="/models/"
    />
  );
}
```

In Next.js, files in the `public` directory are accessible from the root URL ('/').

## License

MIT

--------------------------------------------------------------------------------
/public/snap/speech-commands.min.js:
--------------------------------------------------------------------------------
1 | /**
2 |  * @license
3 |  * Copyright 2019 Google LLC. All Rights Reserved.
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  * http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 | * ============================================================================= 16 | */ 17 | !function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports,require("@tensorflow/tfjs"),require("util")):"function"==typeof define&&define.amd?define(["exports","@tensorflow/tfjs","util"],t):t(e.speechCommands={},e.tf,null)}(this,function(e,t,r){"use strict";var n=function(e,t){return(n=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var r in t)t.hasOwnProperty(r)&&(e[r]=t[r])})(e,t)};var a=function(){return(a=Object.assign||function(e){for(var t,r=1,n=arguments.length;r0&&a[a.length-1])&&(6===i[0]||2===i[0])){s=0;continue}if(3===i[0]&&(!a||i[1]>a[0]&&i[1]=e.length&&(e=void 0),{value:e&&e[r++],done:!e}}}}function l(e,t){var r="function"==typeof Symbol&&e[Symbol.iterator];if(!r)return e;var n,a,i=r.call(e),s=[];try{for(;(void 0===t||t-- >0)&&!(n=i.next()).done;)s.push(n.value)}catch(e){a={error:e}}finally{try{n&&!n.done&&(r=i.return)&&r.call(i)}finally{if(a)throw a.error}}return s}function u(){for(var e=[],t=0;t0))throw new Error("Invalid value in numFramesPerSpectrogram: "+e.numFramesPerSpectrogram);if(e.suppressionTimeMillis<0)throw new Error("Expected suppressionTimeMillis to be >= 0, but got "+e.suppressionTimeMillis);if(this.suppressionTimeMillis=e.suppressionTimeMillis,this.spectrogramCallback=e.spectrogramCallback,this.numFrames=e.numFramesPerSpectrogram,this.sampleRateHz=e.sampleRateHz||44100,this.fftSize=e.fftSize||1024,this.frameDurationMillis=this.fftSize/this.sampleRateHz*1e3,this.columnTruncateLength=e.columnTruncateLength||this.fftSize,this.overlapFactor=e.overlapFactor,this.includeRawAudio=e.includeRawAudio,t.util.assert(this.overlapFactor>=0&&this.overlapFactor<1,function(){return"Expected overlapFactor to be >= 0 and < 1, but got "+r.overlapFactor}),this.columnTruncateLength>this.fftSize)throw new Error("columnTruncateLength "+this.columnTruncateLength+" exceeds fftSize ("+this.fftSize+").");this.audioContextConstructor=window.AudioContext||window.webkitAudioContext}return e.prototype.start=function(e){return i(this,void 0,void 0,function(){var t,r,n;return s(this,function(a){switch(a.label){case 0:if(null!=this.frameIntervalTask)throw new Error("Cannot start already-started BrowserFftFeatureExtractor");return t=this,[4,function(e){return i(this,void 0,void 0,function(){return s(this,function(t){return[2,navigator.mediaDevices.getUserMedia({audio:null==e||e,video:!1})]})})}(e)];case 1:return t.stream=a.sent(),this.audioContext=new this.audioContextConstructor,this.audioContext.sampleRate!==this.sampleRateHz&&console.warn("Mismatch in sampling rate: Expected: "+this.sampleRateHz+"; Actual: "+this.audioContext.sampleRate),r=this.audioContext.createMediaStreamSource(this.stream),this.analyser=this.audioContext.createAnalyser(),this.analyser.fftSize=2*this.fftSize,this.analyser.smoothingTimeConstant=0,r.connect(this.analyser),this.freqDataQueue=[],this.freqData=new Float32Array(this.fftSize),this.includeRawAudio&&(this.timeDataQueue=[],this.timeData=new Float32Array(this.fftSize)),n=Math.max(1,Math.round(this.numFrames*(1-this.overlapFactor))),this.tracker=new m(n,Math.round(this.suppressionTimeMillis/this.frameDurationMillis)),this.frameIntervalTask=setInterval(this.onAudioFrame.bind(this),this.fftSize/this.sampleRateHz*1e3),[2]}})})},e.prototype.onAudioFrame=function(){return i(this,void 0,void 0,function(){var e,r,n,a;return s(this,function(i){switch(i.label){case 0:return 
this.analyser.getFloatFrequencyData(this.freqData),this.freqData[0]===-1/0?[2]:(this.freqDataQueue.push(this.freqData.slice(0,this.columnTruncateLength)),this.includeRawAudio&&(this.analyser.getFloatTimeDomainData(this.timeData),this.timeDataQueue.push(this.timeData.slice())),this.freqDataQueue.length>this.numFrames&&this.freqDataQueue.shift(),this.tracker.tick()?(e=p(this.freqDataQueue),r=f(e,[1,this.numFrames,this.columnTruncateLength,1]),n=void 0,this.includeRawAudio&&(a=p(this.timeDataQueue),n=f(a,[1,this.numFrames*this.fftSize])),[4,this.spectrogramCallback(r,n)]):[3,2]);case 1:i.sent()&&this.tracker.suppress(),t.dispose([r,n]),i.label=2;case 2:return[2]}})})},e.prototype.stop=function(){return i(this,void 0,void 0,function(){return s(this,function(e){if(null==this.frameIntervalTask)throw new Error("Cannot stop because there is no ongoing streaming activity.");return clearInterval(this.frameIntervalTask),this.frameIntervalTask=null,this.analyser.disconnect(),this.audioContext.close(),null!=this.stream&&this.stream.getTracks().length>0&&this.stream.getTracks()[0].stop(),[2]})})},e.prototype.setConfig=function(e){throw new Error("setConfig() is not implemented for BrowserFftFeatureExtractor.")},e.prototype.getFeatures=function(){throw new Error("getFeatures() is not implemented for BrowserFftFeatureExtractor. Use the spectrogramCallback field of the constructor config instead.")},e}();function p(e){var t=e[0].length,r=new Float32Array(e.length*t);return e.forEach(function(e,n){return r.set(e,n*t)}),r}function f(e,r){var n=new Float32Array(t.util.sizeFromShape(r));return n.set(e,n.length-e.length),t.tensor(n,r)}var m=function(){function e(e,r){var n=this;this.period=e,this.suppressionTime=null==r?0:r,this.counter=0,t.util.assert(this.period>0,function(){return"Expected period to be positive, but got "+n.period})}return e.prototype.tick=function(){return this.counter++,this.counter%this.period==0&&(null==this.suppressionOnset||this.counter-this.suppressionOnset>this.suppressionTime)},e.prototype.suppress=function(){this.suppressionOnset=this.counter},e}();function g(e){var t=0;e.forEach(function(e){t+=e.byteLength});var r=new Uint8Array(t),n=0;return e.forEach(function(e){r.set(new Uint8Array(e),n),n+=e.byteLength}),r.buffer}function v(e){var t=0;e.forEach(function(e){return t+=e.length});var r=new Float32Array(t),n=0;return e.forEach(function(e){r.set(e,n),n+=e.length}),r}function y(e){if(null==e)throw new Error("Received null or undefind string");for(var t=unescape(encodeURIComponent(e)),r=new Uint8Array(t.length),n=0;n0,function(){return"Expected label to be a non-empty string, but got "+JSON.stringify(e.label)});var r=function(){function e(){return Math.floor(65536*(1+Math.random())).toString(16).substring(1)}return e()+e()+"-"+e()+"-"+e()+"-"+e()+"-"+e()+e()+e()}();return this.examples[r]=e,e.label in this.label2Ids||(this.label2Ids[e.label]=[]),this.label2Ids[e.label].push(r),r},e.prototype.merge=function(e){var r,n,a,i;t.util.assert(e!==this,function(){return"Cannot merge a dataset into itself"});var s=e.getVocabulary();try{for(var l=o(s),u=l.next();!u.done;u=l.next()){var c=u.value,h=e.getExamples(c);try{for(var d=(a=void 0,o(h)),p=d.next();!p.done;p=d.next()){var f=p.value;this.addExample(f.example)}}catch(e){a={error:e}}finally{try{p&&!p.done&&(i=d.return)&&i.call(d)}finally{if(a)throw a.error}}}}catch(e){r={error:e}}finally{try{u&&!u.done&&(n=l.return)&&n.call(l)}finally{if(r)throw r.error}}},e.prototype.getExampleCounts=function(){var e={};for(var t in this.examples){var 
r=this.examples[t];r.label in e||(e[r.label]=0),e[r.label]++}return e},e.prototype.getExamples=function(e){var r=this;t.util.assert(null!=e,function(){return"Expected label to be a string, but got "+JSON.stringify(e)}),t.util.assert(e in this.label2Ids,function(){return'No example of label "'+e+'" exists in dataset'});var n=[];return this.label2Ids[e].forEach(function(e){n.push({uid:e,example:r.examples[e]})}),n},e.prototype.getData=function(e,r){var n=this;t.util.assert(this.size()>0,function(){return"Cannot get spectrograms as tensors because the dataset is empty"});var a=this.getVocabulary();null!=e?t.util.assert(-1!==a.indexOf(e),function(){return"Label "+e+" is not in the vocabulary ("+JSON.stringify(a)+")"}):t.util.assert(a.length>1,function(){return"One-hot encoding of labels requires the vocabulary to have at least two words, but it has only "+a.length+" word."}),null==r&&(r={});var i,s,l=this.getSortedUniqueNumFrames();1===l.length?(i=null==r.numFrames?l[0]:r.numFrames,s=null==r.hopFrames?1:r.hopFrames):(i=r.numFrames,t.util.assert(null!=i&&Number.isInteger(i)&&i>0,function(){return"There are "+l.length+" unique lengths among the "+n.size()+" examples of this Dataset, hence numFrames is required. But it is not provided."}),t.util.assert(i<=l[0],function(){return"numFrames ("+i+") exceeds the minimum numFrames ("+l[0]+") among the examples of the Dataset."}),s=r.hopFrames,t.util.assert(null!=s&&Number.isInteger(s)&&s>0,function(){return"There are "+l.length+" unique lengths among the "+n.size()+" examples of this Dataset, hence hopFrames is required. But it is not provided."}));var u=null==r.normalize||r.normalize;return t.tidy(function(){for(var l,c,d,p=[],f=[],m=[],g=0;g0&&T<1,function(){return"Invalid dataset validation split: "+T});var M=f.map(function(e,t){return[e,m[t]]});t.util.shuffle(M),f=M.map(function(e){return e[0]});var D=M.map(function(e){return e[1]}),I=function(e,r,n){var a,i,s,l,u,c,h,d;t.util.assert(n>0&&n<1,function(){return"validationSplit is expected to be >0 and <1, but got "+n});for(var p=!Array.isArray(e[0]),f=r,m=[],g=0;g=0&&r= 0, < "+a+", and an integer."}),n.keyFrameIndex=r},e.prototype.size=function(){return Object.keys(this.examples).length},e.prototype.durationMillis=function(){var e=0;for(var t in this.examples){var r=this.examples[t].spectrogram,n=23.22|r.frameDurationMillis;e+=r.data.length/r.frameSize*n}return e},e.prototype.empty=function(){return 0===this.size()},e.prototype.clear=function(){this.examples={}},e.prototype.getVocabulary=function(){var e=new Set;for(var t in this.examples){var r=this.examples[t];e.add(r.label)}var n=u(e);return n.sort(),n},e.prototype.serialize=function(e){var r,n,a,i,s=this.getVocabulary();t.util.assert(!this.empty(),function(){return"Cannot serialize empty Dataset"}),null!=e&&(Array.isArray(e)||(e=[e]),e.forEach(function(e){if(-1===s.indexOf(e))throw new Error('Word label "'+e+'" does not exist in the vocabulary of this dataset. 
The vocabulary is: '+JSON.stringify(s)+".")}));var l,u,c,h,d,p=[],f=[];try{for(var m=o(s),v=m.next();!v.done;v=m.next()){var b=v.value;if(null==e||-1!==e.indexOf(b)){var S=this.label2Ids[b];try{for(var T=(a=void 0,o(S)),F=T.next();!F.done;F=T.next()){var M=F.value,z=E(this.examples[M]);p.push(z.spec),f.push(z.data)}}catch(e){a={error:e}}finally{try{F&&!F.done&&(i=T.return)&&i.call(T)}finally{if(a)throw a.error}}}}}catch(e){r={error:e}}finally{try{v&&!v.done&&(n=m.return)&&n.call(m)}finally{if(r)throw r.error}}return l={manifest:p,data:g(f)},u=y(JSON.stringify(l.manifest)),c=y(w),h=new Uint32Array([x]),d=new Uint32Array([u.byteLength]),g([g([c,h.buffer,d.buffer]),u,l.data])},e}();function E(e){var t=null!=e.rawAudio,r={label:e.label,spectrogramNumFrames:e.spectrogram.data.length/e.spectrogram.frameSize,spectrogramFrameSize:e.spectrogram.frameSize};null!=e.spectrogram.keyFrameIndex&&(r.spectrogramKeyFrameIndex=e.spectrogram.keyFrameIndex);var n=e.spectrogram.data.buffer.slice(0);return t&&(r.rawAudioNumSamples=e.rawAudio.data.length,r.rawAudioSampleRateHz=e.rawAudio.sampleRateHz,n=g([n,e.rawAudio.data.buffer])),{spec:r,data:n}}function T(e){var t={frameSize:e.spec.spectrogramFrameSize,data:new Float32Array(e.data.slice(0,4*e.spec.spectrogramFrameSize*e.spec.spectrogramNumFrames))};null!=e.spec.spectrogramKeyFrameIndex&&(t.keyFrameIndex=e.spec.spectrogramKeyFrameIndex);var r={label:e.spec.label,spectrogram:t};return null!=e.spec.rawAudioNumSamples&&(r.rawAudio={sampleRateHz:e.spec.rawAudioSampleRateHz,data:new Float32Array(e.data.slice(4*e.spec.spectrogramFrameSize*e.spec.spectrogramNumFrames))}),r}function F(e,r,n,a){if(t.util.assert(Number.isInteger(e)&&e>0,function(){return"snippetLength must be a positive integer, but got "+e}),null!=r&&t.util.assert(Number.isInteger(r)&&r>=0,function(){return"focusIndex must be a non-negative integer, but got "+r}),t.util.assert(Number.isInteger(n)&&n>0,function(){return"windowLength must be a positive integer, but got "+n}),t.util.assert(Number.isInteger(a)&&a>0,function(){return"windowHop must be a positive integer, but got "+a}),t.util.assert(n<=e,function(){return"windowLength ("+n+") exceeds snippetLength ("+e+")"}),t.util.assert(re&&(l=e-n);!(l-a<0||r>=l-a+n);)l-=a;for(;l+n<=e&&!(r=0&&n<=1,function(){return"Invalid probabilityThreshold value: "+n}),a=null!=r.invokeCallbackOnNoiseAndUnknown&&r.invokeCallbackOnNoiseAndUnknown,r.includeEmbedding&&(a=!0),r.suppressionTimeMillis<0)throw new Error("suppressionTimeMillis is expected to be >= 0, but got "+r.suppressionTimeMillis);return o=null==r.overlapFactor?.5:r.overlapFactor,t.util.assert(o>=0&&o<1,function(){return"Expected overlapFactor to be >= 0 and < 1, but got "+o}),c=function(o,c){return i(f,void 0,void 0,function(){var i,c,d,p,f,m,g,v,y,b,w;return s(this,function(s){switch(s.label){case 0:return i=h(o),r.includeEmbedding?[4,this.ensureModelWithEmbeddingOutputCreated()]:[3,2];case 1:return s.sent(),w=l(this.modelWithEmbeddingOutput.predict(i),2),c=w[0],d=w[1],[3,3];case 2:c=this.model.predict(i),s.label=3;case 3:return[4,c.data()];case 4:return p=s.sent(),[4,(f=c.argMax(-1)).data()];case 5:return m=s.sent()[0],g=Math.max.apply(Math,u(p)),t.dispose([c,f,i]),g=0;--r)if("Dense"===this.model.layers[r].getClassName()){e=this.model.layers[r];break}if(null==e)throw new Error("Failed to find second last dense layer in the original model.");return this.modelWithEmbeddingOutput=t.model({inputs:this.model.inputs,outputs:[this.model.outputs[0],e.output]}),[2]}})})},e.prototype.warmUpModel=function(){var 
e=this;t.tidy(function(){for(var r=t.zeros([1].concat(e.nonBatchInputShape)),n=0;n<3;++n)e.model.predict(r)})},e.prototype.ensureMetadataLoaded=function(){return i(this,void 0,void 0,function(){var e,t,n;return s(this,function(a){switch(a.label){case 0:return null!=this.words?[2]:"string"!=typeof this.metadataOrURL?[3,2]:[4,function(e){return i(this,void 0,void 0,function(){var t,n,a,i,o,l,u;return s(this,function(s){switch(s.label){case 0:return t="http://",n="https://",a="file://",0!==e.indexOf(t)&&0!==e.indexOf(n)?[3,3]:[4,fetch(e)];case 1:return[4,s.sent().json()];case 2:return[2,s.sent()];case 3:return 0!==e.indexOf(a)?[3,5]:(i=require("fs"),o=r.promisify(i.readFile),u=(l=JSON).parse,[4,o(e.slice(a.length),{encoding:"utf-8"})]);case 4:return[2,u.apply(l,[s.sent()])];case 5:throw new Error("Unsupported URL scheme in metadata URL: "+e+". Supported schemes are: http://, https://, and (node.js-only) file://")}})})}(this.metadataOrURL)];case 1:return t=a.sent(),[3,3];case 2:t=this.metadataOrURL,a.label=3;case 3:if(null==(e=t).wordLabels){if(null==(n=e.words))throw new Error('Cannot find field "words" or "wordLabels" in metadata JSON file');this.words=n}else this.words=e.wordLabels;return[2]}})})},e.prototype.stopListening=function(){return i(this,void 0,void 0,function(){return s(this,function(e){switch(e.label){case 0:if(!this.streaming)throw new Error("Cannot stop streaming when streaming is not ongoing.");return[4,this.audioDataExtractor.stop()];case 1:return e.sent(),this.streaming=!1,[2]}})})},e.prototype.isListening=function(){return this.streaming},e.prototype.wordLabels=function(){return this.words},e.prototype.params=function(){return this.parameters},e.prototype.modelInputShape=function(){if(null==this.model)throw new Error("Model has not been loaded yet. 
Load model by calling ensureModelLoaded(), recognize(), or listen().");return this.model.inputs[0].shape},e.prototype.recognize=function(e,r){return i(this,void 0,void 0,function(){var n,a,i,o,l,u,c,h,d,p,f,m,g;return s(this,function(s){switch(s.label){case 0:return null==r&&(r={}),[4,this.ensureModelLoaded()];case 1:return s.sent(),null!=e?[3,3]:[4,this.recognizeOnline()];case 2:n=s.sent(),e=n.data,s.label=3;case 3:if(e instanceof t.Tensor)this.checkInputTensorShape(e),i=e,a=e.shape[0];else{if(e.length%this.elementsPerExample)throw new Error("The length of the input Float32Array "+e.length+" is not divisible by the number of tensor elements per per example expected by the model "+this.elementsPerExample+".");a=e.length/this.elementsPerExample,i=t.tensor4d(e,[a].concat(this.nonBatchInputShape))}return l={scores:null},r.includeEmbedding?[4,this.ensureModelWithEmbeddingOutputCreated()]:[3,5];case 4:return s.sent(),u=this.modelWithEmbeddingOutput.predict(i),o=u[0],l.embedding=u[1],[3,6];case 5:o=this.model.predict(i),s.label=6;case 6:return 1!==a?[3,8]:(c=l,[4,o.data()]);case 7:return c.scores=s.sent(),[3,10];case 8:return h=t.unstack(o),d=h.map(function(e){return e.data()}),p=l,[4,Promise.all(d)];case 9:p.scores=s.sent(),t.dispose(h),s.label=10;case 10:return r.includeSpectrogram?(f=l,m={},e instanceof t.Tensor?[4,e.data()]:[3,12]):[3,14];case 11:return g=s.sent(),[3,13];case 12:g=e,s.label=13;case 13:f.spectrogram=(m.data=g,m.frameSize=this.nonBatchInputShape[1],m),s.label=14;case 14:return t.dispose(o),[2,l]}})})},e.prototype.recognizeOnline=function(){return i(this,void 0,void 0,function(){var e=this;return s(this,function(t){return[2,new Promise(function(t,r){e.audioDataExtractor=new d({sampleRateHz:e.parameters.sampleRateHz,numFramesPerSpectrogram:e.nonBatchInputShape[0],columnTruncateLength:e.nonBatchInputShape[1],suppressionTimeMillis:0,spectrogramCallback:function(r){return i(e,void 0,void 0,function(){var e,n,a;return s(this,function(i){switch(i.label){case 0:return e=h(r),[4,this.audioDataExtractor.stop()];case 1:return i.sent(),n=t,a={},[4,e.data()];case 2:return n.apply(void 0,[(a.data=i.sent(),a.frameSize=this.nonBatchInputShape[1],a)]),e.dispose(),[2,!1]}})})},overlapFactor:0}),e.audioDataExtractor.start()})]})})},e.prototype.createTransfer=function(e){if(null==this.model)throw new Error("Model has not been loaded yet. 
Load model by calling ensureModelLoaded(), recognize(), or listen().");t.util.assert(null!=e&&"string"==typeof e&&e.length>1,function(){return"Expected the name for a transfer-learning recognizer to be a non-empty string, but got "+JSON.stringify(e)}),t.util.assert(null==this.transferRecognizers[e],function(){return"There is already a transfer-learning model named '"+e+"'"});var r=new O(e,this.parameters,this.model);return this.transferRecognizers[e]=r,r},e.prototype.freezeModel=function(){var e,t;try{for(var r=o(this.model.layers),n=r.next();!n.done;n=r.next()){n.value.trainable=!1}}catch(t){e={error:t}}finally{try{n&&!n.done&&(t=r.return)&&t.call(r)}finally{if(e)throw e.error}}},e.prototype.checkInputTensorShape=function(e){var r=this.model.inputs[0].shape.length;if(e.shape.length!==r)throw new Error("Expected input Tensor to have rank "+r+", but got rank "+e.shape.length+".");var n=e.shape.slice(1),a=this.model.inputs[0].shape.slice(1);if(!t.util.arraysEqual(n,a))throw new Error("Expected input to have shape [null,"+a+"], but got shape [null,"+n+"]")},e.VALID_VOCABULARY_NAMES=["18w","directional4w"],e.DEFAULT_VOCABULARY_NAME="18w",e}(),O=function(e){function r(r,n,a){var i=e.call(this)||this;return i.name=r,i.parameters=n,i.baseModel=a,t.util.assert(null!=r&&"string"==typeof r&&r.length>0,function(){return"The name of a transfer model must be a non-empty string, but got "+JSON.stringify(r)}),i.nonBatchInputShape=i.baseModel.inputs[0].shape.slice(1),i.words=null,i.dataset=new S,i}return function(e,t){function r(){this.constructor=e}n(e,t),e.prototype=null===t?Object.create(t):(r.prototype=t.prototype,new r)}(r,e),r.prototype.collectExample=function(e,r){return i(this,void 0,void 0,function(){var n,a,o,l,u=this;return s(this,function(p){if(t.util.assert(!this.streaming,function(){return"Cannot start collection of transfer-learning example because a streaming recognition or transfer-learning example collection is ongoing"}),t.util.assert(null!=e&&"string"==typeof e&&e.length>0,function(){return"Must provide a non-empty string when collecting transfer-learning example"}),null==r&&(r={}),null!=r.durationMultiplier&&null!=r.durationSec)throw new Error("durationMultiplier and durationSec are mutually exclusive, but are both specified.");return null!=r.durationSec?(t.util.assert(r.durationSec>0,function(){return"Expected durationSec to be > 0, but got "+r.durationSec}),a=this.parameters.fftSize/this.parameters.sampleRateHz,n=Math.ceil(r.durationSec/a)):null!=r.durationMultiplier?(t.util.assert(r.durationMultiplier>=1,function(){return"Expected duration multiplier to be >= 1, but got "+r.durationMultiplier}),n=Math.round(this.nonBatchInputShape[0]*r.durationMultiplier)):n=this.nonBatchInputShape[0],null!=r.snippetDurationSec&&(t.util.assert(r.snippetDurationSec>0,function(){return"snippetDurationSec is expected to be > 0, but got "+r.snippetDurationSec}),t.util.assert(null!=r.onSnippet,function(){return"onSnippet must be provided if snippetDurationSec is provided."})),null!=r.onSnippet&&t.util.assert(null!=r.snippetDurationSec,function(){return"snippetDurationSec must be provided if onSnippet is provided."}),o=this.parameters.fftSize/this.parameters.sampleRateHz,l=o*n,this.streaming=!0,[2,new Promise(function(a){var o=null==r.snippetDurationSec?1:r.snippetDurationSec/l,p=1-o,f=Math.round(1/o),m=0,g=-1,y=[];u.audioDataExtractor=new 
d({sampleRateHz:u.parameters.sampleRateHz,numFramesPerSpectrogram:n,columnTruncateLength:u.nonBatchInputShape[1],suppressionTimeMillis:0,spectrogramCallback:function(n,o){return i(u,void 0,void 0,function(){var i,l,u,d,p,b,w,x,S,E,T,F,M,z,D,I,L,A,R,O;return s(this,function(s){switch(s.label){case 0:return null!=r.onSnippet?[3,7]:(i=h(n),u=(l=this.dataset).addExample,d={label:e},p={},[4,i.data()]);case 1:return d.spectrogram=(p.data=s.sent(),p.frameSize=this.nonBatchInputShape[1],p),r.includeRawAudio?(w={},[4,o.data()]):[3,3];case 2:return w.data=s.sent(),w.sampleRateHz=this.audioDataExtractor.sampleRateHz,b=w,[3,4];case 3:b=void 0,s.label=4;case 4:return u.apply(l,[(d.rawAudio=b,d)]),i.dispose(),[4,this.audioDataExtractor.stop()];case 5:return s.sent(),this.streaming=!1,this.collateTransferWords(),x=a,S={},[4,n.data()];case 6:return x.apply(void 0,[(S.data=s.sent(),S.frameSize=this.nonBatchInputShape[1],S)]),[3,13];case 7:return[4,n.data()];case 8:for(E=s.sent(),-1===g&&(g=E.length),T=g-1;0!==E[T]&&T>=0;)T--;return F=g-T-1,g=T+1,M=E.slice(E.length-F,E.length),y.push(M),null!=r.onSnippet&&r.onSnippet({data:M,frameSize:this.nonBatchInputShape[1]}),m++!==f?[3,13]:[4,this.audioDataExtractor.stop()];case 9:return s.sent(),this.streaming=!1,this.collateTransferWords(),z=function(e){if(e.length<2)throw new Error("Cannot normalize a Float32Array with fewer than 2 elements.");return null==c&&(c=t.backend().epsilon()),t.tidy(function(){var r=t.moments(t.tensor1d(e)),n=r.mean,a=r.variance,i=n.arraySync(),s=Math.sqrt(a.arraySync()),o=Array.from(e).map(function(e){return(e-i)/(s+c)});return new Float32Array(o)})}(v(y)),D={data:z,frameSize:this.nonBatchInputShape[1]},L=(I=this.dataset).addExample,A={label:e,spectrogram:D},r.includeRawAudio?(O={},[4,o.data()]):[3,11];case 10:return O.data=s.sent(),O.sampleRateHz=this.audioDataExtractor.sampleRateHz,R=O,[3,12];case 11:R=void 0,s.label=12;case 12:L.apply(I,[(A.rawAudio=R,A)]),a(D),s.label=13;case 13:return[2,!1]}})})},overlapFactor:p,includeRawAudio:r.includeRawAudio}),u.audioDataExtractor.start(r.audioTrackConstraints)})]})})},r.prototype.clearExamples=function(){var e=this;t.util.assert(null!=this.words&&this.words.length>0&&!this.dataset.empty(),function(){return"No transfer learning examples exist for model name "+e.name}),this.dataset.clear(),this.words=null},r.prototype.countExamples=function(){if(this.dataset.empty())throw new Error("No examples have been collected for transfer-learning model named '"+this.name+"' yet.");return this.dataset.getExampleCounts()},r.prototype.getExamples=function(e){return this.dataset.getExamples(e)},r.prototype.setExampleKeyFrameIndex=function(e,t){this.dataset.setExampleKeyFrameIndex(e,t)},r.prototype.removeExample=function(e){this.dataset.removeExample(e),this.collateTransferWords()},r.prototype.isDatasetEmpty=function(){return this.dataset.empty()},r.prototype.loadExamples=function(e,t){var r,n,a,i;void 0===t&&(t=!1);var s=new S(e);t&&this.clearExamples();var l=s.getVocabulary();try{for(var u=o(l),c=u.next();!c.done;c=u.next()){var h=c.value,d=s.getExamples(h);try{for(var p=(a=void 0,o(d)),f=p.next();!f.done;f=p.next()){var m=f.value;this.dataset.addExample(m.example)}}catch(e){a={error:e}}finally{try{f&&!f.done&&(i=p.return)&&i.call(p)}finally{if(a)throw a.error}}}}catch(e){r={error:e}}finally{try{c&&!c.done&&(n=u.return)&&n.call(u)}finally{if(r)throw r.error}}this.collateTransferWords()},r.prototype.serializeExamples=function(e){return 
this.dataset.serialize(e)},r.prototype.collateTransferWords=function(){this.words=this.dataset.getVocabulary()},r.prototype.collectTransferDataAsTensors=function(e,t){var r=this.nonBatchInputShape[0];e=e||.25;var n=Math.round(e*r),i=this.dataset.getData(null,a({numFrames:r,hopFrames:n},t));return{xs:i.xs,ys:i.ys}},r.prototype.collectTransferDataAsTfDataset=function(e,t,r,n){void 0===t&&(t=.15),void 0===r&&(r=32);var i=this.nonBatchInputShape[0];e=e||.25;var s=Math.round(e*i);return this.dataset.getData(null,a({numFrames:i,hopFrames:s,getDataset:!0,datasetBatchSize:r,datasetValidationSplit:t},n))},r.prototype.train=function(e){return i(this,void 0,void 0,function(){var r,n=this;return s(this,function(a){return t.util.assert(null!=this.words&&this.words.length>0,function(){return"Cannot train transfer-learning model '"+n.name+"' because no transfer learning example has been collected."}),t.util.assert(this.words.length>1,function(){return"Cannot train transfer-learning model '"+n.name+"' because only 1 word label ('"+JSON.stringify(n.words)+"') has been collected for transfer learning. Requires at least 2."}),null!=e.fineTuningEpochs&&t.util.assert(e.fineTuningEpochs>=0&&Number.isInteger(e.fineTuningEpochs),function(){return"If specified, fineTuningEpochs must be a non-negative integer, but received "+e.fineTuningEpochs}),null==e&&(e={}),null==this.model&&this.createTransferModelFromBaseModel(),this.secondLastBaseDenseLayer.trainable=!1,this.model.compile({loss:"categoricalCrossentropy",optimizer:e.optimizer||"sgd",metrics:["acc"]}),r=null==e.fitDatasetDurationMillisThreshold?6e4:e.fitDatasetDurationMillisThreshold,this.dataset.durationMillis()>r?(console.log("Detected large dataset: total duration = "+this.dataset.durationMillis()+" ms > "+r+" ms. Training transfer model using fitDataset() instead of fit()"),[2,this.trainOnDataset(e)]):[2,this.trainOnTensors(e)]})})},r.prototype.trainOnDataset=function(e){return i(this,void 0,void 0,function(){var r,n,a,i,o,u,c,h,d;return s(this,function(s){switch(s.label){case 0:return t.util.assert(e.epochs>0,function(){return"Invalid config.epochs"}),r=null==e.batchSize?32:e.batchSize,n=e.windowHopRatio||.25,a=l(this.collectTransferDataAsTfDataset(n,e.validationSplit,r,{augmentByMixingNoiseRatio:e.augmentByMixingNoiseRatio}),2),i=a[0],o=a[1],u=t.util.now(),[4,this.model.fitDataset(i,{epochs:e.epochs,validationData:e.validationSplit>0?o:null,callbacks:null==e.callback?null:[e.callback]})];case 1:return c=s.sent(),console.log("fitDataset() took "+(t.util.now()-u).toFixed(2)+" ms"),null!=e.fineTuningEpochs&&e.fineTuningEpochs>0?(h=t.util.now(),[4,this.fineTuningUsingTfDatasets(e,i,o)]):[3,3];case 2:return d=s.sent(),console.log("fitDataset() (fine-tuning) took "+(t.util.now()-h).toFixed(2)+" ms"),[2,[c,d]];case 3:return[2,c]}})})},r.prototype.trainOnTensors=function(e){return i(this,void 0,void 0,function(){var r,n,a,i,o,l,u,c,h,d;return s(this,function(s){switch(s.label){case 0:r=e.windowHopRatio||.25,n=this.collectTransferDataAsTensors(r,{augmentByMixingNoiseRatio:e.augmentByMixingNoiseRatio}),a=n.xs,i=n.ys,console.log("Training data: xs.shape = "+a.shape+", ys.shape = "+i.shape),s.label=1;case 1:return s.trys.push([1,,6,7]),null!=e.validationSplit?(c=function(e,r,n){return t.util.assert(n>0&&n<1,function(){return"validationSplit is expected to be >0 and <1, but got "+n}),t.tidy(function(){for(var a=r.argMax(-1).dataSync(),i=[],s=0;s0?[4,this.fineTuningUsingTensors(e,o,l,u)]:[3,4];case 3:return d=s.sent(),[2,[h,d]];case 4:return[2,h];case 
5:return[3,7];case 6:return t.dispose([a,i,o,l,u]),[7];case 7:return[2]}})})},r.prototype.fineTuningUsingTfDatasets=function(e,t,r){return i(this,void 0,void 0,function(){var n,a,i;return s(this,function(s){switch(s.label){case 0:return n=this.secondLastBaseDenseLayer.trainable,this.secondLastBaseDenseLayer.trainable=!0,a=null==e.fineTuningOptimizer?"sgd":e.fineTuningOptimizer,this.model.compile({loss:"categoricalCrossentropy",optimizer:a,metrics:["acc"]}),[4,this.model.fitDataset(t,{epochs:e.fineTuningEpochs,validationData:r,callbacks:null==e.callback?null:[e.callback]})];case 1:return i=s.sent(),this.secondLastBaseDenseLayer.trainable=n,[2,i]}})})},r.prototype.fineTuningUsingTensors=function(e,t,r,n){return i(this,void 0,void 0,function(){var a,i,o;return s(this,function(s){switch(s.label){case 0:return a=this.secondLastBaseDenseLayer.trainable,this.secondLastBaseDenseLayer.trainable=!0,i=null==e.fineTuningOptimizer?"sgd":e.fineTuningOptimizer,this.model.compile({loss:"categoricalCrossentropy",optimizer:i,metrics:["acc"]}),[4,this.model.fit(t,r,{epochs:e.fineTuningEpochs,validationData:n,batchSize:e.batchSize,callbacks:null==e.fineTuningCallback?null:[e.fineTuningCallback]})];case 1:return o=s.sent(),this.secondLastBaseDenseLayer.trainable=a,[2,o]}})})},r.prototype.evaluate=function(e){return i(this,void 0,void 0,function(){var r,n=this;return s(this,function(a){return t.util.assert(null!=e.wordProbThresholds&&e.wordProbThresholds.length>0,function(){return"Received null or empty wordProbThresholds"}),r=0,t.util.assert("_background_noise_"===this.words[r],function(){return"Cannot perform evaluation when the first tag is not _background_noise_"}),[2,t.tidy(function(){for(var a=[],i=0,s=n.collectTransferDataAsTensors(e.windowHopRatio),o=s.xs,l=s.ys.argMax(-1).dataSync(),u=n.model.predict(o),c=u.slice([0,1],[u.shape[0],u.shape[1]-1]).max(-1),h=u.shape[0],d=0;d=0&&"dense"!==r[n].getClassName().toLowerCase();)n--;if(n<0)throw new Error("Cannot find a hidden dense layer in the base model.");this.secondLastBaseDenseLayer=r[n];var a=this.secondLastBaseDenseLayer.output;this.transferHead=t.sequential(),this.transferHead.add(t.layers.dense({units:this.words.length,activation:"softmax",inputShape:a.shape.slice(1),name:"NewHeadDense"}));var i=this.transferHead.apply(a);this.model=t.model({inputs:this.baseModel.inputs,outputs:i})},r.prototype.modelInputShape=function(){return this.baseModel.inputs[0].shape},r.prototype.getMetadata=function(){return{tfjsSpeechCommandsVersion:D,modelName:this.name,timeStamp:(new Date).toISOString(),wordLabels:this.wordLabels()}},r.prototype.save=function(e){return i(this,void 0,void 0,function(){var t,r,n;return s(this,function(a){return t=null!=e,e=e||C(this.name),t||(r=A.localStorage.getItem(I),(n=null==r?{}:JSON.parse(r))[this.name]=this.getMetadata(),A.localStorage.setItem(I,JSON.stringify(n))),console.log("Saving model to "+e),[2,this.model.save(e)]})})},r.prototype.load=function(e){return i(this,void 0,void 0,function(){var r,n,a;return s(this,function(i){switch(i.label){case 0:if(r=null!=e,e=e||C(this.name),!r){if(null==(n=JSON.parse(A.localStorage.getItem(I)))||null==n[this.name])throw new Error("Cannot find metadata for transfer model named "+this.name+'"');this.words=n[this.name].wordLabels,console.log("Loaded word list for model named "+this.name+": "+this.words)}return a=this,[4,t.loadLayersModel(e)];case 1:return a.model=i.sent(),console.log("Loaded model from "+e+":"),this.model.summary(),[2]}})})},r.prototype.createTransfer=function(e){throw new 
Error("Creating transfer-learned recognizer from a transfer-learned recognizer is not supported.")},r}(R);function C(e){return""+L+e}var k={concatenateFloat32Arrays:v,playRawAudio:function(e,t){var r=new(window.AudioContext||window.webkitAudioContext),n=r.createBuffer(1,e.data.length,e.sampleRateHz);n.getChannelData(0).set(e.data);var a=r.createBufferSource();a.buffer=n,a.connect(r.destination),a.start(),a.onended=function(){null!=t&&t()}}};e.create=function(e,r,n,a){if(t.util.assert(null==n&&null==a||null!=n&&null!=a,function(){return"customModelURL and customMetadataURL must be both provided or both not provided."}),null!=n&&t.util.assert(null==r,function(){return"vocabulary name must be null or undefined when modelURL is provided."}),"BROWSER_FFT"===e)return new R(r,n,a);throw"SOFT_FFT"===e?new Error("SOFT_FFT SpeechCommandRecognizer has not been implemented yet."):new Error("Invalid fftType: '"+e+"'")},e.utils=k,e.BACKGROUND_NOISE_TAG="_background_noise_",e.Dataset=S,e.getMaxIntensityFrameIndex=z,e.spectrogram2IntensityCurve=M,e.deleteSavedTransferModel=function(e){return i(this,void 0,void 0,function(){var r;return s(this,function(n){switch(n.label){case 0:return null==(r=JSON.parse(A.localStorage.getItem(I)))&&(r={}),null!=r[e]&&delete r[e],A.localStorage.setItem(I,JSON.stringify(r)),[4,t.io.removeModel(C(e))];case 1:return n.sent(),[2]}})})},e.listSavedTransferModels=function(){return i(this,void 0,void 0,function(){var e,r,n;return s(this,function(a){switch(a.label){case 0:return[4,t.io.listModels()];case 1:for(n in e=a.sent(),r=[],e)n.startsWith(L)&&r.push(n.slice(L.length));return[2,r]}})})},e.UNKNOWN_TAG="_unknown_",e.version=D,Object.defineProperty(e,"__esModule",{value:!0})}); 18 | //# sourceMappingURL=speech-commands.min.js.map 19 | --------------------------------------------------------------------------------