├── src
    ├── index.ts
    ├── types.ts
    ├── run.ts
    ├── __tests__
    │   └── lib.test.ts
    └── lib.ts
├── .eslintrc.json
├── tsup.config.ts
├── tsconfig.json
├── package.json
├── .npmignore
├── .gitignore
├── README.md
└── LICENSE


/src/index.ts:
--------------------------------------------------------------------------------
 1 | export * from "./lib";
 2 | export * from "./types";
 3 | 
 4 | // Re-export specific types for better DX
 5 | export type {
 6 |   Speaker,
 7 |   TranscriptSegment,
 8 |   ProcessedTranscript,
 9 |   Config,
10 | } from "./types";
11 | 


--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "env": {
 3 |     "es2021": true,
 4 |     "node": true
 5 |   },
 6 |   "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"],
 7 |   "parser": "@typescript-eslint/parser",
 8 |   "parserOptions": {
 9 |     "ecmaVersion": "latest",
10 |     "sourceType": "module"
11 |   },
12 |   "plugins": ["@typescript-eslint"],
13 |   "rules": {
14 |     "@typescript-eslint/no-explicit-any": "warn",
15 |     "@typescript-eslint/explicit-module-boundary-types": "off",
16 |     "@typescript-eslint/no-unused-vars": [
17 |       "error",
18 |       { "argsIgnorePattern": "^_" }
19 |     ]
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/tsup.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from "tsup";
 2 | 
 3 | export default defineConfig({
 4 |   entry: ["src/index.ts", "src/run.ts"],
 5 |   target: "es2020",
 6 |   format: ["cjs", "esm"],
 7 |   splitting: false,
 8 |   sourcemap: true,
 9 |   clean: true,
10 |   dts: true,
11 |   outDir: "dist",
12 |   outExtension({ format }) {
13 |     return {
14 |       js: format === "cjs" ? ".cjs" : ".js",
15 |     };
16 |   },
17 |   // Ensure we handle node builtins properly
18 |   platform: "node",
19 |   // Bundle dependencies for the CLI
20 |   noExternal: ["ora", "chalk", "inquirer", "conf"],
21 |   // Add shebang to CLI entry point
22 |   esbuildOptions(options) {
23 |     if (options.entryPoints?.includes("src/run.ts")) {
24 |       options.banner = {
25 |         js: "#!/usr/bin/env node",
26 |       };
27 |     }
28 |   },
29 | });
30 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "ts-node": {
 3 |     "files": true
 4 |   },
 5 |   "compilerOptions": {
 6 |     "baseUrl": ".",
 7 |     "target": "es2020",
 8 |     "module": "esnext",
 9 |     "moduleResolution": "bundler",
10 |     "esModuleInterop": true,
11 |     "forceConsistentCasingInFileNames": true,
12 |     "strict": true,
13 |     "strictPropertyInitialization": false,
14 |     "skipLibCheck": true,
15 |     // "strictNullChecks": true,
16 |     "strictBindCallApply": false,
17 |     "declaration": true,
18 |     "emitDecoratorMetadata": true,
19 |     "experimentalDecorators": true,
20 |     "allowSyntheticDefaultImports": true,
21 |     "sourceMap": true,
22 |     "outDir": "./dist",
23 |     "noImplicitAny": false,
24 |     "noFallthroughCasesInSwitch": false,
25 |     "resolveJsonModule": true
26 |   },
27 |   "include": ["src/**/*"]
28 | }
29 | 


--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | 
 3 | export const ConfigSchema = z.object({
 4 |   assemblyAiKey: z.string().optional(),
 5 |   defaultOutputDir: z.string().optional(),
 6 |   knownSpeakers: z.record(z.string(), z.string()).optional(),
 7 |   cacheDir: z.string().optional(),
 8 |   cacheEnabled: z.boolean().default(true),
 9 | });
10 | 
11 | export type Config = z.infer<typeof ConfigSchema>;
12 | 
13 | export interface Speaker {
14 |   id: string;
15 |   name: string;
16 |   confidence: number;
17 | }
18 | 
19 | export interface TranscriptSegment {
20 |   text: string;
21 |   start: number;
22 |   end: number;
23 |   speaker: Speaker;
24 |   confidence: number;
25 | }
26 | 
27 | export interface ProcessedTranscript {
28 |   segments: TranscriptSegment[];
29 |   speakers: Speaker[];
30 |   metadata: {
31 |     fileName: string;
32 |     duration: number;
33 |     processedAt: string;
34 |   };
35 | }
36 | 
37 | export interface SelectorOptions {
38 |   numExamples: number;
39 |   sampleSize?: number;
40 |   prioritizeComplete?: boolean;
41 |   completenessWeight?: number;
42 | }
43 | 
44 | export interface CliOptions {
45 |   input: string;
46 |   output?: string;
47 |   speakers?: string[];
48 |   format?: "json" | "txt" | "srt" | "md";
49 |   verbose?: boolean;
50 |   skipDiarization?: boolean;
51 |   assemblyAiKey?: string;
52 | }
53 | 
54 | export interface CacheEntry {
55 |   timestamp: string;
56 |   hash: string;
57 |   data: {
58 |     audioUrl?: string;
59 |     transcriptId?: string;
60 |     transcript?: ProcessedTranscript;
61 |   };
62 | }
63 | 
64 | export interface CacheOptions {
65 |   enabled?: boolean;
66 |   cacheDir?: string;
67 | }
68 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "meeting-diary",
 3 |   "version": "0.0.1",
 4 |   "author": "Hrishi Olickel <twitter-@hrishioa> (https://olickel.com)",
 5 |   "repository": {
 6 |     "type": "git",
 7 |     "url": "git+https://github.com/southbridgeai/meeting-diary.git"
 8 |   },
 9 |   "main": "./dist/index.cjs",
10 |   "module": "./dist/index.js",
11 |   "type": "module",
12 |   "bin": {
13 |     "meeting-diary": "./dist/run.js"
14 |   },
15 |   "engines": {
16 |     "node": ">=18.0.0"
17 |   },
18 |   "dependencies": {
19 |     "assemblyai": "^3.1.3",
20 |     "chalk": "^5.3.0",
21 |     "clipboardy": "^4.0.0",
22 |     "commander": "^12.0.0",
23 |     "conf": "^12.0.0",
24 |     "inquirer": "^9.2.15",
25 |     "ora": "^8.0.1",
26 |     "zod": "^3.22.4"
27 |   },
28 |   "devDependencies": {
29 |     "@swc/core": "^1.7.26",
30 |     "@types/bun": "^1.1.10",
31 |     "@types/inquirer": "^9.0.7",
32 |     "@types/node": "^22.7.4",
33 |     "@typescript-eslint/eslint-plugin": "^7.0.1",
34 |     "@typescript-eslint/parser": "^7.0.1",
35 |     "eslint": "^8.56.0",
36 |     "typescript": "^5.3.3"
37 |   },
38 |   "exports": {
39 |     ".": {
40 |       "import": {
41 |         "types": "./dist/index.d.ts",
42 |         "default": "./dist/index.js"
43 |       },
44 |       "require": {
45 |         "types": "./dist/index.d.ts",
46 |         "default": "./dist/index.cjs"
47 |       }
48 |     }
49 |   },
50 |   "description": "Quickly diarize and transcribe meetings with AssemblyAI",
51 |   "files": [
52 |     "dist",
53 |     "package.json"
54 |   ],
55 |   "license": "Apache-2.0",
56 |   "scripts": {
57 |     "build": "bun build src/index.ts src/run.ts --outdir dist --target node",
58 |     "dev": "bun src/run.ts",
59 |     "test": "bun test",
60 |     "test:coverage": "bun test --coverage",
61 |     "typecheck": "tsc --noEmit",
62 |     "lint": "eslint . --ext .ts",
63 |     "format": "prettier --write \"src/**/*.ts\"",
64 |     "clean": "rm -rf dist",
65 |     "prepublishOnly": "bun run clean && bun run build"
66 |   }
67 | }
68 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
  1 | archive
  2 | tests
  3 | 
  4 | .DS_Store
  5 | zig-cache
  6 | packages/*/*.wasm
  7 | *.o
  8 | *.a
  9 | profile.json
 10 | 
 11 | .env
 12 | node_modules
 13 | .envrc
 14 | .swcrc
 15 | yarn.lock
 16 | *.tmp
 17 | *.log
 18 | *.out.js
 19 | *.out.refresh.js
 20 | **/package-lock.json
 21 | build
 22 | *.wat
 23 | zig-out
 24 | pnpm-lock.yaml
 25 | README.md.template
 26 | src/deps/zig-clap/example
 27 | src/deps/zig-clap/README.md
 28 | src/deps/zig-clap/.github
 29 | src/deps/zig-clap/.gitattributes
 30 | out
 31 | outdir
 32 | 
 33 | .trace
 34 | cover
 35 | coverage
 36 | coverv
 37 | *.trace
 38 | github
 39 | out.*
 40 | out
 41 | .parcel-cache
 42 | esbuilddir
 43 | *.bun
 44 | parceldist
 45 | esbuilddir
 46 | outdir/
 47 | outcss
 48 | .next
 49 | txt.js
 50 | .idea
 51 | .vscode/cpp*
 52 | .vscode/clang*
 53 | 
 54 | node_modules_*
 55 | *.jsb
 56 | *.zip
 57 | bun-zigld
 58 | bun-singlehtreaded
 59 | bun-nomimalloc
 60 | bun-mimalloc
 61 | examples/lotta-modules/bun-yday
 62 | examples/lotta-modules/bun-old
 63 | examples/lotta-modules/bun-nofscache
 64 | 
 65 | src/node-fallbacks/out/*
 66 | src/node-fallbacks/node_modules
 67 | sign.json
 68 | release/
 69 | *.dmg
 70 | sign.*.json
 71 | packages/debug-*
 72 | packages/bun-cli/postinstall.js
 73 | packages/bun-*/bun
 74 | packages/bun-*/bun-profile
 75 | packages/bun-*/debug-bun
 76 | packages/bun-*/*.o
 77 | packages/bun-cli/postinstall.js
 78 | 
 79 | packages/bun-cli/bin/*
 80 | bun-test-scratch
 81 | misctools/fetch
 82 | 
 83 | src/deps/libiconv
 84 | src/deps/openssl
 85 | src/tests.zig
 86 | *.blob
 87 | src/deps/s2n-tls
 88 | .npm
 89 | .npm.gz
 90 | 
 91 | bun-binary
 92 | 
 93 | src/deps/PLCrashReporter/
 94 | 
 95 | *.dSYM
 96 | *.crash
 97 | misctools/sha
 98 | packages/bun-wasm/*.mjs
 99 | packages/bun-wasm/*.cjs
100 | packages/bun-wasm/*.map
101 | packages/bun-wasm/*.js
102 | packages/bun-wasm/*.d.ts
103 | packages/bun-wasm/*.d.cts
104 | packages/bun-wasm/*.d.mts
105 | *.bc
106 | 
107 | src/fallback.version
108 | src/runtime.version
109 | *.sqlite
110 | *.database
111 | *.db
112 | misctools/machbench
113 | *.big
114 | .eslintcache
115 | 
116 | /bun-webkit
117 | 
118 | src/deps/c-ares/build
119 | src/bun.js/bindings-obj
120 | src/bun.js/debug-bindings-obj
121 | 
122 | failing-tests.txt
123 | test.txt
124 | myscript.sh
125 | 
126 | cold-jsc-start
127 | cold-jsc-start.d
128 | 
129 | /testdir
130 | /test.ts
131 | /test.js
132 | 
133 | src/js/out/modules*
134 | src/js/out/functions*
135 | src/js/out/tmp
136 | src/js/out/DebugPath.h
137 | 
138 | make-dev-stats.csv
139 | 
140 | .uuid
141 | tsconfig.tsbuildinfo
142 | 
143 | test/js/bun/glob/fixtures
144 | *.lib
145 | *.pdb
146 | CMakeFiles
147 | build.ninja
148 | .ninja_deps
149 | .ninja_log
150 | CMakeCache.txt
151 | cmake_install.cmake
152 | compile_commands.json
153 | 
154 | *.lib
155 | x64
156 | **/*.vcxproj*
157 | **/*.sln*
158 | **/*.dir
159 | **/*.pdb
160 | 
161 | /.webkit-cache
162 | /.cache
163 | /src/deps/libuv
164 | /build-*/
165 | /kcov-out
166 | 
167 | .vs
168 | 
169 | **/.verdaccio-db.json
170 | /test-report.md
171 | /test-report.json
172 | 
173 | ########################### MY STUFF
174 | 
175 | tests


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | .DS_Store
  2 | zig-cache
  3 | packages/*/*.wasm
  4 | *.o
  5 | *.a
  6 | profile.json
  7 | dist
  8 | 
  9 | mandark-history.json
 10 | compiled-code.txt
 11 | 
 12 | .env
 13 | node_modules
 14 | .envrc
 15 | .swcrc
 16 | yarn.lock
 17 | *.tmp
 18 | *.log
 19 | *.out.js
 20 | *.out.refresh.js
 21 | **/package-lock.json
 22 | build
 23 | *.wat
 24 | zig-out
 25 | pnpm-lock.yaml
 26 | README.md.template
 27 | src/deps/zig-clap/example
 28 | src/deps/zig-clap/README.md
 29 | src/deps/zig-clap/.github
 30 | src/deps/zig-clap/.gitattributes
 31 | out
 32 | outdir
 33 | 
 34 | .trace
 35 | cover
 36 | coverage
 37 | coverv
 38 | *.trace
 39 | github
 40 | out.*
 41 | out
 42 | .parcel-cache
 43 | esbuilddir
 44 | *.bun
 45 | parceldist
 46 | esbuilddir
 47 | outdir/
 48 | outcss
 49 | .next
 50 | txt.js
 51 | .idea
 52 | .vscode/cpp*
 53 | .vscode/clang*
 54 | 
 55 | node_modules_*
 56 | *.jsb
 57 | *.zip
 58 | bun-zigld
 59 | bun-singlehtreaded
 60 | bun-nomimalloc
 61 | bun-mimalloc
 62 | examples/lotta-modules/bun-yday
 63 | examples/lotta-modules/bun-old
 64 | examples/lotta-modules/bun-nofscache
 65 | 
 66 | src/node-fallbacks/out/*
 67 | src/node-fallbacks/node_modules
 68 | sign.json
 69 | release/
 70 | *.dmg
 71 | sign.*.json
 72 | packages/debug-*
 73 | packages/bun-cli/postinstall.js
 74 | packages/bun-*/bun
 75 | packages/bun-*/bun-profile
 76 | packages/bun-*/debug-bun
 77 | packages/bun-*/*.o
 78 | packages/bun-cli/postinstall.js
 79 | 
 80 | packages/bun-cli/bin/*
 81 | bun-test-scratch
 82 | misctools/fetch
 83 | 
 84 | src/deps/libiconv
 85 | src/deps/openssl
 86 | src/tests.zig
 87 | *.blob
 88 | src/deps/s2n-tls
 89 | .npm
 90 | .npm.gz
 91 | 
 92 | bun-binary
 93 | 
 94 | src/deps/PLCrashReporter/
 95 | 
 96 | *.dSYM
 97 | *.crash
 98 | misctools/sha
 99 | packages/bun-wasm/*.mjs
100 | packages/bun-wasm/*.cjs
101 | packages/bun-wasm/*.map
102 | packages/bun-wasm/*.js
103 | packages/bun-wasm/*.d.ts
104 | packages/bun-wasm/*.d.cts
105 | packages/bun-wasm/*.d.mts
106 | *.bc
107 | 
108 | src/fallback.version
109 | src/runtime.version
110 | *.sqlite
111 | *.database
112 | *.db
113 | misctools/machbench
114 | *.big
115 | .eslintcache
116 | 
117 | /bun-webkit
118 | 
119 | src/deps/c-ares/build
120 | src/bun.js/bindings-obj
121 | src/bun.js/debug-bindings-obj
122 | 
123 | failing-tests.txt
124 | test.txt
125 | myscript.sh
126 | 
127 | cold-jsc-start
128 | cold-jsc-start.d
129 | 
130 | bun.lockb
131 | 
132 | /testdir
133 | /test.ts
134 | /test.js
135 | 
136 | src/js/out/modules*
137 | src/js/out/functions*
138 | src/js/out/tmp
139 | src/js/out/DebugPath.h
140 | 
141 | make-dev-stats.csv
142 | 
143 | .uuid
144 | tsconfig.tsbuildinfo
145 | 
146 | test/js/bun/glob/fixtures
147 | *.lib
148 | *.pdb
149 | CMakeFiles
150 | build.ninja
151 | .ninja_deps
152 | .ninja_log
153 | CMakeCache.txt
154 | cmake_install.cmake
155 | compile_commands.json
156 | 
157 | *.lib
158 | x64
159 | **/*.vcxproj*
160 | **/*.sln*
161 | **/*.dir
162 | **/*.pdb
163 | 
164 | /.webkit-cache
165 | /.cache
166 | /src/deps/libuv
167 | /build-*/
168 | /kcov-out
169 | 
170 | .vs
171 | 
172 | **/.verdaccio-db.json
173 | /test-report.md
174 | /test-report.json
175 | 
176 | ########################### MY STUFF
177 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Meeting Diary
  2 | 
  3 | A powerful CLI tool to transcribe and diarize audio/video files using AssemblyAI. Automatically identifies speakers and generates transcripts in multiple formats.
  4 | 
  5 | ## Features
  6 | 
  7 | - 🎙️ Automatic speaker diarization
  8 | - 👥 Interactive speaker identification with context
  9 | - 📝 Multiple output formats (Markdown, SRT, TXT, JSON)
 10 | - 🕒 Timestamps for each segment
 11 | - 🔑 Secure API key management
 12 | - 💾 Smart caching for faster processing
 13 | - 💻 Cross-platform support
 14 | 
 15 | ## Installation & Usage
 16 | 
 17 | ### Quick Start (Recommended)
 18 | 
 19 | You can use `meeting-diary` directly without installation using `npx` or `bunx`:
 20 | 
 21 | ```bash
 22 | # Using npx (Node.js)
 23 | npx meeting-diary input.mp4
 24 | 
 25 | # Using bunx (Bun)
 26 | bunx meeting-diary input.mp4
 27 | ```
 28 | 
 29 | ### Global Installation (Alternative)
 30 | 
 31 | If you prefer to install the tool globally:
 32 | 
 33 | ```bash
 34 | # Using npm
 35 | npm install -g meeting-diary
 36 | 
 37 | # Using yarn
 38 | yarn global add meeting-diary
 39 | 
 40 | # Using bun
 41 | bun install -g meeting-diary
 42 | ```
 43 | 
 44 | Then use it as:
 45 | 
 46 | ```bash
 47 | meeting-diary input.mp4
 48 | ```
 49 | 
 50 | ## Usage
 51 | 
 52 | ### Basic Usage
 53 | 
 54 | ```bash
 55 | meeting-diary input.mp4
 56 | ```
 57 | 
 58 | This will:
 59 | 
 60 | 1. Transcribe and diarize your audio/video file
 61 | 2. Help you identify each speaker by showing their most significant contributions
 62 | 3. Generate a timestamped transcript in markdown format
 63 | 
 64 | ### Output Formats
 65 | 
 66 | ```bash
 67 | meeting-diary input.mp4 -f txt  # Simple text format
 68 | meeting-diary input.mp4 -f srt  # SubRip subtitle format
 69 | meeting-diary input.mp4 -f json # JSON format with detailed metadata
 70 | meeting-diary input.mp4 -f md   # Markdown format (default)
 71 | ```
 72 | 
 73 | #### Markdown Format (Default)
 74 | 
 75 | The markdown format includes:
 76 | 
 77 | - Timestamp for each segment
 78 | - Speaker list
 79 | - Chronological transcript with speaker attribution
 80 | - Processing metadata
 81 | 
 82 | Example:
 83 | 
 84 | ```markdown
 85 | # Meeting Transcript
 86 | 
 87 | _Processed on 2/10/2024, 3:43:26 PM_
 88 | _Duration: 5 minutes_
 89 | 
 90 | ## Speakers
 91 | 
 92 | - **Hrishi**
 93 | - **Alok**
 94 | 
 95 | ## Transcript
 96 | 
 97 | [0:00] **Hrishi**: Yeah, didn't have a chance yet...
 98 | [0:15] **Alok**: No engagement in terms of my Mushroom photos.
 99 | [0:18] **Hrishi**: Basically Samsung phones have the ability...
100 | ```
101 | 
102 | ### Speaker Identification
103 | 
104 | You can identify speakers in two ways:
105 | 
106 | 1. Interactive identification (default):
107 | 
108 | ```bash
109 | meeting-diary input.mp4
110 | ```
111 | 
112 | The tool will:
113 | 
114 | - Show you the most significant contributions from each speaker
115 | - Display context (what was said before and after)
116 | - Show previously identified speakers for context
117 | - Ask you to identify each speaker in turn
118 | 
119 | 2. Specify speakers up front:
120 | 
121 | ```bash
122 | meeting-diary input.mp4 -s "John Smith" "Jane Doe"
123 | ```
124 | 
125 | ### All Options
126 | 
127 | ```bash
128 | Options:
129 |   -o, --output <file>     Output file (defaults to input file name with new extension)
130 |   -f, --format <format>   Output format (json, txt, srt, md) (default: "md")
131 |   -s, --speakers <names>  Known speaker names (skip interactive identification)
132 |   --skip-diarization     Skip speaker diarization
133 |   -v, --verbose          Show verbose output
134 |   --api-key <key>        AssemblyAI API key (will prompt if not provided)
135 |   --no-cache            Disable caching of uploads and transcripts
136 |   --cache-dir <dir>     Directory to store cache files
137 |   --no-interactive      Skip interactive speaker identification
138 |   -h, --help             display help for command
139 | ```
140 | 
141 | ### Caching
142 | 
143 | The tool automatically caches uploaded audio files and transcripts to avoid unnecessary re-processing. This is especially useful when:
144 | 
145 | - Experimenting with different output formats
146 | - Re-running transcription with different speaker names
147 | - Processing the same file multiple times
148 | 
149 | Cache files are stored in your system's temporary directory by default. You can:
150 | 
151 | - Disable caching with `--no-cache`
152 | - Change cache location with `--cache-dir`
153 | - Cache is enabled by default for faster processing
154 | - Cache files are automatically cleaned up by your OS's temp file management
155 | 
156 | ## API Key
157 | 
158 | You'll need an AssemblyAI API key to use this tool. You can:
159 | 
160 | 1. Set it as an environment variable: `ASSEMBLYAI_API_KEY=your-key`
161 | 2. Pass it via the command line: `--api-key your-key`
162 | 3. Let the tool prompt you for it (it can be saved for future use)
163 | 
164 | ## Development
165 | 
166 | ```bash
167 | # Clone the repository
168 | git clone https://github.com/southbridgeai/meeting-diary.git
169 | cd meeting-diary
170 | 
171 | # Install dependencies
172 | bun install
173 | 
174 | # Build
175 | bun run build
176 | 
177 | # Run tests
178 | bun test
179 | 
180 | # Development mode
181 | bun run dev
182 | ```
183 | 
184 | ## License
185 | 
186 | Apache-2.0 - see [LICENSE](LICENSE) for details.
187 | 
188 | ## Contributing
189 | 
190 | Contributions are welcome! Please feel free to submit a Pull Request.
191 | 


--------------------------------------------------------------------------------
/src/run.ts:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env node
  2 | 
  3 | import { Command } from "commander";
  4 | import fs from "fs/promises";
  5 | import path from "path";
  6 | import ora from "ora";
  7 | import chalk from "chalk";
  8 | import inquirer from "inquirer";
  9 | import Conf from "conf";
 10 | import {
 11 |   ProcessingError,
 12 |   processAudioFile,
 13 |   formatTranscriptAsSRT,
 14 |   formatTranscriptAsText,
 15 |   formatTranscriptAsMarkdown,
 16 |   validateApiKey,
 17 |   identifySpeakers,
 18 |   updateTranscriptSpeakers,
 19 | } from "./lib";
 20 | import type { Config } from "./types";
 21 | 
 22 | const config = new Conf<Config>({
 23 |   projectName: "meeting-diary",
 24 |   schema: {
 25 |     assemblyAiKey: {
 26 |       type: "string",
 27 |       default: undefined,
 28 |     },
 29 |     defaultOutputDir: {
 30 |       type: "string",
 31 |       default: undefined,
 32 |     },
 33 |     knownSpeakers: {
 34 |       type: "object",
 35 |       default: {},
 36 |     },
 37 |     cacheEnabled: {
 38 |       type: "boolean",
 39 |       default: true,
 40 |     },
 41 |     cacheDir: {
 42 |       type: "string",
 43 |       default: undefined,
 44 |     },
 45 |   },
 46 | });
 47 | 
 48 | const program = new Command();
 49 | 
 50 | async function getApiKey(): Promise<string> {
 51 |   const savedKey = config.get("assemblyAiKey");
 52 |   const envKey = process.env.ASSEMBLYAI_API_KEY;
 53 |   let apiKey = savedKey || envKey;
 54 | 
 55 |   if (!apiKey) {
 56 |     const result = await inquirer.prompt<{ apiKey: string; save: boolean }>([
 57 |       {
 58 |         type: "password",
 59 |         name: "apiKey",
 60 |         message: "Please enter your AssemblyAI API key:",
 61 |         validate: async (input: string) => {
 62 |           if (!input) return "API key is required";
 63 |           const isValid = await validateApiKey(input);
 64 |           return isValid ? true : "Invalid API key";
 65 |         },
 66 |       },
 67 |       {
 68 |         type: "confirm",
 69 |         name: "save",
 70 |         message: "Would you like to save this API key for future use?",
 71 |         default: true,
 72 |       },
 73 |     ]);
 74 | 
 75 |     apiKey = result.apiKey;
 76 |     if (result.save) {
 77 |       config.set("assemblyAiKey", apiKey);
 78 |     }
 79 |   }
 80 | 
 81 |   if (!apiKey) {
 82 |     throw new Error("API key is required");
 83 |   }
 84 | 
 85 |   return apiKey;
 86 | }
 87 | 
 88 | async function main() {
 89 |   program
 90 |     .name("meeting-diary")
 91 |     .description("Transcribe and diarize audio/video files")
 92 |     .version("0.0.1")
 93 |     .argument("<input>", "Input audio/video file")
 94 |     .option(
 95 |       "-o, --output <file>",
 96 |       "Output file (defaults to input file name with new extension)"
 97 |     )
 98 |     .option("-f, --format <format>", "Output format (json, txt, srt, md)", "md")
 99 |     .option(
100 |       "-s, --speakers <names...>",
101 |       "Known speaker names (skip interactive identification)"
102 |     )
103 |     .option("--skip-diarization", "Skip speaker diarization")
104 |     .option("-v, --verbose", "Show verbose output")
105 |     .option(
106 |       "--api-key <key>",
107 |       "AssemblyAI API key (will prompt if not provided)"
108 |     )
109 |     .option("--no-cache", "Disable caching of uploads and transcripts")
110 |     .option("--cache-dir <dir>", "Directory to store cache files")
111 |     .option("--no-interactive", "Skip interactive speaker identification")
112 |     .action(async (input, options) => {
113 |       const spinner = ora();
114 |       try {
115 |         // Validate input file
116 |         if (
117 |           !(await fs
118 |             .access(input)
119 |             .then(() => true)
120 |             .catch(() => false))
121 |         ) {
122 |           throw new ProcessingError(
123 |             "Input file does not exist",
124 |             "FILE_NOT_FOUND"
125 |           );
126 |         }
127 | 
128 |         // Get API key
129 |         const apiKey = options.apiKey || (await getApiKey());
130 | 
131 |         // Process known speakers
132 |         const knownSpeakers: Record<string, string> = {};
133 |         if (options.speakers) {
134 |           options.speakers.forEach((name, index) => {
135 |             knownSpeakers[`speaker_${index + 1}`] = name;
136 |           });
137 |         }
138 | 
139 |         // Determine output file
140 |         const outputFile =
141 |           options.output ||
142 |           path.join(
143 |             path.dirname(input),
144 |             `${path.basename(input, path.extname(input))}.${options.format}`
145 |           );
146 | 
147 |         // Process the file
148 |         spinner.start("Processing audio file...");
149 |         let result = await processAudioFile(input, apiKey, {
150 |           knownSpeakers: options.speakers ? knownSpeakers : undefined,
151 |           skipDiarization: options.skipDiarization,
152 |           cache: {
153 |             enabled: options.cache !== false,
154 |             cacheDir: options.cacheDir,
155 |           },
156 |         });
157 | 
158 |         // If no speakers provided and interactive mode is enabled, identify speakers
159 |         if (
160 |           !options.speakers &&
161 |           options.interactive !== false &&
162 |           !options.skipDiarization
163 |         ) {
164 |           spinner.stop();
165 |           console.log(
166 |             chalk.cyan("\nLet's identify the speakers in this recording.\n")
167 |           );
168 |           console.log(
169 |             chalk.dim(
170 |               "I'll show you the most significant contributions from each speaker."
171 |             )
172 |           );
173 |           console.log(
174 |             chalk.dim("Please help identify who is speaking in each case.\n")
175 |           );
176 | 
177 |           const speakerMap = await identifySpeakers(result);
178 |           result = updateTranscriptSpeakers(result, speakerMap);
179 | 
180 |           // Update cache with identified speakers
181 |           if (options.cache !== false) {
182 |             spinner.start("Updating cache with speaker identifications...");
183 |             await processAudioFile(input, apiKey, {
184 |               knownSpeakers: speakerMap,
185 |               skipDiarization: options.skipDiarization,
186 |               cache: {
187 |                 enabled: true,
188 |                 cacheDir: options.cacheDir,
189 |               },
190 |             });
191 |             spinner.stop();
192 |           }
193 |         }
194 | 
195 |         // Format output
196 |         spinner.start("Formatting transcript...");
197 |         let output: string;
198 |         switch (options.format) {
199 |           case "srt":
200 |             output = formatTranscriptAsSRT(result);
201 |             break;
202 |           case "txt":
203 |             output = formatTranscriptAsText(result);
204 |             break;
205 |           case "md":
206 |             output = formatTranscriptAsMarkdown(result);
207 |             break;
208 |           default:
209 |             output = JSON.stringify(result, null, 2);
210 |         }
211 | 
212 |         // Write output
213 |         await fs.writeFile(outputFile, output);
214 |         spinner.succeed(
215 |           chalk.green(`Processed successfully! Output saved to ${outputFile}`)
216 |         );
217 | 
218 |         if (options.verbose) {
219 |           console.log("\nProcessing summary:");
220 |           console.log(
221 |             `- Duration: ${Math.round(
222 |               result.metadata.duration / 60000
223 |             )} minutes`
224 |           );
225 |           console.log(`- Speakers detected: ${result.speakers.length}`);
226 |           console.log(`- Segments: ${result.segments.length}`);
227 |         }
228 |       } catch (error) {
229 |         spinner.fail(chalk.red("Processing failed"));
230 | 
231 |         if (error instanceof ProcessingError) {
232 |           console.error(chalk.red(`\nError (${error.code}): ${error.message}`));
233 |         } else {
234 |           console.error(chalk.red("\nUnexpected error:"), error);
235 |         }
236 | 
237 |         process.exit(1);
238 |       }
239 |     });
240 | 
241 |   await program.parseAsync();
242 | }
243 | 
244 | main().catch((error) => {
245 |   console.error(chalk.red("Fatal error:"), error);
246 |   process.exit(1);
247 | });
248 | 


--------------------------------------------------------------------------------
/src/__tests__/lib.test.ts:
--------------------------------------------------------------------------------
  1 | import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test";
  2 | import {
  3 |   checkFileExists,
  4 |   validateApiKey,
  5 |   formatTranscriptAsSRT,
  6 |   formatTranscriptAsText,
  7 |   processAudioFile,
  8 |   ProcessingError,
  9 | } from "../lib";
 10 | import type { ProcessedTranscript } from "../types";
 11 | import fs from "fs/promises";
 12 | 
 13 | // Mock fs.access
 14 | const mockAccess = spyOn(fs, "access");
 15 | 
 16 | // Create mock responses
 17 | const mockTranscriptResponse = {
 18 |   id: "mock-transcript-id",
 19 |   status: "completed",
 20 |   audio_duration: 60000,
 21 |   utterances: [
 22 |     {
 23 |       text: "Hello world",
 24 |       start: 0,
 25 |       end: 1000,
 26 |       speaker: "speaker_1",
 27 |       confidence: 0.95,
 28 |     },
 29 |   ],
 30 | };
 31 | 
 32 | // Create a mock for AssemblyAI's list method
 33 | const mockList = mock(({ apiKey }: { apiKey: string }) => {
 34 |   if (apiKey === "valid-key") {
 35 |     return Promise.resolve({ items: [] });
 36 |   }
 37 |   return Promise.reject(new Error("Invalid API key"));
 38 | });
 39 | 
 40 | // Create a mock for file upload
 41 | const mockUpload = mock(() => Promise.resolve("mock-audio-url"));
 42 | 
 43 | // Create a mock for transcript creation and retrieval
 44 | const mockCreate = mock(() => Promise.resolve({ id: "mock-transcript-id" }));
 45 | const mockGet = mock(() => Promise.resolve(mockTranscriptResponse));
 46 | 
 47 | // Mock AssemblyAI
 48 | mock.module("assemblyai", () => ({
 49 |   AssemblyAI: mock((config: { apiKey: string }) => ({
 50 |     transcripts: {
 51 |       list: () => mockList(config),
 52 |       create: mockCreate,
 53 |       get: mockGet,
 54 |     },
 55 |     files: {
 56 |       upload: mockUpload,
 57 |     },
 58 |     lemur: {},
 59 |     realtime: {},
 60 |   })),
 61 | }));
 62 | 
 63 | describe("Core functionality", () => {
 64 |   beforeEach(() => {
 65 |     mockAccess.mockClear();
 66 |     mockList.mockClear();
 67 |     mockUpload.mockClear();
 68 |     mockCreate.mockClear();
 69 |     mockGet.mockClear();
 70 | 
 71 |     // Reset default implementations
 72 |     mockUpload.mockImplementation(() => Promise.resolve("mock-audio-url"));
 73 |     mockCreate.mockImplementation(() =>
 74 |       Promise.resolve({ id: "mock-transcript-id" })
 75 |     );
 76 |     mockGet.mockImplementation(() => Promise.resolve(mockTranscriptResponse));
 77 |   });
 78 | 
 79 |   describe("checkFileExists", () => {
 80 |     it("should return true when file exists", async () => {
 81 |       mockAccess.mockImplementation(() => Promise.resolve());
 82 | 
 83 |       const result = await checkFileExists("/path/to/file");
 84 |       expect(result).toBe(true);
 85 |       expect(mockAccess).toHaveBeenCalledWith("/path/to/file");
 86 |     });
 87 | 
 88 |     it("should return false when file does not exist", async () => {
 89 |       mockAccess.mockImplementation(() => Promise.reject(new Error()));
 90 | 
 91 |       const result = await checkFileExists("/path/to/file");
 92 |       expect(result).toBe(false);
 93 |       expect(mockAccess).toHaveBeenCalledWith("/path/to/file");
 94 |     });
 95 |   });
 96 | 
 97 |   describe("validateApiKey", () => {
 98 |     it("should return true for valid API key", async () => {
 99 |       const result = await validateApiKey("valid-key");
100 |       expect(result).toBe(true);
101 |     });
102 | 
103 |     it("should return false for invalid API key", async () => {
104 |       const result = await validateApiKey("invalid-key");
105 |       expect(result).toBe(false);
106 |     });
107 |   });
108 | 
109 |   describe("processAudioFile", () => {
110 |     it("should process audio file successfully", async () => {
111 |       mockAccess.mockImplementation(() => Promise.resolve());
112 | 
113 |       const result = await processAudioFile("/path/to/file", "valid-key");
114 | 
115 |       expect(mockUpload).toHaveBeenCalledTimes(1);
116 |       expect(mockCreate).toHaveBeenCalledTimes(1);
117 |       expect(mockGet).toHaveBeenCalledTimes(1);
118 | 
119 |       expect(result.segments).toHaveLength(1);
120 |       expect(result.speakers).toHaveLength(1);
121 |       expect(result.metadata.duration).toBe(60000);
122 |       expect(result.segments[0].text).toBe("Hello world");
123 |     });
124 | 
125 |     it("should handle file not found error", async () => {
126 |       mockAccess.mockImplementation(() => Promise.reject(new Error()));
127 | 
128 |       await expect(
129 |         processAudioFile("/path/to/file", "valid-key")
130 |       ).rejects.toThrow(
131 |         new ProcessingError("Input file does not exist", "FILE_NOT_FOUND")
132 |       );
133 |     });
134 | 
135 |     it("should handle transcription error", async () => {
136 |       mockAccess.mockImplementation(() => Promise.resolve());
137 |       mockGet.mockImplementation(() =>
138 |         Promise.resolve({
139 |           ...mockTranscriptResponse,
140 |           status: "error",
141 |           error: "Transcription failed",
142 |         })
143 |       );
144 | 
145 |       await expect(
146 |         processAudioFile("/path/to/file", "valid-key")
147 |       ).rejects.toThrow(
148 |         new ProcessingError(
149 |           "Transcription failed: Transcription failed",
150 |           "TRANSCRIPTION_FAILED"
151 |         )
152 |       );
153 |     });
154 | 
155 |     it("should handle polling for completion", async () => {
156 |       mockAccess.mockImplementation(() => Promise.resolve());
157 |       let callCount = 0;
158 |       mockGet.mockImplementation(() => {
159 |         callCount++;
160 |         if (callCount < 3) {
161 |           return Promise.resolve({
162 |             ...mockTranscriptResponse,
163 |             status: "processing",
164 |           });
165 |         }
166 |         return Promise.resolve(mockTranscriptResponse);
167 |       });
168 | 
169 |       const result = await processAudioFile("/path/to/file", "valid-key");
170 |       expect(mockGet).toHaveBeenCalledTimes(3);
171 |       expect(result.segments).toHaveLength(1);
172 |     });
173 | 
174 |     it("should handle non-ProcessingError errors", async () => {
175 |       mockAccess.mockImplementation(() => Promise.resolve());
176 |       mockUpload.mockImplementation(() =>
177 |         Promise.reject(new Error("Network error"))
178 |       );
179 | 
180 |       await expect(
181 |         processAudioFile("/path/to/file", "valid-key")
182 |       ).rejects.toThrow(
183 |         new ProcessingError(
184 |           "Processing failed: Network error",
185 |           "PROCESSING_FAILED"
186 |         )
187 |       );
188 |     });
189 | 
190 |     it("should handle missing utterance data", async () => {
191 |       mockAccess.mockImplementation(() => Promise.resolve());
192 |       mockGet.mockImplementation(() =>
193 |         Promise.resolve({
194 |           id: "mock-transcript-id",
195 |           status: "completed",
196 |           audio_duration: 60000,
197 |           utterances: [],
198 |         })
199 |       );
200 | 
201 |       const result = await processAudioFile("/path/to/file", "valid-key");
202 |       expect(result.segments).toHaveLength(0);
203 |       expect(result.speakers).toHaveLength(0);
204 |       expect(result.metadata.duration).toBe(60000);
205 |     });
206 | 
207 |     it("should support known speakers", async () => {
208 |       mockAccess.mockImplementation(() => Promise.resolve());
209 |       const knownSpeakers = { speaker_1: "John Doe" };
210 | 
211 |       const result = await processAudioFile("/path/to/file", "valid-key", {
212 |         knownSpeakers,
213 |       });
214 | 
215 |       expect(result.segments[0].speaker.name).toBe("John Doe");
216 |       expect(mockCreate).toHaveBeenCalledWith(
217 |         expect.objectContaining({
218 |           speakers_expected: 1,
219 |           speaker_labels: true,
220 |         })
221 |       );
222 |     });
223 |   });
224 | 
225 |   describe("formatTranscriptAsSRT", () => {
226 |     const mockTranscript: ProcessedTranscript = {
227 |       segments: [
228 |         {
229 |           text: "Hello world",
230 |           start: 0,
231 |           end: 1000,
232 |           speaker: { id: "1", name: "Alice", confidence: 0.9 },
233 |           confidence: 0.95,
234 |         },
235 |         {
236 |           text: "Second line",
237 |           start: 3600000, // 1 hour
238 |           end: 3605000, // 1 hour + 5 seconds
239 |           speaker: { id: "2", name: "Bob", confidence: 0.85 },
240 |           confidence: 0.9,
241 |         },
242 |       ],
243 |       speakers: [
244 |         { id: "1", name: "Alice", confidence: 0.9 },
245 |         { id: "2", name: "Bob", confidence: 0.85 },
246 |       ],
247 |       metadata: {
248 |         fileName: "test.mp3",
249 |         duration: 3605000,
250 |         processedAt: "2024-02-10T00:00:00.000Z",
251 |       },
252 |     };
253 | 
254 |     it("should format transcript in SRT format", () => {
255 |       const result = formatTranscriptAsSRT(mockTranscript);
256 |       expect(result).toContain("1");
257 |       expect(result).toContain("00:00:00,000 --> 00:00:01,000");
258 |       expect(result).toContain("Alice: Hello world");
259 |       // Test hour formatting
260 |       expect(result).toContain("01:00:00,000 --> 01:00:05,000");
261 |       expect(result).toContain("Bob: Second line");
262 |     });
263 |   });
264 | 
265 |   describe("formatTranscriptAsText", () => {
266 |     const mockTranscript: ProcessedTranscript = {
267 |       segments: [
268 |         {
269 |           text: "Hello world",
270 |           start: 0,
271 |           end: 1000,
272 |           speaker: { id: "1", name: "Alice", confidence: 0.9 },
273 |           confidence: 0.95,
274 |         },
275 |       ],
276 |       speakers: [{ id: "1", name: "Alice", confidence: 0.9 }],
277 |       metadata: {
278 |         fileName: "test.mp3",
279 |         duration: 1000,
280 |         processedAt: "2024-02-10T00:00:00.000Z",
281 |       },
282 |     };
283 | 
284 |     it("should format transcript in text format", () => {
285 |       const result = formatTranscriptAsText(mockTranscript);
286 |       expect(result).toBe("[Alice] Hello world");
287 |     });
288 |   });
289 | });
290 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 | 1.  Definitions.
  8 | 
  9 |     "License" shall mean the terms and conditions for use, reproduction,
 10 |     and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |     "Licensor" shall mean the copyright owner or entity authorized by
 13 |     the copyright owner that is granting the License.
 14 | 
 15 |     "Legal Entity" shall mean the union of the acting entity and all
 16 |     other entities that control, are controlled by, or are under common
 17 |     control with that entity. For the purposes of this definition,
 18 |     "control" means (i) the power, direct or indirect, to cause the
 19 |     direction or management of such entity, whether by contract or
 20 |     otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |     outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |     "You" (or "Your") shall mean an individual or Legal Entity
 24 |     exercising permissions granted by this License.
 25 | 
 26 |     "Source" form shall mean the preferred form for making modifications,
 27 |     including but not limited to software source code, documentation
 28 |     source, and configuration files.
 29 | 
 30 |     "Object" form shall mean any form resulting from mechanical
 31 |     transformation or translation of a Source form, including but
 32 |     not limited to compiled object code, generated documentation,
 33 |     and conversions to other media types.
 34 | 
 35 |     "Work" shall mean the work of authorship, whether in Source or
 36 |     Object form, made available under the License, as indicated by a
 37 |     copyright notice that is included in or attached to the work
 38 |     (an example is provided in the Appendix below).
 39 | 
 40 |     "Derivative Works" shall mean any work, whether in Source or Object
 41 |     form, that is based on (or derived from) the Work and for which the
 42 |     editorial revisions, annotations, elaborations, or other modifications
 43 |     represent, as a whole, an original work of authorship. For the purposes
 44 |     of this License, Derivative Works shall not include works that remain
 45 |     separable from, or merely link (or bind by name) to the interfaces of,
 46 |     the Work and Derivative Works thereof.
 47 | 
 48 |     "Contribution" shall mean any work of authorship, including
 49 |     the original version of the Work and any modifications or additions
 50 |     to that Work or Derivative Works thereof, that is intentionally
 51 |     submitted to Licensor for inclusion in the Work by the copyright owner
 52 |     or by an individual or Legal Entity authorized to submit on behalf of
 53 |     the copyright owner. For the purposes of this definition, "submitted"
 54 |     means any form of electronic, verbal, or written communication sent
 55 |     to the Licensor or its representatives, including but not limited to
 56 |     communication on electronic mailing lists, source code control systems,
 57 |     and issue tracking systems that are managed by, or on behalf of, the
 58 |     Licensor for the purpose of discussing and improving the Work, but
 59 |     excluding communication that is conspicuously marked or otherwise
 60 |     designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |     "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |     on behalf of whom a Contribution has been received by Licensor and
 64 |     subsequently incorporated within the Work.
 65 | 
 66 | 2.  Grant of Copyright License. Subject to the terms and conditions of
 67 |     this License, each Contributor hereby grants to You a perpetual,
 68 |     worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |     copyright license to reproduce, prepare Derivative Works of,
 70 |     publicly display, publicly perform, sublicense, and distribute the
 71 |     Work and such Derivative Works in Source or Object form.
 72 | 
 73 | 3.  Grant of Patent License. Subject to the terms and conditions of
 74 |     this License, each Contributor hereby grants to You a perpetual,
 75 |     worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |     (except as stated in this section) patent license to make, have made,
 77 |     use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |     where such license applies only to those patent claims licensable
 79 |     by such Contributor that are necessarily infringed by their
 80 |     Contribution(s) alone or by combination of their Contribution(s)
 81 |     with the Work to which such Contribution(s) was submitted. If You
 82 |     institute patent litigation against any entity (including a
 83 |     cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |     or a Contribution incorporated within the Work constitutes direct
 85 |     or contributory patent infringement, then any patent licenses
 86 |     granted to You under this License for that Work shall terminate
 87 |     as of the date such litigation is filed.
 88 | 
 89 | 4.  Redistribution. You may reproduce and distribute copies of the
 90 |     Work or Derivative Works thereof in any medium, with or without
 91 |     modifications, and in Source or Object form, provided that You
 92 |     meet the following conditions:
 93 | 
 94 |     (a) You must give any other recipients of the Work or
 95 |     Derivative Works a copy of this License; and
 96 | 
 97 |     (b) You must cause any modified files to carry prominent notices
 98 |     stating that You changed the files; and
 99 | 
100 |     (c) You must retain, in the Source form of any Derivative Works
101 |     that You distribute, all copyright, patent, trademark, and
102 |     attribution notices from the Source form of the Work,
103 |     excluding those notices that do not pertain to any part of
104 |     the Derivative Works; and
105 | 
106 |     (d) If the Work includes a "NOTICE" text file as part of its
107 |     distribution, then any Derivative Works that You distribute must
108 |     include a readable copy of the attribution notices contained
109 |     within such NOTICE file, excluding those notices that do not
110 |     pertain to any part of the Derivative Works, in at least one
111 |     of the following places: within a NOTICE text file distributed
112 |     as part of the Derivative Works; within the Source form or
113 |     documentation, if provided along with the Derivative Works; or,
114 |     within a display generated by the Derivative Works, if and
115 |     wherever such third-party notices normally appear. The contents
116 |     of the NOTICE file are for informational purposes only and
117 |     do not modify the License. You may add Your own attribution
118 |     notices within Derivative Works that You distribute, alongside
119 |     or as an addendum to the NOTICE text from the Work, provided
120 |     that such additional attribution notices cannot be construed
121 |     as modifying the License.
122 | 
123 |     You may add Your own copyright statement to Your modifications and
124 |     may provide additional or different license terms and conditions
125 |     for use, reproduction, or distribution of Your modifications, or
126 |     for any such Derivative Works as a whole, provided Your use,
127 |     reproduction, and distribution of the Work otherwise complies with
128 |     the conditions stated in this License.
129 | 
130 | 5.  Submission of Contributions. Unless You explicitly state otherwise,
131 |     any Contribution intentionally submitted for inclusion in the Work
132 |     by You to the Licensor shall be under the terms and conditions of
133 |     this License, without any additional terms or conditions.
134 |     Notwithstanding the above, nothing herein shall supersede or modify
135 |     the terms of any separate license agreement you may have executed
136 |     with Licensor regarding such Contributions.
137 | 
138 | 6.  Trademarks. This License does not grant permission to use the trade
139 |     names, trademarks, service marks, or product names of the Licensor,
140 |     except as required for reasonable and customary use in describing the
141 |     origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7.  Disclaimer of Warranty. Unless required by applicable law or
144 |     agreed to in writing, Licensor provides the Work (and each
145 |     Contributor provides its Contributions) on an "AS IS" BASIS,
146 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |     implied, including, without limitation, any warranties or conditions
148 |     of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |     PARTICULAR PURPOSE. You are solely responsible for determining the
150 |     appropriateness of using or redistributing the Work and assume any
151 |     risks associated with Your exercise of permissions under this License.
152 | 
153 | 8.  Limitation of Liability. In no event and under no legal theory,
154 |     whether in tort (including negligence), contract, or otherwise,
155 |     unless required by applicable law (such as deliberate and grossly
156 |     negligent acts) or agreed to in writing, shall any Contributor be
157 |     liable to You for damages, including any direct, indirect, special,
158 |     incidental, or consequential damages of any character arising as a
159 |     result of this License or out of the use or inability to use the
160 |     Work (including but not limited to damages for loss of goodwill,
161 |     work stoppage, computer failure or malfunction, or any and all
162 |     other commercial damages or losses), even if such Contributor
163 |     has been advised of the possibility of such damages.
164 | 
165 | 9.  Accepting Warranty or Additional Liability. While redistributing
166 |     the Work or Derivative Works thereof, You may choose to offer,
167 |     and charge a fee for, acceptance of support, warranty, indemnity,
168 |     or other liability obligations and/or rights consistent with this
169 |     License. However, in accepting such obligations, You may act only
170 |     on Your own behalf and on Your sole responsibility, not on behalf
171 |     of any other Contributor, and only if You agree to indemnify,
172 |     defend, and hold each Contributor harmless for any liability
173 |     incurred by, or claims asserted against, such Contributor by reason
174 |     of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 


--------------------------------------------------------------------------------
/src/lib.ts:
--------------------------------------------------------------------------------
  1 | import { AssemblyAI } from "assemblyai";
  2 | import fs from "fs/promises";
  3 | import path from "path";
  4 | import crypto from "crypto";
  5 | import os from "os";
  6 | import chalk from "chalk";
  7 | import inquirer from "inquirer";
  8 | import {
  9 |   ProcessedTranscript,
 10 |   Speaker,
 11 |   TranscriptSegment,
 12 |   CacheEntry,
 13 |   CacheOptions,
 14 | } from "./types";
 15 | 
 16 | export class ProcessingError extends Error {
 17 |   constructor(message: string, public readonly code: string) {
 18 |     super(message);
 19 |     this.name = "ProcessingError";
 20 |   }
 21 | }
 22 | 
 23 | export async function checkFileExists(filePath: string): Promise<boolean> {
 24 |   try {
 25 |     await fs.access(filePath);
 26 |     return true;
 27 |   } catch {
 28 |     return false;
 29 |   }
 30 | }
 31 | 
 32 | export async function validateApiKey(apiKey: string): Promise<boolean> {
 33 |   const client = new AssemblyAI({
 34 |     apiKey,
 35 |   });
 36 | 
 37 |   try {
 38 |     await client.transcripts.list({ limit: 1 });
 39 |     return true;
 40 |   } catch (error) {
 41 |     return false;
 42 |   }
 43 | }
 44 | 
 45 | async function calculateFileHash(filePath: string): Promise<string> {
 46 |   const fileBuffer = await fs.readFile(filePath);
 47 |   return crypto.createHash("sha256").update(fileBuffer).digest("hex");
 48 | }
 49 | 
 50 | async function getCacheDir(options?: CacheOptions): Promise<string> {
 51 |   const cacheDir =
 52 |     options?.cacheDir || path.join(os.tmpdir(), "meeting-diary-cache");
 53 |   await fs.mkdir(cacheDir, { recursive: true });
 54 |   return cacheDir;
 55 | }
 56 | 
 57 | async function getCacheEntry(
 58 |   filePath: string,
 59 |   options?: CacheOptions
 60 | ): Promise<CacheEntry | null> {
 61 |   if (!options?.enabled) return null;
 62 | 
 63 |   try {
 64 |     const cacheDir = await getCacheDir(options);
 65 |     const hash = await calculateFileHash(filePath);
 66 |     const cacheFile = path.join(cacheDir, `${hash}.json`);
 67 | 
 68 |     const cacheData = await fs.readFile(cacheFile, "utf-8");
 69 |     const entry: CacheEntry = JSON.parse(cacheData);
 70 | 
 71 |     // Validate cache entry
 72 |     if (entry.hash !== hash) return null;
 73 | 
 74 |     return entry;
 75 |   } catch (error) {
 76 |     return null;
 77 |   }
 78 | }
 79 | 
 80 | async function setCacheEntry(
 81 |   filePath: string,
 82 |   entry: CacheEntry,
 83 |   options?: CacheOptions
 84 | ): Promise<void> {
 85 |   if (!options?.enabled) return;
 86 | 
 87 |   try {
 88 |     const cacheDir = await getCacheDir(options);
 89 |     const cacheFile = path.join(cacheDir, `${entry.hash}.json`);
 90 |     await fs.writeFile(cacheFile, JSON.stringify(entry, null, 2));
 91 |   } catch (error) {
 92 |     console.warn("Failed to write cache entry:", error);
 93 |   }
 94 | }
 95 | 
 96 | export async function processAudioFile(
 97 |   filePath: string,
 98 |   apiKey: string,
 99 |   options: {
100 |     knownSpeakers?: Record<string, string>;
101 |     skipDiarization?: boolean;
102 |     cache?: CacheOptions;
103 |   } = {}
104 | ): Promise<ProcessedTranscript> {
105 |   if (!(await checkFileExists(filePath))) {
106 |     throw new ProcessingError("Input file does not exist", "FILE_NOT_FOUND");
107 |   }
108 | 
109 |   // Check cache first
110 |   const fileHash = await calculateFileHash(filePath);
111 |   const cacheEntry = await getCacheEntry(filePath, options.cache);
112 | 
113 |   if (cacheEntry?.data.transcript) {
114 |     return cacheEntry.data.transcript;
115 |   }
116 | 
117 |   const client = new AssemblyAI({
118 |     apiKey,
119 |   });
120 | 
121 |   try {
122 |     // Try to use cached audio URL
123 |     let audioUrl = cacheEntry?.data.audioUrl;
124 | 
125 |     if (!audioUrl) {
126 |       audioUrl = await client.files.upload(filePath);
127 |       // Cache the audio URL
128 |       await setCacheEntry(
129 |         filePath,
130 |         {
131 |           timestamp: new Date().toISOString(),
132 |           hash: fileHash,
133 |           data: { audioUrl },
134 |         },
135 |         options.cache
136 |       );
137 |     }
138 | 
139 |     // Try to use cached transcript ID
140 |     let transcriptId = cacheEntry?.data.transcriptId;
141 |     let result;
142 | 
143 |     if (transcriptId) {
144 |       result = await client.transcripts.get(transcriptId);
145 |       if (result.status === "error") {
146 |         transcriptId = undefined; // Reset if there was an error
147 |       }
148 |     }
149 | 
150 |     if (!transcriptId) {
151 |       // Create new transcript
152 |       const params = {
153 |         audio_url: audioUrl,
154 |         speaker_labels: !options.skipDiarization,
155 |         speakers_expected:
156 |           options.knownSpeakers && Object.keys(options.knownSpeakers).length > 0
157 |             ? Object.keys(options.knownSpeakers).length
158 |             : undefined,
159 |       };
160 | 
161 |       const transcript = await client.transcripts.create(params);
162 |       transcriptId = transcript.id;
163 |       result = await client.transcripts.get(transcriptId);
164 | 
165 |       // Cache the transcript ID
166 |       await setCacheEntry(
167 |         filePath,
168 |         {
169 |           timestamp: new Date().toISOString(),
170 |           hash: fileHash,
171 |           data: { audioUrl, transcriptId },
172 |         },
173 |         options.cache
174 |       );
175 |     }
176 | 
177 |     if (result.status === "error") {
178 |       throw new ProcessingError(
179 |         `Transcription failed: ${result.error}`,
180 |         "TRANSCRIPTION_FAILED"
181 |       );
182 |     }
183 | 
184 |     // Wait for completion if needed
185 |     while (result.status !== "completed") {
186 |       await new Promise((resolve) => setTimeout(resolve, 1000));
187 |       result = await client.transcripts.get(transcriptId);
188 |       if (result.status === "error") {
189 |         throw new ProcessingError(
190 |           `Transcription failed: ${result.error}`,
191 |           "TRANSCRIPTION_FAILED"
192 |         );
193 |       }
194 |     }
195 | 
196 |     // Process speakers and segments from utterances
197 |     const speakers = new Map<string, Speaker>();
198 |     const segments: TranscriptSegment[] = [];
199 | 
200 |     if (result.utterances) {
201 |       for (const utterance of result.utterances) {
202 |         if (!utterance.speaker || !utterance.text) continue;
203 | 
204 |         const speakerId = `speaker_${utterance.speaker.charCodeAt(0) - 64}`;
205 | 
206 |         if (!speakers.has(speakerId)) {
207 |           speakers.set(speakerId, {
208 |             id: speakerId,
209 |             name:
210 |               options.knownSpeakers?.[speakerId] ||
211 |               `Speaker ${utterance.speaker}`,
212 |             confidence: utterance.confidence || 0,
213 |           });
214 |         }
215 | 
216 |         segments.push({
217 |           text: utterance.text,
218 |           start: utterance.start || 0,
219 |           end: utterance.end || 0,
220 |           speaker: speakers.get(speakerId)!,
221 |           confidence: utterance.confidence || 0,
222 |         });
223 |       }
224 |     }
225 | 
226 |     const processedTranscript: ProcessedTranscript = {
227 |       segments,
228 |       speakers: Array.from(speakers.values()),
229 |       metadata: {
230 |         fileName: path.basename(filePath),
231 |         duration: result.audio_duration || 0,
232 |         processedAt: new Date().toISOString(),
233 |       },
234 |     };
235 | 
236 |     // Cache the final transcript
237 |     await setCacheEntry(
238 |       filePath,
239 |       {
240 |         timestamp: new Date().toISOString(),
241 |         hash: fileHash,
242 |         data: { audioUrl, transcriptId, transcript: processedTranscript },
243 |       },
244 |       options.cache
245 |     );
246 | 
247 |     return processedTranscript;
248 |   } catch (error) {
249 |     if (error instanceof ProcessingError) throw error;
250 |     throw new ProcessingError(
251 |       `Processing failed: ${
252 |         error instanceof Error ? error.message : String(error)
253 |       }`,
254 |       "PROCESSING_FAILED"
255 |     );
256 |   }
257 | }
258 | 
259 | export function formatTranscriptAsSRT(transcript: ProcessedTranscript): string {
260 |   return transcript.segments
261 |     .map((segment, index) => {
262 |       const formatTime = (ms: number) => {
263 |         const date = new Date(ms);
264 |         const hours = Math.floor(ms / 3600000);
265 |         const minutes = date.getUTCMinutes();
266 |         const seconds = date.getUTCSeconds();
267 |         const milliseconds = date.getUTCMilliseconds();
268 |         return `${hours.toString().padStart(2, "0")}:${minutes
269 |           .toString()
270 |           .padStart(2, "0")}:${seconds
271 |           .toString()
272 |           .padStart(2, "0")},${milliseconds.toString().padStart(3, "0")}`;
273 |       };
274 | 
275 |       return `${index + 1}
276 | ${formatTime(segment.start)} --> ${formatTime(segment.end)}
277 | ${segment.speaker.name}: ${segment.text}
278 | `;
279 |     })
280 |     .join("\n");
281 | }
282 | 
283 | export function formatTranscriptAsText(
284 |   transcript: ProcessedTranscript
285 | ): string {
286 |   return transcript.segments
287 |     .map((segment) => `[${segment.speaker.name}] ${segment.text}`)
288 |     .join("\n");
289 | }
290 | 
291 | export function formatTranscriptAsMarkdown(
292 |   transcript: ProcessedTranscript
293 | ): string {
294 |   // Helper to format time
295 |   const formatTime = (ms: number): string => {
296 |     const minutes = Math.floor(ms / 60000);
297 |     const seconds = Math.floor((ms % 60000) / 1000);
298 |     return `${minutes}:${seconds.toString().padStart(2, "0")}`;
299 |   };
300 | 
301 |   // Header with metadata
302 |   const parts: string[] = [
303 |     `# Meeting Transcript\n`,
304 |     `*Processed on ${new Date(
305 |       transcript.metadata.processedAt
306 |     ).toLocaleString()}*\n`,
307 |     `*Duration: ${Math.round(
308 |       transcript.metadata.duration / 60000
309 |     )} minutes*\n\n`,
310 |   ];
311 | 
312 |   // Add speaker list
313 |   parts.push("## Speakers\n");
314 |   transcript.speakers.forEach((speaker) => {
315 |     parts.push(`- **${speaker.name}**\n`);
316 |   });
317 |   parts.push("\n## Transcript\n\n");
318 | 
319 |   // Process segments in chronological order
320 |   transcript.segments.forEach((segment) => {
321 |     parts.push(
322 |       `[${formatTime(segment.start)}] **${segment.speaker.name}**: ${
323 |         segment.text
324 |       }\n\n`
325 |     );
326 |   });
327 | 
328 |   return parts.join("");
329 | }
330 | 
331 | interface SpeakerExample {
332 |   speaker: Speaker;
333 |   segment: TranscriptSegment;
334 |   context: {
335 |     before?: TranscriptSegment;
336 |     after?: TranscriptSegment;
337 |   };
338 | }
339 | 
340 | function findBestExamplesForSpeaker(
341 |   transcript: ProcessedTranscript,
342 |   speakerId: string,
343 |   numExamples: number = 3
344 | ): SpeakerExample[] {
345 |   // Get all segments for this speaker
346 |   const speakerSegments = transcript.segments.filter(
347 |     (s) => s.speaker.id === speakerId
348 |   );
349 | 
350 |   // Sort by length to get the most substantial contributions
351 |   const sortedSegments = [...speakerSegments].sort(
352 |     (a, b) => b.text.length - a.text.length
353 |   );
354 | 
355 |   // Take top N segments and find their context
356 |   return sortedSegments.slice(0, numExamples).map((segment) => {
357 |     const segmentIndex = transcript.segments.findIndex((s) => s === segment);
358 |     return {
359 |       speaker: segment.speaker,
360 |       segment,
361 |       context: {
362 |         before:
363 |           segmentIndex > 0 ? transcript.segments[segmentIndex - 1] : undefined,
364 |         after:
365 |           segmentIndex < transcript.segments.length - 1
366 |             ? transcript.segments[segmentIndex + 1]
367 |             : undefined,
368 |       },
369 |     };
370 |   });
371 | }
372 | 
373 | export async function identifySpeakers(
374 |   transcript: ProcessedTranscript
375 | ): Promise<Record<string, string>> {
376 |   const speakerMap: Record<string, string> = {};
377 | 
378 |   // Process each speaker
379 |   for (const speaker of transcript.speakers) {
380 |     console.log(chalk.cyan("\n----------------------------------------"));
381 |     console.log(chalk.bold(`Identifying ${speaker.name}...\n`));
382 | 
383 |     // Show identified speakers so far
384 |     if (Object.keys(speakerMap).length > 0) {
385 |       console.log(chalk.dim("Speakers identified so far:"));
386 |       Object.entries(speakerMap).forEach(([id, name]) => {
387 |         const originalName =
388 |           transcript.speakers.find((s) => s.id === id)?.name || "Unknown";
389 |         console.log(chalk.dim(`- ${originalName} → ${name}`));
390 |       });
391 |       console.log();
392 |     }
393 | 
394 |     // Get best examples
395 |     const examples = findBestExamplesForSpeaker(transcript, speaker.id);
396 | 
397 |     // Show examples with context
398 |     examples.forEach((example, index) => {
399 |       console.log(chalk.yellow(`Example ${index + 1}:`));
400 |       if (example.context.before) {
401 |         const beforeSpeaker = example.context.before.speaker;
402 |         const knownName = speakerMap[beforeSpeaker.id];
403 |         console.log(
404 |           chalk.dim(
405 |             `${beforeSpeaker.name}${knownName ? ` (${knownName})` : ""}: ${
406 |               example.context.before.text
407 |             }`
408 |           )
409 |         );
410 |       }
411 |       console.log(
412 |         chalk.green(`${example.speaker.name}: ${example.segment.text}`)
413 |       );
414 |       if (example.context.after) {
415 |         const afterSpeaker = example.context.after.speaker;
416 |         const knownName = speakerMap[afterSpeaker.id];
417 |         console.log(
418 |           chalk.dim(
419 |             `${afterSpeaker.name}${knownName ? ` (${knownName})` : ""}: ${
420 |               example.context.after.text
421 |             }`
422 |           )
423 |         );
424 |       }
425 |       console.log();
426 |     });
427 | 
428 |     // Ask for speaker identification
429 |     const { name } = await inquirer.prompt<{ name: string }>([
430 |       {
431 |         type: "input",
432 |         name: "name",
433 |         message: "Who is this speaker?",
434 |         validate: (input: string) => {
435 |           if (!input.trim()) return "Speaker name cannot be empty";
436 |           return true;
437 |         },
438 |       },
439 |     ]);
440 | 
441 |     speakerMap[speaker.id] = name.trim();
442 |   }
443 | 
444 |   return speakerMap;
445 | }
446 | 
447 | export function updateTranscriptSpeakers(
448 |   transcript: ProcessedTranscript,
449 |   speakerMap: Record<string, string>
450 | ): ProcessedTranscript {
451 |   // Update speaker names
452 |   const updatedSpeakers = transcript.speakers.map((speaker) => ({
453 |     ...speaker,
454 |     name: speakerMap[speaker.id] || speaker.name,
455 |   }));
456 | 
457 |   // Update segment speaker names
458 |   const updatedSegments = transcript.segments.map((segment) => ({
459 |     ...segment,
460 |     speaker: {
461 |       ...segment.speaker,
462 |       name: speakerMap[segment.speaker.id] || segment.speaker.name,
463 |     },
464 |   }));
465 | 
466 |   return {
467 |     ...transcript,
468 |     speakers: updatedSpeakers,
469 |     segments: updatedSegments,
470 |   };
471 | }
472 | 


--------------------------------------------------------------------------------