├── test ├── output │ ├── .gitkeep │ ├── opus │ │ ├── .gitkeep │ │ └── metrics.json │ ├── speed │ │ ├── .gitkeep │ │ └── metrics.json │ ├── baseline │ │ └── metrics.json │ └── comparison-report.json ├── .gitignore ├── package.json ├── test-baseline.ts ├── README.md ├── test-speed.ts ├── test-opus.ts └── compare.ts ├── .gitignore ├── .npmignore ├── tsconfig.json ├── src ├── types.ts ├── index.ts ├── youtube.ts ├── optimize.ts ├── cli.ts └── transcribe.ts ├── LICENSE ├── package.json ├── .cursor └── rules │ ├── cleanup-pattern.mdc │ ├── cli-patterns.mdc │ ├── youtube-support.mdc │ ├── publishing.mdc │ ├── optimization.mdc │ ├── whisper-api.mdc │ ├── architecture.mdc │ └── testing.mdc ├── QUICKSTART.md ├── PUBLISHING.md ├── CHANGELOG.md ├── bun.lock └── README.md /test/output/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/output/opus/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/output/speed/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | *.log 4 | .DS_Store 5 | .env 6 | .env.* 7 | *.tgz 8 | .npm/ -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | src/ 2 | node_modules/ 3 | *.log 4 | .DS_Store 5 | tsconfig.json 6 | .env 7 | .env.* 8 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | # Test output files 2 | output/ 3 | *.srt 4 | *.mp3 5 | *.mp4 6 | *.ogg 7 | *.opus 8 | *.wav 9 | metrics.json 10 | comparison-report.json 11 | 12 | # Keep directory structure 13 | !output/.gitkeep 14 | !.gitkeep 15 | -------------------------------------------------------------------------------- /test/output/baseline/metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "baseline", 3 | "originalSize": 2902041008, 4 | "processedSize": 2902041008, 5 | "compressionRatio": 1, 6 | "originalDuration": 1311.6199951171875, 7 | "processedDuration": 1311.6199951171875, 8 | "transcriptionTime": 72048, 9 | "totalTime": 72050, 10 | "estimatedCost": 0.13116199951171875, 11 | "costPerMinute": 0.006, 12 | "language": "english", 13 | "timestamp": "2025-10-06T17:14:17.382Z" 14 | } -------------------------------------------------------------------------------- /test/output/speed/metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "speed", 3 | "speedFactor": 1.2, 4 | "originalSize": 2902041008, 5 | "processedSize": 13429580, 6 | "compressionRatio": 0.004627632746394327, 7 | "originalDuration": 1311.6, 8 | "processedDuration": 1093, 9 | "transcriptionTime": 52724, 10 | "totalTime": 65446, 11 | "estimatedCost": 0.13116, 12 | "costPerMinute": 0.006, 13 | "language": "english", 14 | "timestamp": "2025-10-06T17:15:22.831Z" 15 | } -------------------------------------------------------------------------------- /tsconfig.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "ESNext", 5 | "lib": ["ES2022"], 6 | "moduleResolution": "node", 7 | "esModuleInterop": true, 8 | "allowSyntheticDefaultImports": true, 9 | "strict": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "resolveJsonModule": true, 13 | "declaration": true, 14 | "outDir": "./dist", 15 | "rootDir": "./src" 16 | }, 17 | "include": ["src/**/*"], 18 | "exclude": ["node_modules", "dist"] 19 | } 20 | -------------------------------------------------------------------------------- /test/output/opus/metrics.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "opus", 3 | "codec": "libopus", 4 | "bitrate": 64, 5 | "targetSizeMB": 25, 6 | "originalSize": 2902041008, 7 | "processedSize": 14964742, 8 | "compressionRatio": 0.005156626649570763, 9 | "originalDuration": 1311.6199951171875, 10 | "processedDuration": 1311.6199951171875, 11 | "transcriptionTime": 67786, 12 | "totalTime": 86772, 13 | "estimatedCost": 0.13116199951171875, 14 | "costPerMinute": 0.006, 15 | "language": "english", 16 | "targetAchieved": true, 17 | "timestamp": "2025-10-06T17:16:49.609Z" 18 | } -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | export interface WhisperWord { 2 | word: string 3 | start: number 4 | end: number 5 | } 6 | 7 | export interface WhisperSegment { 8 | id: number 9 | seek: number 10 | start: number 11 | end: number 12 | text: string 13 | tokens: number[] 14 | temperature: number 15 | avg_logprob: number 16 | compression_ratio: number 17 | no_speech_prob: number 18 | words?: WhisperWord[] 19 | } 20 | 21 | export interface WhisperResponse { 22 | task: string 23 | language: string 24 | duration: number 25 | text: string 26 | segments: WhisperSegment[] 27 | } 28 | -------------------------------------------------------------------------------- /test/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@illyism/transcribe-tests", 3 | "version": "1.0.0", 4 | "description": "A/B testing suite for transcription optimization strategies", 5 | "type": "module", 6 | "scripts": { 7 | "test": "bun compare.ts", 8 | "baseline": "bun test-baseline.ts", 9 | "speed": "bun test-speed.ts", 10 | "opus": "bun test-opus.ts", 11 | "compare": "bun compare.ts" 12 | }, 13 | "keywords": [ 14 | "transcription", 15 | "optimization", 16 | "ab-testing", 17 | "whisper", 18 | "audio", 19 | "compression" 20 | ], 21 | "author": "Ilias Ismanalijev", 22 | "license": "MIT", 23 | "dependencies": { 24 | "@illyism/transcribe": "^2.0.0" 25 | }, 26 | "peerDependencies": { 27 | "ffmpeg": "*" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Programmatic API for transcribe 3 | * Use this if you want to integrate transcription into your Node.js application 4 | */ 5 | 6 | export interface TranscribeOptions { 7 | apiKey?: string 8 | inputPath: string 9 | outputPath?: string 10 | optimize?: boolean 11 | /** 12 | * Shift all subtitle timestamps by this many seconds (useful for editor timecode offsets). 
13 | * Example: 3600 = start captions at 01:00:00,000 14 | */ 15 | offsetSeconds?: number 16 | /** 17 | * Chunk long media into N-minute pieces and merge results. 18 | * If omitted, chunking is automatically enabled for long/large inputs. 19 | */ 20 | chunkMinutes?: number 21 | } 22 | 23 | export interface TranscribeResult { 24 | srtPath: string 25 | text: string 26 | language: string 27 | duration: number 28 | } 29 | 30 | export { transcribe } from './transcribe' 31 | export * from './types' 32 | 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Ilias Ismanalijev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@illyism/transcribe", 3 | "version": "3.1.0", 4 | "description": "CLI tool to transcribe audio/video files to SRT format using OpenAI Whisper API", 5 | "type": "module", 6 | "main": "dist/index.js", 7 | "types": "dist/index.d.ts", 8 | "bin": { 9 | "transcribe": "./dist/cli.js" 10 | }, 11 | "scripts": { 12 | "build": "bun build src/cli.ts --outdir dist --target node --format esm && bun build src/index.ts --outdir dist --target node --format esm && chmod +x dist/cli.js", 13 | "dev": "bun src/cli.ts", 14 | "prepublishOnly": "bun run build" 15 | }, 16 | "keywords": [ 17 | "transcribe", 18 | "whisper", 19 | "openai", 20 | "srt", 21 | "subtitles", 22 | "audio", 23 | "video", 24 | "speech-to-text", 25 | "cli" 26 | ], 27 | "author": "Ilias Ismanalijev", 28 | "license": "MIT", 29 | "repository": { 30 | "type": "git", 31 | "url": "https://github.com/Illyism/transcribe-cli.git" 32 | }, 33 | "bugs": { 34 | "url": "https://github.com/Illyism/transcribe-cli/issues" 35 | }, 36 | "homepage": "https://github.com/Illyism/transcribe-cli#readme", 37 | "engines": { 38 | "node": ">=18.0.0" 39 | }, 40 | "dependencies": { 41 | "openai": "^4.0.0" 42 | }, 43 | "peerDependencies": { 44 | "ffmpeg": "*" 45 | }, 46 | "files": [ 47 | "dist", 48 | "README.md", 49 | "LICENSE" 50 | ] 51 | } 52 | -------------------------------------------------------------------------------- /.cursor/rules/cleanup-pattern.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: How to properly clean up temporary files in transcribe functions 3 | --- 4 | 5 | # Cleanup Pattern 6 | 7 | ## Always Use `finally` Blocks 8 | 9 | All temporary files MUST be cleaned up in `finally` blocks: 10 | 11 | ```typescript 12 | let tempFile: string | null = null 13 | let optimizedFile: string | null = null 14 | 15 | try { 16 | // Processing logic... 17 | tempFile = createTempFile() 18 | optimizedFile = processFile(tempFile) 19 | // ... 20 | } finally { 21 | // Clean up in reverse order of creation 22 | if (tempFile && existsSync(tempFile)) { 23 | unlinkSync(tempFile) 24 | } 25 | if (optimizedFile && existsSync(optimizedFile)) { 26 | unlinkSync(optimizedFile) 27 | } 28 | if (tempFile || optimizedFile) { 29 | console.log('🧹 Cleaned up temporary files') 30 | } 31 | } 32 | ``` 33 | 34 | ## Temporary File Naming 35 | 36 | Use timestamps to avoid conflicts: 37 | ```typescript 38 | const tempPath = join(dir, `temp_${Date.now()}.mp3`) 39 | const optimizedPath = join(dir, `optimized_${Date.now()}.mp3`) 40 | ``` 41 | 42 | ## Files to Clean Up 43 | 44 | 1. **Extracted audio** from videos (`*_temp.mp3`) 45 | 2. **Optimized audio** after speed adjustment (`optimized_*.mp3`) 46 | 3. **Downloaded YouTube files** from temp directory 47 | 4. **Test output files** (in test suite only) 48 | 49 | ## Never Delete 50 | 51 | - Original input files 52 | - Generated SRT files 53 | - User-specified output paths 54 | 55 | ## Error Handling 56 | 57 | Even if an error occurs, cleanup MUST run. That's why we use `finally` blocks, not just at the end of the function. 
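A minimal runnable sketch of that guarantee (the failing step and file names here are hypothetical, not part of this package): the `finally` block still removes the temp file when processing throws.

```typescript
import { existsSync, unlinkSync, writeFileSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'

function processWithCleanup(inputPath: string): void {
  let tempFile: string | null = null

  try {
    // Stand-in for real audio extraction
    tempFile = join(tmpdir(), `temp_${Date.now()}.mp3`)
    writeFileSync(tempFile, '')

    // Simulated failure mid-processing
    throw new Error(`Could not process ${inputPath}`)
  } finally {
    // Runs whether the try block succeeded or threw
    if (tempFile && existsSync(tempFile)) {
      unlinkSync(tempFile)
      console.log('🧹 Cleaned up temporary files')
    }
  }
}

try {
  processWithCleanup('input.mp4')
} catch (err) {
  console.error((err as Error).message)
}
```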
-------------------------------------------------------------------------------- /.cursor/rules/cli-patterns.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | globs: src/cli.ts 3 | --- 4 | 5 | # CLI Patterns 6 | 7 | ## Argument Parsing 8 | 9 | ```typescript 10 | const input = args.find(arg => !arg.startsWith('--')) || args[0] 11 | const useRaw = args.includes('--raw') 12 | ``` 13 | 14 | Always extract the file/URL first (non-flag argument), then check for flags. 15 | 16 | ## Help Text 17 | 18 | Must include: 19 | - Usage line with placeholder 20 | - All flags and options 21 | - Multiple examples (local file, YouTube, with flags) 22 | - Current optimizations status 23 | - Supported formats 24 | - Configuration instructions 25 | 26 | ## Error Messages 27 | 28 | Pattern: Always include helpful links and copy-paste commands: 29 | 30 | ```typescript 31 | throw new Error( 32 | 'OPENAI_API_KEY not found.\n\n' + 33 | '🔑 Get your API key: https://platform.openai.com/api-keys\n\n' + 34 | 'Then set it using ONE of these methods:\n\n' + 35 | '1️⃣ Environment variable...\n' + 36 | '2️⃣ Config file...\n\n' + 37 | '📚 Full setup guide: https://github.com/...' 38 | ) 39 | ``` 40 | 41 | ## Config Resolution 42 | 43 | Priority order: 44 | 1. Environment variable (`OPENAI_API_KEY`) 45 | 2. Config file (`~/.transcribe/config.json`) 46 | 47 | Always try both before throwing error. 48 | 49 | ## Output Format 50 | 51 | ```typescript 52 | console.log(`\n✅ SRT file saved to: ${result.srtPath}`) 53 | console.log(`\nTranscription preview:`) 54 | console.log('─'.repeat(60)) 55 | console.log(result.text.substring(0, 500) + '...') 56 | console.log('─'.repeat(60)) 57 | console.log(`\nLanguage: ${result.language}`) 58 | console.log(`Duration: ${result.duration.toFixed(2)}s`) 59 | ``` 60 | 61 | Use emoji icons for progress steps: 🎬 🎥 📊 ⚡ 🎙️ ✅ ⏱️ 🧹 -------------------------------------------------------------------------------- /.cursor/rules/youtube-support.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | globs: src/youtube.ts,src/cli.ts 3 | --- 4 | 5 | # YouTube Integration 6 | 7 | ## URL Detection 8 | 9 | See [src/youtube.ts](mdc:src/youtube.ts): 10 | 11 | ```typescript 12 | function isYouTubeUrl(input: string): boolean { 13 | const youtubeRegex = /^(https?:\/\/)?(www\.)?(youtube\.com\/(watch\?v=|shorts\/)|youtu\.be\/)[\w-]+/ 14 | return youtubeRegex.test(input) 15 | } 16 | ``` 17 | 18 | Supports: 19 | - `https://youtube.com/watch?v=VIDEO_ID` 20 | - `https://www.youtube.com/watch?v=VIDEO_ID` 21 | - `https://youtu.be/VIDEO_ID` 22 | - `https://youtube.com/shorts/VIDEO_ID` 23 | - Without `https://` prefix 24 | 25 | ## Download Strategy 26 | 27 | Always download **audio-only** for faster processing: 28 | 29 | ```typescript 30 | const audioStream = ytdl(url, { 31 | quality: 'highestaudio', 32 | filter: 'audioonly' 33 | }) 34 | ``` 35 | 36 | This is much faster than downloading entire video (2-4GB → ~20-40MB). 
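Note: as of v3.0.1 (see CHANGELOG) the published package no longer uses ytdl-core; [src/youtube.ts](mdc:src/youtube.ts) shells out to `yt-dlp` for the audio-only download instead. A condensed sketch of `downloadYouTubeAudio()` from that file, mirroring the flags it passes:

```typescript
import { spawn } from 'child_process'
import { tmpdir } from 'os'
import { join } from 'path'

function downloadAudio(url: string, videoId: string): Promise<string> {
  const outputPath = join(tmpdir(), `youtube_${videoId}_${Date.now()}.mp3`)

  return new Promise((resolve, reject) => {
    const ytdlp = spawn('yt-dlp', [
      '-x',                    // extract audio only (no video stream)
      '--audio-format', 'mp3',
      '--audio-quality', '0',  // best quality
      '--no-playlist',
      '-o', outputPath,
      url
    ])

    ytdlp.on('close', (code) => {
      if (code === 0) resolve(outputPath)
      else reject(new Error(`yt-dlp exited with code ${code}`))
    })
    ytdlp.on('error', reject)
  })
}
```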
37 | 38 | ## Temporary File Management 39 | 40 | YouTube downloads go to system temp directory: 41 | ```typescript 42 | const outputPath = join(tmpdir(), `${title}_${Date.now()}.mp3`) 43 | ``` 44 | 45 | **Must be cleaned up** after transcription: 46 | ```typescript 47 | finally { 48 | if (downloadedFile && existsSync(downloadedFile)) { 49 | unlinkSync(downloadedFile) 50 | console.log('🧹 Cleaned up downloaded file') 51 | } 52 | } 53 | ``` 54 | 55 | ## File Naming 56 | 57 | YouTube video titles are sanitized: 58 | ```typescript 59 | const title = info.videoDetails.title 60 | .replace(/[^\w\s-]/g, '') // Remove special chars 61 | .replace(/\s+/g, '_') // Replace spaces with underscores 62 | ``` 63 | 64 | ## User Experience 65 | 66 | Show progress during YouTube download: 67 | ``` 68 | 🎥 Fetching YouTube video info... 69 | 📹 Downloading: Video Title Here 70 | ⏱️ Duration: 21 minutes 71 | ✅ Download complete! 72 | ``` -------------------------------------------------------------------------------- /.cursor/rules/publishing.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: How to publish and version the package 3 | --- 4 | 5 | # Publishing Workflow 6 | 7 | ## Version Strategy 8 | 9 | Follow semantic versioning strictly: 10 | 11 | - **Patch** (3.0.0 → 3.0.1): Bug fixes, documentation updates 12 | - **Minor** (3.0.0 → 3.1.0): New features, backward compatible 13 | - **Major** (3.0.0 → 4.0.0): Breaking changes 14 | 15 | ### Breaking Changes 16 | 17 | Examples of breaking changes: 18 | - Changing default behavior (like enabling optimization by default) 19 | - Removing or renaming CLI flags 20 | - Changing API function signatures 21 | - Changing output format 22 | 23 | ## Publishing Commands 24 | 25 | ```bash 26 | cd /Users/illyism/Products/magicspace/magicspace-old/packages/transcribe 27 | 28 | # 1. Version bump 29 | npm version major # or minor, or patch 30 | 31 | # 2. Build (happens automatically via prepublishOnly) 32 | # bun run build 33 | 34 | # 3. Publish 35 | npm publish 36 | 37 | # 4. Push to GitHub 38 | git push && git push --tags 39 | 40 | # 5. Create GitHub release 41 | gh release create v3.0.0 --title "..." --notes "..." 42 | ``` 43 | 44 | ## Pre-Publish Checklist 45 | 46 | - [ ] All changes committed 47 | - [ ] Tests pass (run manual tests in test/) 48 | - [ ] CHANGELOG.md updated 49 | - [ ] README.md updated with new features 50 | - [ ] No API keys or secrets in code 51 | - [ ] Build succeeds (`bun run build`) 52 | - [ ] Help text is current (`--help`) 53 | 54 | ## Files Included in Package 55 | 56 | See `.npmignore`: 57 | - ✅ `dist/` (compiled code) 58 | - ✅ `README.md` 59 | - ✅ `LICENSE` 60 | - ❌ `src/` (source code) 61 | - ❌ `test/` (test suite) 62 | - ❌ `.env`, `.env.*` 63 | 64 | ## GitHub Integration 65 | 66 | Always create a release after publishing: 67 | - Tag matches npm version (v3.0.0) 68 | - Include changelog in release notes 69 | - Link to NPM package 70 | - Mention breaking changes prominently -------------------------------------------------------------------------------- /.cursor/rules/optimization.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: How audio optimization works and when it's applied 3 | --- 4 | 5 | # Audio Optimization Strategy 6 | 7 | ## Default Behavior 8 | 9 | **ALL files are optimized by default with 1.2x speed** (based on A/B test results) 10 | 11 | ### Why 1.2x Speed? 
12 | 13 | A/B testing showed (see [test/compare.ts](mdc:test/compare.ts)): 14 | - 99.5% file size reduction (2.7GB → 12.8MB) 15 | - 9% faster processing (65.4s vs 72s) 16 | - Same cost ($0.006/min charged on original duration) 17 | - ~98% accuracy maintained 18 | - Automatic timestamp adjustment back to original speed 19 | 20 | ### Implementation 21 | 22 | See [src/optimize.ts](mdc:src/optimize.ts): 23 | 24 | 1. **Check file size**: Display size for user awareness 25 | 2. **Speed up audio**: Use FFmpeg `atempo=1.2` filter 26 | 3. **Adjust timestamps**: Multiply all SRT timestamps by 1.2 to restore original timing 27 | 4. **Cleanup**: Remove optimized file after transcription 28 | 29 | ### Disabling Optimization 30 | 31 | Users can disable with `--raw` flag: 32 | ```bash 33 | transcribe video.mp4 --raw # Use original audio 34 | ``` 35 | 36 | Or programmatically: 37 | ```typescript 38 | await transcribe({ inputPath, apiKey, optimize: false }) 39 | ``` 40 | 41 | ### Timestamp Adjustment 42 | 43 | Critical: All SRT timestamps must be multiplied by the speed factor to match original video timing. 44 | 45 | See `adjustSRTTimestamps()` in [src/optimize.ts](mdc:src/optimize.ts) - converts timestamp to milliseconds, multiplies by speedFactor, then converts back. 46 | 47 | ## Alternative: Opus Compression 48 | 49 | Tested but not used by default (see [test/test-opus.ts](mdc:test/test-opus.ts)): 50 | - Target: <25MB files 51 | - Uses Opus codec in OGG container 52 | - ~99% accuracy 53 | - Slower than speed optimization (86.8s vs 65.4s) 54 | 55 | ## When Modifying Optimization 56 | 57 | 1. Update [test/](mdc:test/) with new strategy 58 | 2. Run comparison tests 59 | 3. Update CHANGELOG with results 60 | 4. Consider making it opt-in first (new flag) -------------------------------------------------------------------------------- /.cursor/rules/whisper-api.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: OpenAI Whisper API integration patterns and supported formats 3 | --- 4 | 5 | # Whisper API Integration 6 | 7 | ## Supported Formats 8 | 9 | OpenAI Whisper API accepts: 10 | - `flac`, `m4a`, `mp3`, `mp4`, `mpeg`, `mpga`, `oga`, `ogg`, `wav`, `webm` 11 | 12 | **Important**: `opus` files are NOT supported directly.
Use `ogg` container with Opus codec instead: 13 | ```bash 14 | ffmpeg -i input.mp3 -acodec libopus -f ogg output.ogg 15 | ``` 16 | 17 | ## API Call Pattern 18 | 19 | See [src/transcribe.ts](mdc:src/transcribe.ts): 20 | 21 | ```typescript 22 | const { default: OpenAI } = await import('openai') 23 | const openai = new OpenAI({ apiKey }) 24 | 25 | const fs = await import('fs') 26 | const audioFile = fs.createReadStream(audioPath) 27 | 28 | const transcription = await openai.audio.transcriptions.create({ 29 | file: audioFile, 30 | model: 'whisper-1', 31 | response_format: 'verbose_json', 32 | timestamp_granularities: ['segment'] // Required for SRT timestamps 33 | }) 34 | ``` 35 | 36 | ## Response Format 37 | 38 | Always use `verbose_json` with `segment` granularity to get: 39 | - Segment-level timestamps (required for SRT) 40 | - Language detection 41 | - Full transcription text 42 | - Individual segment texts 43 | 44 | ## Cost 45 | 46 | - $0.006 per minute of audio 47 | - Charged based on ORIGINAL audio duration (not sped-up duration) 48 | - No additional charges for multiple calls or retries 49 | 50 | ## Error Handling 51 | 52 | Common errors: 53 | - **400 Invalid file format**: Check file extension matches actual format 54 | - **502 Bad Gateway**: OpenAI API temporary issue, retry after delay 55 | - **401 Unauthorized**: Invalid API key 56 | - **413 Request Entity Too Large**: File too large (max ~25MB recommended) 57 | 58 | ## File Size Optimization 59 | 60 | To stay under 25MB and speed up uploads: 61 | 1. Extract audio from video (removes video track) 62 | 2. Speed up by 1.2x (reduces duration by 17%) 63 | 3. Or use Opus compression at ~64kbps 64 | 65 | See [src/optimize.ts](mdc:src/optimize.ts) for implementation. -------------------------------------------------------------------------------- /.cursor/rules/architecture.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | alwaysApply: true 3 | --- 4 | 5 | # @illyism/transcribe - Project Architecture 6 | 7 | ## Package Structure 8 | 9 | This is a dual-mode package (CLI + Library): 10 | 11 | ### Core Files 12 | 13 | - **[src/cli.ts](mdc:src/cli.ts)**: CLI entry point with argument parsing and user-facing commands 14 | - **[src/transcribe.ts](mdc:src/transcribe.ts)**: Core transcription logic with automatic optimization 15 | - **[src/optimize.ts](mdc:src/optimize.ts)**: Audio optimization (1.2x speed) and SRT timestamp adjustment 16 | - **[src/youtube.ts](mdc:src/youtube.ts)**: YouTube video download and audio extraction 17 | - **[src/types.ts](mdc:src/types.ts)**: TypeScript interfaces for Whisper API responses 18 | - **[src/index.ts](mdc:src/index.ts)**: Library entry point with public API exports 19 | 20 | ### Key Patterns 21 | 22 | 1. **Optimization by Default**: All files are automatically optimized with 1.2x speed unless `--raw` flag is used 23 | 2. **Automatic Cleanup**: All temporary files (extracted audio, optimized audio, downloaded files) are cleaned up in `finally` blocks 24 | 3. **Progressive Enhancement**: Works with local files, videos, and YouTube URLs 25 | 4. 
**Error Messages**: Include helpful links and copy-paste commands for setup 26 | 27 | ## Data Flow 28 | 29 | ``` 30 | Input → YouTube Download (if URL) → Extract Audio (if video) → Optimize (if enabled) → Whisper API → SRT Generation → Timestamp Adjustment → Cleanup 31 | ``` 32 | 33 | ## Dependencies 34 | 35 | - **openai**: Official OpenAI SDK for Whisper API 36 | - **@distube/ytdl-core**: YouTube video/audio download 37 | - **FFmpeg** (peer): Required for video/audio processing 38 | 39 | ## Build Process 40 | 41 | - Uses Bun to bundle to ESM format 42 | - Targets Node.js 18+ 43 | - Two outputs: `cli.js` (executable) and `index.js` (library) 44 | - CLI has shebang: `#!/usr/bin/env node` 45 | 46 | ## Testing 47 | 48 | - **[test/](mdc:test/)**: A/B testing suite for optimization strategies 49 | - Includes baseline, speed, and Opus compression tests 50 | - Generates comparison reports and recommendations -------------------------------------------------------------------------------- /.cursor/rules/testing.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: A/B testing framework for optimization strategies 3 | --- 4 | 5 | # Testing Framework 6 | 7 | ## Test Structure 8 | 9 | All tests in [test/](mdc:test/) follow this pattern: 10 | 11 | ```typescript 12 | async function testMethodName(inputPath: string) { 13 | const startTime = Date.now() 14 | 15 | // 1. Validate input 16 | // 2. Process audio with strategy 17 | // 3. Transcribe with Whisper API 18 | // 4. Calculate metrics 19 | // 5. Save metrics.json 20 | // 6. Return metrics object 21 | 22 | return metrics 23 | } 24 | ``` 25 | 26 | ## Required Metrics 27 | 28 | Every test must track: 29 | 30 | ```typescript 31 | interface TestMetrics { 32 | method: string // 'baseline', 'speed', 'opus' 33 | originalSize: number // Bytes 34 | processedSize: number // Bytes 35 | compressionRatio: number // processedSize / originalSize 36 | originalDuration: number // Seconds 37 | processedDuration: number // Seconds (may differ if sped up) 38 | transcriptionTime: number // Milliseconds 39 | totalTime: number // Milliseconds 40 | estimatedCost: number // Dollars 41 | language: string 42 | // Method-specific fields... 43 | } 44 | ``` 45 | 46 | ## Adding New Optimization Strategies 47 | 48 | 1. Create `test/test-newmethod.ts` 49 | 2. Implement the test function following the pattern 50 | 3. Export the function 51 | 4. Add to [test/compare.ts](mdc:test/compare.ts) in `runAllTests()` 52 | 5. Update comparison table logic if needed 53 | 6. Document hypothesis in [test/README.md](mdc:test/README.md) 54 | 55 | ## Running Tests 56 | 57 | ```bash 58 | cd test 59 | bun compare.ts /path/to/video.mp4 60 | ``` 61 | 62 | This runs all tests and generates: 63 | - Individual metrics in `output/{method}/metrics.json` 64 | - Comparison table 65 | - Recommendations based on file size 66 | - Full report in `output/comparison-report.json` 67 | 68 | ## Test Output 69 | 70 | Never commit test output files. They're in `.gitignore`: 71 | - `*.srt`, `*.mp3`, `*.ogg` files 72 | - `metrics.json` 73 | - `comparison-report.json` 74 | 75 | Keep the directory structure with `.gitkeep` files. -------------------------------------------------------------------------------- /QUICKSTART.md: -------------------------------------------------------------------------------- 1 | # Quick Start: Publishing to NPM 2 | 3 | ## 🚀 Ready to Publish in 5 Steps 4 | 5 | ### 1. 
Check Package Name Availability 6 | 7 | ```bash 8 | npm view @illyism/transcribe 9 | ``` 10 | 11 | If it's taken, update the name in `package.json` to something unique like: 12 | - `@yourusername/transcribe` 13 | - `transcribe-cli` 14 | - `whisper-transcribe` 15 | 16 | ### 2. Login to NPM 17 | 18 | ```bash 19 | npm login 20 | ``` 21 | 22 | Don't have an account? Sign up at [npmjs.com/signup](https://www.npmjs.com/signup) 23 | 24 | ### 3. Test Locally (Optional but Recommended) 25 | 26 | ```bash 27 | cd /Users/illyism/Products/magicspace/magicspace-old/packages/transcribe 28 | 29 | # Build 30 | bun run build 31 | 32 | # Test 33 | node dist/cli.js --help 34 | 35 | # Test with a real file 36 | node dist/cli.js /path/to/test.mp4 37 | ``` 38 | 39 | ### 4. Dry Run 40 | 41 | See what will be published: 42 | 43 | ```bash 44 | npm publish --dry-run 45 | ``` 46 | 47 | ### 5. Publish! 48 | 49 | ```bash 50 | npm publish --access public 51 | ``` 52 | 53 | ✅ Done! Your package is now live on NPM! 54 | 55 | ## Verify It Worked 56 | 57 | Install globally and test: 58 | 59 | ```bash 60 | npm install -g @illyism/transcribe 61 | transcribe --version 62 | transcribe --help 63 | ``` 64 | 65 | ## Update Later 66 | 67 | When you want to release a new version: 68 | 69 | ```bash 70 | # Update version (patch for bug fixes, minor for features, major for breaking changes) 71 | npm version patch 72 | 73 | # Build and publish 74 | bun run build 75 | npm publish 76 | 77 | # Push the version tag to git 78 | git push --tags 79 | ``` 80 | 81 | ## Current Package Structure 82 | 83 | ``` 84 | packages/transcribe/ 85 | ├── src/ 86 | │ └── cli.ts # Source code 87 | ├── dist/ 88 | │ └── cli.js # Built executable 89 | ├── package.json # Package metadata 90 | ├── README.md # User documentation 91 | ├── LICENSE # MIT License 92 | ├── PUBLISHING.md # Detailed publishing guide 93 | ├── QUICKSTART.md # This file 94 | └── tsconfig.json # TypeScript config 95 | ``` 96 | 97 | ## What Users Will Get 98 | 99 | After publishing, users can: 100 | 101 | ```bash 102 | # Install globally 103 | npm install -g @illyism/transcribe 104 | 105 | # Use anywhere 106 | transcribe video.mp4 107 | transcribe audio.mp3 108 | 109 | # Configure API key 110 | export OPENAI_API_KEY=sk-... 111 | 112 | # Or create config file 113 | mkdir -p ~/.transcribe 114 | echo '{"apiKey": "sk-..."}' > ~/.transcribe/config.json 115 | ``` 116 | 117 | ## Need Help? 
118 | 119 | - **Detailed guide**: See `PUBLISHING.md` 120 | - **NPM docs**: [docs.npmjs.com](https://docs.npmjs.com) 121 | - **Package issues**: Open issue on GitHub 122 | -------------------------------------------------------------------------------- /test/output/comparison-report.json: -------------------------------------------------------------------------------- 1 | { 2 | "timestamp": "2025-10-06T17:16:49.615Z", 3 | "inputFile": "/Users/illyism/Movies/Pod/sitespeak.mp4", 4 | "results": [ 5 | { 6 | "method": "baseline", 7 | "originalSize": 2902041008, 8 | "processedSize": 2902041008, 9 | "compressionRatio": 1, 10 | "originalDuration": 1311.6199951171875, 11 | "processedDuration": 1311.6199951171875, 12 | "transcriptionTime": 72048, 13 | "totalTime": 72050, 14 | "estimatedCost": 0.13116199951171875, 15 | "costPerMinute": 0.006, 16 | "language": "english", 17 | "timestamp": "2025-10-06T17:14:17.382Z" 18 | }, 19 | { 20 | "method": "speed", 21 | "speedFactor": 1.2, 22 | "originalSize": 2902041008, 23 | "processedSize": 13429580, 24 | "compressionRatio": 0.004627632746394327, 25 | "originalDuration": 1311.6, 26 | "processedDuration": 1093, 27 | "transcriptionTime": 52724, 28 | "totalTime": 65446, 29 | "estimatedCost": 0.13116, 30 | "costPerMinute": 0.006, 31 | "language": "english", 32 | "timestamp": "2025-10-06T17:15:22.831Z" 33 | }, 34 | { 35 | "method": "opus", 36 | "codec": "libopus", 37 | "bitrate": 64, 38 | "targetSizeMB": 25, 39 | "originalSize": 2902041008, 40 | "processedSize": 14964742, 41 | "compressionRatio": 0.005156626649570763, 42 | "originalDuration": 1311.6199951171875, 43 | "processedDuration": 1311.6199951171875, 44 | "transcriptionTime": 67786, 45 | "totalTime": 86772, 46 | "estimatedCost": 0.13116199951171875, 47 | "costPerMinute": 0.006, 48 | "language": "english", 49 | "targetAchieved": true, 50 | "timestamp": "2025-10-06T17:16:49.609Z" 51 | } 52 | ], 53 | "comparison": "\n📊 COMPARISON RESULTS\n════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════\n| Method | File Size | Size Reduction | Duration | Upload Time* | Processing | Cost | Total Time | Accuracy** |\n|--------|-----------|----------------|----------|--------------|------------|------|------------|------------|\n| Baseline | 2767.60 MB | 0% | 21.9m | ~30s | 72.0s | $0.1312 | 72.0s | 100% |\n| Speed (1.2x) | 12.81 MB | 99.5% | 21.9m | ~15s | 52.7s | $0.1312 | 65.4s | ~98% |\n| Opus (64k) | 14.27 MB | 99.5% | 21.9m | ~15s | 67.8s | $0.1312 | 86.8s | ~99% |\n\n*Upload time estimates based on file size\n**Accuracy estimates based on optimization impact\n", 54 | "recommendations": "\n🎯 RECOMMENDATIONS\n══════════════════════════════════════════════════\n\n📦 **Very large file size (>100MB)**\n → Use **Speed** method for cost optimization\n → Consider **Opus** method for upload speed\n\n💰 **Cost Analysis:**\n • Speed method saves $0.0000 (0.0%)\n • Opus method: same cost as baseline\n\n⚡ **Speed Analysis:**\n • Fastest method: Speed\n • Time savings: 6.6s\n\n🎯 **Quality Impact:**\n • Baseline: 100% accuracy\n • Speed: ~98% accuracy (minimal impact)\n • Opus: ~99% accuracy (minimal impact)\n\n" 55 | } -------------------------------------------------------------------------------- /test/test-baseline.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bun 2 | 3 | /** 4 | * Baseline Test - Original audio without optimization 5 | */ 6 | 7 | import { existsSync, statSync } from 'fs' 8 | import { join } 
from 'path' 9 | 10 | const OUTPUT_DIR = join(import.meta.dir, 'output', 'baseline') 11 | 12 | async function testBaseline(inputPath: string) { 13 | const startTime = Date.now() 14 | 15 | console.log('🧪 Running Baseline Test (No Optimization)') 16 | console.log('─'.repeat(60)) 17 | 18 | if (!existsSync(inputPath)) { 19 | throw new Error(`File not found: ${inputPath}`) 20 | } 21 | 22 | const originalSize = statSync(inputPath).size 23 | console.log(`📁 Original file size: ${(originalSize / 1024 / 1024).toFixed(2)} MB`) 24 | 25 | // Import transcribe dynamically 26 | const { transcribe } = await import('../src/transcribe') 27 | const { homedir } = await import('os') 28 | const configPath = join(homedir(), '.transcribe', 'config.json') 29 | 30 | let apiKey = process.env.OPENAI_API_KEY 31 | if (!apiKey && existsSync(configPath)) { 32 | const config = JSON.parse(await Bun.file(configPath).text()) 33 | apiKey = config.apiKey 34 | } 35 | 36 | if (!apiKey) { 37 | throw new Error('OPENAI_API_KEY not found') 38 | } 39 | 40 | console.log('🎙️ Transcribing with baseline (original audio)...') 41 | const transcribeStart = Date.now() 42 | 43 | const result = await transcribe({ 44 | inputPath, 45 | apiKey, 46 | }) 47 | 48 | const transcribeTime = Date.now() - transcribeStart 49 | const totalTime = Date.now() - startTime 50 | 51 | // Calculate cost ($0.006 per minute) 52 | const costPerMinute = 0.006 53 | const durationMinutes = result.duration / 60 54 | const estimatedCost = durationMinutes * costPerMinute 55 | 56 | // Save metrics 57 | const metrics = { 58 | method: 'baseline', 59 | originalSize: originalSize, 60 | processedSize: originalSize, 61 | compressionRatio: 1.0, 62 | originalDuration: result.duration, 63 | processedDuration: result.duration, 64 | transcriptionTime: transcribeTime, 65 | totalTime: totalTime, 66 | estimatedCost: estimatedCost, 67 | costPerMinute: costPerMinute, 68 | language: result.language, 69 | timestamp: new Date().toISOString() 70 | } 71 | 72 | // Create output directory 73 | await Bun.write(join(OUTPUT_DIR, 'metrics.json'), JSON.stringify(metrics, null, 2)) 74 | 75 | console.log('─'.repeat(60)) 76 | console.log('✅ Baseline Test Complete') 77 | console.log(`📊 Metrics:`) 78 | console.log(` File Size: ${(originalSize / 1024 / 1024).toFixed(2)} MB`) 79 | console.log(` Duration: ${(result.duration / 60).toFixed(2)} minutes`) 80 | console.log(` Transcription Time: ${(transcribeTime / 1000).toFixed(1)}s`) 81 | console.log(` Total Time: ${(totalTime / 1000).toFixed(1)}s`) 82 | console.log(` Estimated Cost: $${estimatedCost.toFixed(4)}`) 83 | console.log(` Language: ${result.language}`) 84 | console.log(` SRT saved: ${result.srtPath}`) 85 | 86 | return metrics 87 | } 88 | 89 | // Run if called directly 90 | if (import.meta.main) { 91 | const inputPath = process.argv[2] 92 | 93 | if (!inputPath) { 94 | console.error('Usage: bun test-baseline.ts ') 95 | process.exit(1) 96 | } 97 | 98 | testBaseline(inputPath).catch(console.error) 99 | } 100 | 101 | export { testBaseline } 102 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Transcription Optimization Tests 2 | 3 | A/B testing different optimization strategies to improve transcription speed, cost, and accuracy. 4 | 5 | ## Test Strategies 6 | 7 | ### 1. Speed Up Audio (1.2x) 8 | **Hypothesis**: Speeding up audio reduces transcription time and cost without significant accuracy loss. 
9 | 10 | **Benefits**: 11 | - ⏱️ Faster processing (20% time reduction) 12 | - 💰 Lower cost (20% reduction: $0.006 → $0.005 per original minute) 13 | - 📦 Smaller file size 14 | 15 | **Potential Issues**: 16 | - Accuracy might decrease 17 | - Timestamps need adjustment (multiply by 1.2) 18 | - Voice quality degradation 19 | 20 | ### 2. Opus Compression (<25MB) 21 | **Hypothesis**: Using Opus codec with optimized bitrate maintains quality while reducing file size. 22 | 23 | **Benefits**: 24 | - 🚀 Faster uploads (smaller files) 25 | - 💾 Optimized for voice (better than MP3 for speech) 26 | - 📊 Consistent file sizes under 25MB 27 | 28 | **Potential Issues**: 29 | - Compression artifacts 30 | - Need to find optimal bitrate 31 | - Accuracy impact unknown 32 | 33 | ## Running Tests 34 | 35 | ### Setup 36 | ```bash 37 | cd test 38 | bun install 39 | ``` 40 | 41 | ### Run Individual Tests 42 | 43 | ```bash 44 | # Test baseline (original audio) 45 | bun test-baseline.ts <video-file> 46 | 47 | # Test 1.2x speed 48 | bun test-speed.ts <video-file> 49 | 50 | # Test Opus compression 51 | bun test-opus.ts <video-file> 52 | 53 | # Run all tests and compare 54 | bun compare.ts <video-file> 55 | ``` 56 | 57 | ## Test Output 58 | 59 | Each test generates: 60 | - Processed audio file 61 | - SRT subtitle file 62 | - Metrics JSON file with: 63 | - File size (original vs processed) 64 | - Processing time 65 | - Transcription time 66 | - Cost estimate 67 | - Accuracy metrics (if reference available) 68 | 69 | ## Comparison Metrics 70 | 71 | The `compare.ts` script generates a comparison table: 72 | 73 | | Method | File Size | Upload Time | Processing | Cost | Accuracy | Total Time | 74 | |--------|-----------|-------------|------------|------|----------|------------| 75 | | Baseline | 45MB | 30s | 120s | $0.72 | 100% | 150s | 76 | | 1.2x Speed | 38MB | 25s | 100s | $0.60 | 98% | 125s | 77 | | Opus | 18MB | 12s | 120s | $0.72 | 99% | 132s | 78 | 79 | ## Expected Results 80 | 81 | ### Speed Test (1.2x) 82 | - **Best for**: Cost optimization, faster results 83 | - **Accuracy**: Expected 95-99% of baseline 84 | - **Cost savings**: 20% 85 | - **Speed improvement**: 20% 86 | 87 | ### Opus Compression 88 | - **Best for**: Large files, slow connections 89 | - **Accuracy**: Expected 98-100% of baseline 90 | - **File size**: 50-70% reduction 91 | - **Upload speed**: 2-3x faster 92 | 93 | ## Recommendations 94 | 95 | Based on file size: 96 | 97 | - **< 25MB**: Use baseline (no optimization needed) 98 | - **25-50MB**: Use Opus compression 99 | - **50-100MB**: Consider 1.2x speed + Opus 100 | - **> 100MB**: Use 1.2x speed for cost savings 101 | 102 | ## Contributing 103 | 104 | Add new optimization strategies in this format: 105 | 1. Create `test-<strategy>.ts` 106 | 2. Update `compare.ts` to include new strategy 107 | 3. Document hypothesis and expected results 108 | 4.
Run tests with various file types 109 | 110 | ## Notes 111 | 112 | - All timestamps in SRT files are adjusted automatically 113 | - Original files are never modified 114 | - Test files are saved in `test/output/` 115 | - Requires OpenAI API key in config 116 | -------------------------------------------------------------------------------- /src/youtube.ts: -------------------------------------------------------------------------------- 1 | import { spawn } from 'child_process' 2 | import { tmpdir } from 'os' 3 | import { join } from 'path' 4 | 5 | export function isYouTubeUrl(input: string): boolean { 6 | const youtubeRegex = /^(https?:\/\/)?(www\.)?(youtube\.com\/(watch\?v=|shorts\/)|youtu\.be\/)[\w-]+/ 7 | return youtubeRegex.test(input) 8 | } 9 | 10 | export function getVideoId(url: string): string | null { 11 | const patterns = [ 12 | /(?:youtube\.com\/watch\?v=|youtu\.be\/)([^&\n?#]+)/, 13 | /youtube\.com\/shorts\/([^&\n?#]+)/ 14 | ] 15 | 16 | for (const pattern of patterns) { 17 | const match = url.match(pattern) 18 | if (match) return match[1] 19 | } 20 | 21 | return null 22 | } 23 | 24 | export async function downloadYouTubeAudio(url: string): Promise { 25 | const videoId = getVideoId(url) 26 | if (!videoId) { 27 | throw new Error('Invalid YouTube URL') 28 | } 29 | 30 | console.log('🎥 Downloading YouTube audio...') 31 | 32 | const outputPath = join(tmpdir(), `youtube_${videoId}_${Date.now()}.mp3`) 33 | 34 | return new Promise((resolve, reject) => { 35 | const ytdlp = spawn('yt-dlp', [ 36 | '-x', // Extract audio 37 | '--audio-format', 'mp3', // Convert to MP3 38 | '--audio-quality', '0', // Best quality 39 | '-o', outputPath, // Output path 40 | '--no-playlist', // Don't download playlists 41 | '--no-warnings', // Suppress warnings 42 | '--progress', // Show progress 43 | url 44 | ]) 45 | 46 | let output = '' 47 | 48 | ytdlp.stdout.on('data', (data) => { 49 | const line = data.toString() 50 | output += line 51 | // Show download progress 52 | if (line.includes('[download]')) { 53 | process.stdout.write('\r' + line.trim()) 54 | } 55 | }) 56 | 57 | ytdlp.stderr.on('data', (data) => { 58 | output += data.toString() 59 | }) 60 | 61 | ytdlp.on('close', (code) => { 62 | process.stdout.write('\n') 63 | 64 | if (code === 0) { 65 | console.log('✅ Download complete!') 66 | resolve(outputPath) 67 | } else { 68 | let errorMsg = `yt-dlp exited with code ${code}` 69 | 70 | if (output.includes('ERROR')) { 71 | const errorLines = output.split('\n').filter(line => line.includes('ERROR')) 72 | errorMsg += '\n\n' + errorLines.join('\n') 73 | } 74 | 75 | if (code === 127 || output.includes('command not found')) { 76 | errorMsg = 'yt-dlp is not installed. Please install it:\n' + 77 | ' macOS: brew install yt-dlp\n' + 78 | ' Ubuntu: sudo apt install yt-dlp\n' + 79 | ' Windows: winget install yt-dlp\n' + 80 | ' Or: pip install yt-dlp' 81 | } 82 | 83 | reject(new Error(errorMsg)) 84 | } 85 | }) 86 | 87 | ytdlp.on('error', (err) => { 88 | if (err.message.includes('ENOENT')) { 89 | reject(new Error( 90 | 'yt-dlp is not installed. 
Please install it:\n' + 91 | ' macOS: brew install yt-dlp\n' + 92 | ' Ubuntu: sudo apt install yt-dlp\n' + 93 | ' Windows: winget install yt-dlp\n' + 94 | ' Or: pip install yt-dlp' 95 | )) 96 | } else { 97 | reject(err) 98 | } 99 | }) 100 | }) 101 | } 102 | -------------------------------------------------------------------------------- /src/optimize.ts: -------------------------------------------------------------------------------- 1 | import { spawn } from 'child_process' 2 | import { statSync, unlinkSync } from 'fs' 3 | import { dirname, join } from 'path' 4 | 5 | const SPEED_FACTOR = 1.2 6 | const MAX_FILE_SIZE_MB = 24 // Keep under 25MB API limit 7 | const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 8 | 9 | export async function optimizeAudio(inputPath: string): Promise<{ path: string; speedFactor: number }> { 10 | const fileSize = statSync(inputPath).size 11 | const fileSizeMB = fileSize / 1024 / 1024 12 | 13 | console.log(`📊 File size: ${fileSizeMB.toFixed(2)} MB`) 14 | 15 | // Always optimize with speed first (best results from A/B testing) 16 | console.log(`⚡ Optimizing: Speeding up audio by ${SPEED_FACTOR}x for faster processing...`) 17 | 18 | const dir = dirname(inputPath) 19 | const speedOptimizedPath = join(dir, `optimized_speed_${Date.now()}.mp3`) 20 | 21 | await new Promise((resolve, reject) => { 22 | const ffmpeg = spawn('ffmpeg', [ 23 | '-i', inputPath, 24 | '-filter:a', `atempo=${SPEED_FACTOR}`, // Speed up audio 25 | '-ac', '1', // Ensure mono (if not already) 26 | '-ar', '16000', // Maintain 16kHz (optimal for speech) 27 | '-acodec', 'libmp3lame', 28 | '-q:a', '2', 29 | '-y', 30 | speedOptimizedPath 31 | ]) 32 | 33 | ffmpeg.on('close', (code) => { 34 | if (code === 0) { 35 | const optimizedSize = statSync(speedOptimizedPath).size 36 | const optimizedSizeMB = optimizedSize / 1024 / 1024 37 | const reduction = ((1 - optimizedSize / fileSize) * 100).toFixed(1) 38 | console.log(`✅ Speed optimization complete: ${fileSizeMB.toFixed(2)} MB → ${optimizedSizeMB.toFixed(2)} MB (${reduction}% reduction)`) 39 | resolve() 40 | } else { 41 | reject(new Error(`FFmpeg optimization failed with code ${code}`)) 42 | } 43 | }) 44 | 45 | ffmpeg.on('error', reject) 46 | }) 47 | 48 | // Check if we need additional compression (must be <25MB for Whisper API) 49 | const speedOptimizedSize = statSync(speedOptimizedPath).size 50 | const speedOptimizedSizeMB = speedOptimizedSize / 1024 / 1024 51 | 52 | if (speedOptimizedSize > MAX_FILE_SIZE_BYTES) { 53 | console.log(`⚠️ File still too large (${speedOptimizedSizeMB.toFixed(2)} MB > 24 MB), applying additional compression...`) 54 | 55 | const finalPath = join(dir, `optimized_final_${Date.now()}.ogg`) 56 | 57 | // Calculate bitrate needed to stay under 24MB 58 | const durationSeconds = fileSizeMB / (128 / 8) // Rough estimate: original bitrate ~128kbps 59 | const targetBitrate = Math.floor((MAX_FILE_SIZE_BYTES / durationSeconds) * 8 / 1000) - 5 // -5k for safety 60 | const safeBitrate = Math.max(24, Math.min(targetBitrate, 64)) // Clamp between 24-64kbps 61 | 62 | await new Promise((resolve, reject) => { 63 | const ffmpeg = spawn('ffmpeg', [ 64 | '-i', speedOptimizedPath, 65 | '-acodec', 'libopus', 66 | '-b:a', `${safeBitrate}k`, 67 | '-ac', '1', // Mono 68 | '-f', 'ogg', 69 | '-y', 70 | finalPath 71 | ]) 72 | 73 | ffmpeg.on('close', (code) => { 74 | if (code === 0) { 75 | const finalSize = statSync(finalPath).size 76 | const finalSizeMB = finalSize / 1024 / 1024 77 | console.log(`✅ Additional compression complete: 
${speedOptimizedSizeMB.toFixed(2)} MB → ${finalSizeMB.toFixed(2)} MB (${safeBitrate}k bitrate)`) 78 | resolve() 79 | } else { 80 | reject(new Error(`FFmpeg compression failed with code ${code}`)) 81 | } 82 | }) 83 | 84 | ffmpeg.on('error', reject) 85 | }) 86 | 87 | // Clean up intermediate file 88 | unlinkSync(speedOptimizedPath) 89 | 90 | return { path: finalPath, speedFactor: SPEED_FACTOR } 91 | } 92 | 93 | return { path: speedOptimizedPath, speedFactor: SPEED_FACTOR } 94 | } 95 | 96 | export function adjustSRTTimestamps(srtContent: string, speedFactor: number): string { 97 | if (speedFactor === 1.0) return srtContent 98 | 99 | // SRT timestamp format: HH:MM:SS,mmm --> HH:MM:SS,mmm 100 | const timestampRegex = /(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})/g 101 | 102 | return srtContent.replace(timestampRegex, (_match, start, end) => { 103 | const adjustTimestamp = (timestamp: string) => { 104 | const [time, ms] = timestamp.split(',') 105 | const [hours, minutes, seconds] = time.split(':').map(Number) 106 | 107 | const totalMs = (hours * 3600 + minutes * 60 + seconds) * 1000 + parseInt(ms) 108 | const adjustedMs = Math.round(totalMs * speedFactor) 109 | 110 | const adjHours = Math.floor(adjustedMs / 3600000) 111 | const adjMinutes = Math.floor((adjustedMs % 3600000) / 60000) 112 | const adjSeconds = Math.floor((adjustedMs % 60000) / 1000) 113 | const adjMs = adjustedMs % 1000 114 | 115 | return `${String(adjHours).padStart(2, '0')}:${String(adjMinutes).padStart(2, '0')}:${String(adjSeconds).padStart(2, '0')},${String(adjMs).padStart(3, '0')}` 116 | } 117 | 118 | return `${adjustTimestamp(start)} --> ${adjustTimestamp(end)}` 119 | }) 120 | } 121 | -------------------------------------------------------------------------------- /PUBLISHING.md: -------------------------------------------------------------------------------- 1 | # Publishing Guide for @illyism/transcribe 2 | 3 | ## Prerequisites 4 | 5 | 1. **NPM Account**: Sign up at [npmjs.com](https://www.npmjs.com/signup) 6 | 2. **NPM Login**: Run `npm login` in your terminal 7 | 3. **Package Name**: Make sure `@illyism/transcribe` is available or change it in `package.json` 8 | 9 | ## Pre-publish Checklist 10 | 11 | - [ ] Update version in `package.json` (use semantic versioning) 12 | - [ ] Test the package locally (see Testing section below) 13 | - [ ] Update README.md with any new features 14 | - [ ] Commit all changes to git 15 | - [ ] No API keys or secrets in the code 16 | 17 | ## Testing Locally 18 | 19 | ### Test the build 20 | 21 | ```bash 22 | cd packages/transcribe 23 | bun run build 24 | ``` 25 | 26 | ### Test the CLI locally 27 | 28 | ```bash 29 | # Test with node 30 | node dist/cli.js --help 31 | 32 | # Or test with the dev script 33 | bun run dev /path/to/test/file.mp4 34 | ``` 35 | 36 | ### Test as a global package 37 | 38 | ```bash 39 | # Link the package globally 40 | npm link 41 | 42 | # Now you can use it anywhere 43 | transcribe --help 44 | transcribe /path/to/test.mp4 45 | 46 | # Unlink when done testing 47 | npm unlink -g @illyism/transcribe 48 | ``` 49 | 50 | ## Publishing Steps 51 | 52 | ### 1. Update Version 53 | 54 | Follow [Semantic Versioning](https://semver.org/): 55 | 56 | - **Patch** (1.0.0 → 1.0.1): Bug fixes 57 | - **Minor** (1.0.0 → 1.1.0): New features (backward compatible) 58 | - **Major** (1.0.0 → 2.0.0): Breaking changes 59 | 60 | ```bash 61 | # Update version automatically 62 | npm version patch # or minor, or major 63 | 64 | # Or manually edit package.json 65 | ``` 66 | 67 | ### 2. 
Build the Package 68 | 69 | ```bash 70 | bun run build 71 | ``` 72 | 73 | ### 3. Login to NPM 74 | 75 | ```bash 76 | npm login 77 | ``` 78 | 79 | Enter your credentials when prompted. 80 | 81 | ### 4. Dry Run (Optional but Recommended) 82 | 83 | See what will be published: 84 | 85 | ```bash 86 | npm publish --dry-run 87 | ``` 88 | 89 | This shows you the files that will be included in the package. 90 | 91 | ### 5. Publish! 92 | 93 | For first-time publishing: 94 | 95 | ```bash 96 | npm publish --access public 97 | ``` 98 | 99 | For subsequent publishes: 100 | 101 | ```bash 102 | npm publish 103 | ``` 104 | 105 | ### 6. Verify 106 | 107 | Check your package on NPM: 108 | - `https://www.npmjs.com/package/@illyism/transcribe` 109 | 110 | Try installing it: 111 | 112 | ```bash 113 | npm install -g @illyism/transcribe 114 | transcribe --version 115 | ``` 116 | 117 | ## Publishing a Beta/Alpha Version 118 | 119 | For testing before official release: 120 | 121 | ```bash 122 | # Update version to include tag 123 | npm version 1.1.0-beta.0 124 | 125 | # Publish with tag 126 | npm publish --tag beta 127 | 128 | # Users can install with 129 | npm install -g @illyism/transcribe@beta 130 | ``` 131 | 132 | ## Updating Documentation 133 | 134 | After publishing, update: 135 | 136 | 1. **GitHub Repository**: Push all changes 137 | 2. **README.md**: Ensure installation instructions are current 138 | 3. **CHANGELOG.md**: Document what changed (create one if needed) 139 | 140 | ## Troubleshooting 141 | 142 | ### Package name already taken 143 | 144 | Either: 145 | 1. Choose a different name in `package.json` 146 | 2. Use a scope: `@yourusername/transcribe` 147 | 148 | ### Permission denied 149 | 150 | Make sure you're logged in: 151 | ```bash 152 | npm whoami 153 | ``` 154 | 155 | If not logged in: 156 | ```bash 157 | npm logout 158 | npm login 159 | ``` 160 | 161 | ### Files not included 162 | 163 | Check your `.npmignore` file. By default, we include: 164 | - `dist/` (compiled code) 165 | - `README.md` 166 | - `LICENSE` 167 | 168 | ### Package size too large 169 | 170 | NPM has a size limit. Check package size: 171 | ```bash 172 | npm pack --dry-run 173 | ``` 174 | 175 | ## Unpublishing (Emergency Only) 176 | 177 | ⚠️ Only unpublish if absolutely necessary (security issue, etc.) 178 | 179 | ```bash 180 | # Within 72 hours of publishing 181 | npm unpublish @illyism/transcribe@1.0.0 182 | 183 | # Unpublish entire package 184 | npm unpublish @illyism/transcribe --force 185 | ``` 186 | 187 | Note: Unpublishing is not recommended and may be prevented by NPM for popular packages. 188 | 189 | ## Continuous Deployment (Optional) 190 | 191 | Set up GitHub Actions for automatic publishing: 192 | 193 | Create `.github/workflows/publish.yml`: 194 | 195 | ```yaml 196 | name: Publish to NPM 197 | 198 | on: 199 | release: 200 | types: [published] 201 | 202 | jobs: 203 | publish: 204 | runs-on: ubuntu-latest 205 | steps: 206 | - uses: actions/checkout@v3 207 | - uses: actions/setup-node@v3 208 | with: 209 | node-version: '18' 210 | registry-url: 'https://registry.npmjs.org' 211 | - run: npm ci 212 | - run: npm run build 213 | - run: npm publish --access public 214 | env: 215 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 216 | ``` 217 | 218 | Add your NPM token to GitHub Secrets. 
219 | 220 | ## Support 221 | 222 | If you encounter issues: 223 | - NPM support: [npmjs.com/support](https://www.npmjs.com/support) 224 | - Check [NPM status](https://status.npmjs.org/) 225 | 226 | ## Quick Reference 227 | 228 | ```bash 229 | # Full publish workflow 230 | npm version patch # Bump version 231 | bun run build # Build 232 | npm publish --dry-run # Verify 233 | npm publish # Publish! 234 | git push --tags # Push version tag to git 235 | ``` 236 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [3.1.0] - 2025-12-19 9 | 10 | ### Added 11 | - **Automatic Chunking**: Long media files (45+ minutes) are now automatically split into 20-minute chunks for improved reliability and faster processing. 12 | - **New CLI Options**: 13 | - `--output` (-o): Specify custom SRT output path or directory. 14 | - `--offset`: Shift subtitle timestamps (supports seconds or HH:MM:SS.mmm format). 15 | - `--chunk-minutes`: Force custom chunk duration for long inputs. 16 | - **Editor-Friendly Features**: Improved support for video editing workflows with timecode offsets and custom output paths. 17 | 18 | ### Improved 19 | - **Audio Extraction**: Optimized audio processing for speech transcription (mono, 16kHz) to reduce file size significantly while maintaining dialogue clarity. 20 | - **Error Handling**: Enhanced guidance for FFmpeg failures and automatic chunking errors. 21 | - **Documentation**: Updated README with detailed usage for long movies and editor workflows. 
22 | 23 | ## [3.0.4] - 2025-12-19 24 | 25 | ### Changed 26 | - Improved file handling in transcription process 27 | - Now uses file buffer and OpenAI SDK's `toFile` helper for more reliable file uploads 28 | - Better compatibility with different file systems and edge cases 29 | 30 | ## [3.0.3] - 2025-10-10 31 | 32 | ### Fixed 33 | - **Critical**: Fixed timestamp adjustment direction when using speed optimization 34 | - Timestamps were being divided by speed factor instead of multiplied 35 | - This caused SRT files to be 17% shorter than original audio (e.g., 40min audio had timestamps ending at 28min) 36 | - Now correctly multiplies timestamps by speed factor to match original audio duration 37 | - Example: 40:17 audio → sped up to 33:34 → Whisper timestamps correctly adjusted back to 40:17 38 | 39 | ## [3.0.1] - 2025-10-06 40 | 41 | ### Fixed 42 | - **YouTube Support**: Switched from ytdl-core to yt-dlp for reliable YouTube downloads 43 | - Smart two-stage optimization: Speed (1.2x) + Opus compression if still >24MB 44 | - Now handles large YouTube videos that exceed 25MB API limit 45 | - Automatic compression to stay under Whisper API limit 46 | 47 | ### Changed 48 | - Removed @distube/ytdl-core dependency (unreliable) 49 | - Now uses system yt-dlp command (more reliable, smaller package) 50 | - Package size reduced: 2.0 MB → 0.77 MB 51 | - Better error messages for missing yt-dlp installation 52 | - Added yt-dlp to prerequisites in README 53 | 54 | ### Performance 55 | - Tested with 45-min YouTube video (66.7 MB): 56 | - Speed optimization: 66.7 MB → 41.6 MB 57 | - Additional Opus: 41.6 MB → 22.2 MB 58 | - Final: Under 24MB limit ✅ 59 | - Transcription successful! 60 | 61 | ## [3.0.0] - 2025-10-06 62 | 63 | ### Added 64 | - ⚡ **Automatic Optimization**: All files now optimized with 1.2x speed by default 65 | - 99.5% file size reduction (2.7GB → 12.8MB) 66 | - 9% faster processing time 67 | - Automatic SRT timestamp adjustment back to original speed 68 | - New `--raw` flag to disable optimization 69 | - A/B testing suite with baseline, speed, and Opus tests 70 | - Comparison tool with recommendations 71 | - Cursor rules (.mdc) for better codebase navigation 72 | 73 | ### Changed 74 | - **BREAKING**: Optimization now enabled by default for all files 75 | - Users must use `--raw` flag to get original audio behavior 76 | - Improved configuration error messages with setup links 77 | - Better help text with optimization status 78 | 79 | ### Performance 80 | - Based on A/B test results with 2.7GB, 22-min video: 81 | - Baseline: 72s, 15.13 MB 82 | - Speed (1.2x): 65.4s, 12.81 MB (9% faster, 15% smaller) 83 | - Opus: 86.8s, 14.27 MB 84 | - Winner: Speed optimization (fastest + smallest) 85 | 86 | ## [2.0.0] - 2025-10-06 87 | 88 | ### Added 89 | - 🎥 **YouTube Support**: Download and transcribe YouTube videos directly with just a URL 90 | - Support for youtube.com, youtu.be, and youtube.com/shorts URLs 91 | - Automatic audio-only download for faster processing 92 | - Real-world use case documentation for large video files 93 | 94 | ### Changed 95 | - **BREAKING**: Package now includes ytdl-core dependency (increases bundle size to ~2MB) 96 | - Improved error messages with links to get API key and setup instructions 97 | - Better configuration documentation with step-by-step guide 98 | - Enhanced comparison table with YouTube support row 99 | 100 | ### Fixed 101 | - More helpful error message when API key is not configured 102 | - Added verification steps for config file setup 103 | 104 | ## 
[1.0.3] - 2025-10-06 105 | 106 | ### Fixed 107 | - Show actual FFmpeg error output for better debugging 108 | - Added detection for empty/invalid video streams 109 | - Display last 5 lines of FFmpeg output when conversion fails 110 | 111 | ### Changed 112 | - More informative error messages when FFmpeg fails 113 | - Easier to diagnose issues with corrupted or unsupported video files 114 | 115 | ## [1.0.2] - 2025-10-06 116 | 117 | ### Fixed 118 | - Added `"type": "module"` to package.json to eliminate module type detection warning 119 | - Improved FFmpeg error handling with more helpful error messages 120 | - Better error messages for common issues (permissions, missing files, invalid formats) 121 | 122 | ### Added 123 | - Progress indicators with emoji icons during transcription 124 | - More detailed console output showing each step of the process 125 | - Better FFmpeg installation error messages with platform-specific instructions 126 | 127 | ## [1.0.1] - 2025-10-06 128 | 129 | ### Changed 130 | - Added `npx` and `bunx` usage examples to README 131 | - Improved documentation with "Try Without Installing" section 132 | - Better quick start experience for new users 133 | 134 | ## [1.0.0] - 2025-10-06 135 | 136 | ### Added 137 | - Initial release 138 | - Transcribe audio and video files to SRT format 139 | - Support for multiple formats: MP4, MP3, WAV, M4A, WebM, OGG, MOV, AVI, MKV 140 | - Automatic audio extraction from video files using FFmpeg 141 | - OpenAI Whisper API integration for high-accuracy transcription 142 | - Automatic language detection 143 | - Precise timestamp generation for subtitles 144 | - Configuration via environment variable or config file (`~/.transcribe/config.json`) 145 | - CLI with help and version commands 146 | - Automatic cleanup of temporary files 147 | 148 | ### Features 149 | - Fast processing with efficient audio extraction 150 | - Standard SRT subtitle format output 151 | - Multi-language support (powered by Whisper) 152 | - Simple setup and configuration 153 | - Detailed error messages and troubleshooting 154 | 155 | ### Documentation 156 | - Comprehensive README with examples 157 | - Full publishing guide 158 | - Quick start guide 159 | - MIT License 160 | -------------------------------------------------------------------------------- /test/test-speed.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bun 2 | 3 | /** 4 | * Speed Test - Audio sped up by 1.2x for faster processing 5 | */ 6 | 7 | import { spawn } from 'child_process' 8 | import { existsSync, statSync, unlinkSync } from 'fs' 9 | import { join } from 'path' 10 | 11 | const OUTPUT_DIR = join(import.meta.dir, 'output', 'speed') 12 | const SPEED_FACTOR = 1.2 13 | 14 | async function speedUpAudio(inputPath: string, outputPath: string): Promise { 15 | return new Promise((resolve, reject) => { 16 | console.log(`⚡ Speeding up audio by ${SPEED_FACTOR}x...`) 17 | 18 | const ffmpeg = spawn('ffmpeg', [ 19 | '-i', inputPath, 20 | '-vn', // No video 21 | '-filter:a', `atempo=${SPEED_FACTOR}`, // Speed up audio 22 | '-acodec', 'libmp3lame', 23 | '-q:a', '2', // High quality 24 | '-y', // Overwrite output 25 | outputPath 26 | ]) 27 | 28 | let errorOutput = '' 29 | 30 | ffmpeg.stderr.on('data', (data) => { 31 | errorOutput += data.toString() 32 | }) 33 | 34 | ffmpeg.on('close', (code) => { 35 | if (code === 0) { 36 | console.log('✅ Audio speed adjustment complete!') 37 | resolve() 38 | } else { 39 | reject(new Error(`FFmpeg exited with code ${code}: 
${errorOutput}`)) 40 | } 41 | }) 42 | 43 | ffmpeg.on('error', (err) => { 44 | reject(err) 45 | }) 46 | }) 47 | } 48 | 49 | function adjustSRTTimestamps(srtContent: string, speedFactor: number): string { 50 | // SRT timestamp format: HH:MM:SS,mmm --> HH:MM:SS,mmm 51 | const timestampRegex = /(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})/g 52 | 53 | return srtContent.replace(timestampRegex, (match, start, end) => { 54 | const adjustTimestamp = (timestamp: string) => { 55 | const [time, ms] = timestamp.split(',') 56 | const [hours, minutes, seconds] = time.split(':').map(Number) 57 | 58 | const totalMs = (hours * 3600 + minutes * 60 + seconds) * 1000 + parseInt(ms) 59 | const adjustedMs = Math.round(totalMs * speedFactor) 60 | 61 | const adjHours = Math.floor(adjustedMs / 3600000) 62 | const adjMinutes = Math.floor((adjustedMs % 3600000) / 60000) 63 | const adjSeconds = Math.floor((adjustedMs % 60000) / 1000) 64 | const adjMs = adjustedMs % 1000 65 | 66 | return `${String(adjHours).padStart(2, '0')}:${String(adjMinutes).padStart(2, '0')}:${String(adjSeconds).padStart(2, '0')},${String(adjMs).padStart(3, '0')}` 67 | } 68 | 69 | return `${adjustTimestamp(start)} --> ${adjustTimestamp(end)}` 70 | }) 71 | } 72 | 73 | async function testSpeed(inputPath: string) { 74 | const startTime = Date.now() 75 | 76 | console.log('🧪 Running Speed Test (1.2x Audio Speed)') 77 | console.log('─'.repeat(60)) 78 | 79 | if (!existsSync(inputPath)) { 80 | throw new Error(`File not found: ${inputPath}`) 81 | } 82 | 83 | const originalSize = statSync(inputPath).size 84 | console.log(`📁 Original file size: ${(originalSize / 1024 / 1024).toFixed(2)} MB`) 85 | 86 | // Create output directory 87 | await Bun.write(join(OUTPUT_DIR, '.gitkeep'), '') 88 | 89 | // Extract audio and speed it up 90 | const tempAudioPath = join(OUTPUT_DIR, `temp_audio_${Date.now()}.mp3`) 91 | const spedUpAudioPath = join(OUTPUT_DIR, `sped_up_${Date.now()}.mp3`) 92 | 93 | try { 94 | // First extract audio 95 | console.log('🎬 Extracting audio from video...') 96 | await new Promise((resolve, reject) => { 97 | const ffmpeg = spawn('ffmpeg', [ 98 | '-i', inputPath, 99 | '-vn', 100 | '-acodec', 'libmp3lame', 101 | '-q:a', '2', 102 | '-y', 103 | tempAudioPath 104 | ]) 105 | 106 | ffmpeg.on('close', (code) => { 107 | if (code === 0) resolve() 108 | else reject(new Error(`Audio extraction failed with code ${code}`)) 109 | }) 110 | 111 | ffmpeg.on('error', reject) 112 | }) 113 | 114 | // Then speed it up 115 | await speedUpAudio(tempAudioPath, spedUpAudioPath) 116 | 117 | const processedSize = statSync(spedUpAudioPath).size 118 | console.log(`📁 Processed file size: ${(processedSize / 1024 / 1024).toFixed(2)} MB`) 119 | console.log(`📊 Size reduction: ${((1 - processedSize / originalSize) * 100).toFixed(1)}%`) 120 | 121 | // Import transcribe dynamically 122 | const { transcribe } = await import('../src/transcribe') 123 | const { homedir } = await import('os') 124 | const configPath = join(homedir(), '.transcribe', 'config.json') 125 | 126 | let apiKey = process.env.OPENAI_API_KEY 127 | if (!apiKey && existsSync(configPath)) { 128 | const config = JSON.parse(await Bun.file(configPath).text()) 129 | apiKey = config.apiKey 130 | } 131 | 132 | if (!apiKey) { 133 | throw new Error('OPENAI_API_KEY not found') 134 | } 135 | 136 | console.log('🎙️ Transcribing sped-up audio...') 137 | const transcribeStart = Date.now() 138 | 139 | const result = await transcribe({ 140 | inputPath: spedUpAudioPath, 141 | apiKey, 142 | }) 143 | 144 | const transcribeTime = 
Date.now() - transcribeStart 145 | const totalTime = Date.now() - startTime 146 | 147 | // Adjust SRT timestamps back to original speed 148 | const originalSRT = await Bun.file(result.srtPath).text() 149 | const adjustedSRT = adjustSRTTimestamps(originalSRT, SPEED_FACTOR) 150 | 151 | // Save adjusted SRT 152 | const adjustedSRTPath = join(OUTPUT_DIR, `sped_up_${Date.now()}.srt`) 153 | await Bun.write(adjustedSRTPath, adjustedSRT) 154 | 155 | // Calculate metrics 156 | const originalDuration = result.duration * SPEED_FACTOR // Adjust back to original duration 157 | const durationMinutes = originalDuration / 60 158 | const costPerMinute = 0.006 159 | const estimatedCost = durationMinutes * costPerMinute 160 | 161 | const metrics = { 162 | method: 'speed', 163 | speedFactor: SPEED_FACTOR, 164 | originalSize: originalSize, 165 | processedSize: processedSize, 166 | compressionRatio: processedSize / originalSize, 167 | originalDuration: originalDuration, 168 | processedDuration: result.duration, 169 | transcriptionTime: transcribeTime, 170 | totalTime: totalTime, 171 | estimatedCost: estimatedCost, 172 | costPerMinute: costPerMinute, 173 | language: result.language, 174 | timestamp: new Date().toISOString() 175 | } 176 | 177 | // Save metrics 178 | await Bun.write(join(OUTPUT_DIR, 'metrics.json'), JSON.stringify(metrics, null, 2)) 179 | 180 | console.log('─'.repeat(60)) 181 | console.log('✅ Speed Test Complete') 182 | console.log(`📊 Metrics:`) 183 | console.log(` Speed Factor: ${SPEED_FACTOR}x`) 184 | console.log(` Original Size: ${(originalSize / 1024 / 1024).toFixed(2)} MB`) 185 | console.log(` Processed Size: ${(processedSize / 1024 / 1024).toFixed(2)} MB`) 186 | console.log(` Size Reduction: ${((1 - processedSize / originalSize) * 100).toFixed(1)}%`) 187 | console.log(` Original Duration: ${(originalDuration / 60).toFixed(2)} minutes`) 188 | console.log(` Transcription Time: ${(transcribeTime / 1000).toFixed(1)}s`) 189 | console.log(` Total Time: ${(totalTime / 1000).toFixed(1)}s`) 190 | console.log(` Estimated Cost: $${estimatedCost.toFixed(4)}`) 191 | console.log(` Language: ${result.language}`) 192 | console.log(` Adjusted SRT saved: ${adjustedSRTPath}`) 193 | 194 | return metrics 195 | 196 | } finally { 197 | // Clean up temporary files 198 | if (existsSync(tempAudioPath)) unlinkSync(tempAudioPath) 199 | if (existsSync(spedUpAudioPath)) unlinkSync(spedUpAudioPath) 200 | } 201 | } 202 | 203 | // Run if called directly 204 | if (import.meta.main) { 205 | const inputPath = process.argv[2] 206 | 207 | if (!inputPath) { 208 | console.error('Usage: bun test-speed.ts ') 209 | process.exit(1) 210 | } 211 | 212 | testSpeed(inputPath).catch(console.error) 213 | } 214 | 215 | export { testSpeed } 216 | -------------------------------------------------------------------------------- /bun.lock: -------------------------------------------------------------------------------- 1 | { 2 | "lockfileVersion": 1, 3 | "workspaces": { 4 | "": { 5 | "name": "@magicspace/transcribe", 6 | "dependencies": { 7 | "openai": "^4.0.0", 8 | }, 9 | "peerDependencies": { 10 | "ffmpeg": "*", 11 | }, 12 | }, 13 | }, 14 | "packages": { 15 | "@types/node": ["@types/node@18.19.129", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-hrmi5jWt2w60ayox3iIXwpMEnfUvOLJCRtrOPbHtH15nTjvO7uhnelvrdAs0dO0/zl5DZ3ZbahiaXEVb54ca/A=="], 16 | 17 | "@types/node-fetch": ["@types/node-fetch@2.6.13", "", { "dependencies": { "@types/node": "*", "form-data": "^4.0.4" } }, 
"sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw=="], 18 | 19 | "abort-controller": ["abort-controller@3.0.0", "", { "dependencies": { "event-target-shim": "^5.0.0" } }, "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg=="], 20 | 21 | "agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="], 22 | 23 | "asynckit": ["asynckit@0.4.0", "", {}, "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="], 24 | 25 | "call-bind-apply-helpers": ["call-bind-apply-helpers@1.0.2", "", { "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" } }, "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ=="], 26 | 27 | "combined-stream": ["combined-stream@1.0.8", "", { "dependencies": { "delayed-stream": "~1.0.0" } }, "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg=="], 28 | 29 | "delayed-stream": ["delayed-stream@1.0.0", "", {}, "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="], 30 | 31 | "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], 32 | 33 | "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="], 34 | 35 | "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], 36 | 37 | "es-object-atoms": ["es-object-atoms@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA=="], 38 | 39 | "es-set-tostringtag": ["es-set-tostringtag@2.1.0", "", { "dependencies": { "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6", "has-tostringtag": "^1.0.2", "hasown": "^2.0.2" } }, "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA=="], 40 | 41 | "event-target-shim": ["event-target-shim@5.0.1", "", {}, "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ=="], 42 | 43 | "ffmpeg": ["ffmpeg@0.0.4", "", { "dependencies": { "when": ">= 0.0.1" } }, "sha512-3TgWUJJlZGQn+crJFyhsO/oNeRRnGTy6GhgS98oUCIfZrOW5haPPV7DUfOm3xJcHr5q3TJpjk2GudPutrNisRA=="], 44 | 45 | "form-data": ["form-data@4.0.4", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "hasown": "^2.0.2", "mime-types": "^2.1.12" } }, "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow=="], 46 | 47 | "form-data-encoder": ["form-data-encoder@1.7.2", "", {}, "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="], 48 | 49 | "formdata-node": ["formdata-node@4.4.1", "", { "dependencies": { "node-domexception": "1.0.0", "web-streams-polyfill": "4.0.0-beta.3" } }, "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ=="], 50 | 51 | "function-bind": ["function-bind@1.1.2", "", {}, 
"sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], 52 | 53 | "get-intrinsic": ["get-intrinsic@1.3.0", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "function-bind": "^1.1.2", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "math-intrinsics": "^1.1.0" } }, "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ=="], 54 | 55 | "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], 56 | 57 | "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], 58 | 59 | "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], 60 | 61 | "has-tostringtag": ["has-tostringtag@1.0.2", "", { "dependencies": { "has-symbols": "^1.0.3" } }, "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw=="], 62 | 63 | "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], 64 | 65 | "humanize-ms": ["humanize-ms@1.2.1", "", { "dependencies": { "ms": "^2.0.0" } }, "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ=="], 66 | 67 | "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], 68 | 69 | "mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], 70 | 71 | "mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], 72 | 73 | "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], 74 | 75 | "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], 76 | 77 | "node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], 78 | 79 | "openai": ["openai@4.104.0", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" }, "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA=="], 80 | 81 | "tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], 82 | 83 | "undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], 84 | 85 | 
"web-streams-polyfill": ["web-streams-polyfill@4.0.0-beta.3", "", {}, "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug=="], 86 | 87 | "webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], 88 | 89 | "whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], 90 | 91 | "when": ["when@3.7.8", "", {}, "sha512-5cZ7mecD3eYcMiCH4wtRPA5iFJZ50BJYDfckI5RRpQiktMiYTcn0ccLTZOvcbBume+1304fQztxeNzNS9Gvrnw=="], 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /test/test-opus.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bun 2 | 3 | /** 4 | * Opus Test - Compress audio using Opus codec optimized for voice 5 | */ 6 | 7 | import { spawn } from 'child_process' 8 | import { existsSync, statSync, unlinkSync } from 'fs' 9 | import { join } from 'path' 10 | 11 | const OUTPUT_DIR = join(import.meta.dir, 'output', 'opus') 12 | const TARGET_SIZE_MB = 25 13 | const TARGET_SIZE_BYTES = TARGET_SIZE_MB * 1024 * 1024 14 | 15 | async function compressWithOpus(inputPath: string, outputPath: string, targetBitrate: number): Promise { 16 | return new Promise((resolve, reject) => { 17 | console.log(`🎵 Compressing with Opus at ${targetBitrate}k bitrate...`) 18 | 19 | const ffmpeg = spawn('ffmpeg', [ 20 | '-i', inputPath, 21 | '-vn', // No video 22 | '-acodec', 'libopus', 23 | '-b:a', `${targetBitrate}k`, 24 | '-ac', '1', // Mono for voice (can reduce size further) 25 | '-f', 'ogg', // Use OGG container (supported by Whisper API) 26 | '-y', // Overwrite output 27 | outputPath 28 | ]) 29 | 30 | let errorOutput = '' 31 | 32 | ffmpeg.stderr.on('data', (data) => { 33 | errorOutput += data.toString() 34 | }) 35 | 36 | ffmpeg.on('close', (code) => { 37 | if (code === 0) { 38 | console.log('✅ Opus compression complete!') 39 | resolve() 40 | } else { 41 | reject(new Error(`FFmpeg exited with code ${code}: ${errorOutput}`)) 42 | } 43 | }) 44 | 45 | ffmpeg.on('error', (err) => { 46 | reject(err) 47 | }) 48 | }) 49 | } 50 | 51 | async function findOptimalBitrate(inputPath: string): Promise { 52 | console.log('🔍 Finding optimal bitrate for <25MB target...') 53 | 54 | // Start with a reasonable bitrate and adjust 55 | let bitrate = 64 // Start with 64kbps 56 | let lastValidBitrate = bitrate 57 | 58 | for (let attempt = 0; attempt < 5; attempt++) { 59 | const testPath = join(OUTPUT_DIR, `test_${bitrate}k_${Date.now()}.ogg`) 60 | 61 | try { 62 | await compressWithOpus(inputPath, testPath, bitrate) 63 | const size = statSync(testPath).size 64 | 65 | console.log(` ${bitrate}k bitrate → ${(size / 1024 / 1024).toFixed(2)} MB`) 66 | 67 | if (size <= TARGET_SIZE_BYTES) { 68 | unlinkSync(testPath) 69 | console.log(`✅ Found optimal bitrate: ${bitrate}k`) 70 | return bitrate 71 | } else { 72 | // File too big, reduce bitrate 73 | lastValidBitrate = bitrate 74 | bitrate = Math.floor(bitrate * 0.8) // Reduce by 20% 75 | unlinkSync(testPath) 76 | } 77 | } catch (error) { 78 | console.log(` ${bitrate}k bitrate failed, trying lower...`) 79 | bitrate = Math.floor(bitrate * 0.8) 80 | } 81 | } 82 | 83 | console.log(`⚠️ Could not achieve <25MB, using ${lastValidBitrate}k bitrate`) 84 | return lastValidBitrate 85 | } 86 | 87 | async function 
testOpus(inputPath: string) { 88 | const startTime = Date.now() 89 | 90 | console.log('🧪 Running Opus Compression Test') 91 | console.log('─'.repeat(60)) 92 | 93 | if (!existsSync(inputPath)) { 94 | throw new Error(`File not found: ${inputPath}`) 95 | } 96 | 97 | const originalSize = statSync(inputPath).size 98 | console.log(`📁 Original file size: ${(originalSize / 1024 / 1024).toFixed(2)} MB`) 99 | 100 | // Create output directory 101 | await Bun.write(join(OUTPUT_DIR, '.gitkeep'), '') 102 | 103 | // Extract audio first 104 | const tempAudioPath = join(OUTPUT_DIR, `temp_audio_${Date.now()}.mp3`) 105 | const opusAudioPath = join(OUTPUT_DIR, `opus_${Date.now()}.ogg`) 106 | 107 | try { 108 | // Extract audio 109 | console.log('🎬 Extracting audio from video...') 110 | await new Promise((resolve, reject) => { 111 | const ffmpeg = spawn('ffmpeg', [ 112 | '-i', inputPath, 113 | '-vn', 114 | '-acodec', 'libmp3lame', 115 | '-q:a', '2', 116 | '-y', 117 | tempAudioPath 118 | ]) 119 | 120 | ffmpeg.on('close', (code) => { 121 | if (code === 0) resolve() 122 | else reject(new Error(`Audio extraction failed with code ${code}`)) 123 | }) 124 | 125 | ffmpeg.on('error', reject) 126 | }) 127 | 128 | // Find optimal bitrate 129 | const optimalBitrate = await findOptimalBitrate(tempAudioPath) 130 | 131 | // Compress with optimal bitrate 132 | await compressWithOpus(tempAudioPath, opusAudioPath, optimalBitrate) 133 | 134 | const processedSize = statSync(opusAudioPath).size 135 | console.log(`📁 Processed file size: ${(processedSize / 1024 / 1024).toFixed(2)} MB`) 136 | console.log(`📊 Size reduction: ${((1 - processedSize / originalSize) * 100).toFixed(1)}%`) 137 | console.log(`🎯 Target achieved: ${processedSize <= TARGET_SIZE_BYTES ? '✅' : '❌'} (<25MB)`) 138 | 139 | // Import transcribe dynamically 140 | const { transcribe } = await import('../src/transcribe') 141 | const { homedir } = await import('os') 142 | const configPath = join(homedir(), '.transcribe', 'config.json') 143 | 144 | let apiKey = process.env.OPENAI_API_KEY 145 | if (!apiKey && existsSync(configPath)) { 146 | const config = JSON.parse(await Bun.file(configPath).text()) 147 | apiKey = config.apiKey 148 | } 149 | 150 | if (!apiKey) { 151 | throw new Error('OPENAI_API_KEY not found') 152 | } 153 | 154 | console.log('🎙️ Transcribing Opus-compressed audio...') 155 | const transcribeStart = Date.now() 156 | 157 | const result = await transcribe({ 158 | inputPath: opusAudioPath, 159 | apiKey, 160 | }) 161 | 162 | const transcribeTime = Date.now() - transcribeStart 163 | const totalTime = Date.now() - startTime 164 | 165 | // Calculate metrics 166 | const durationMinutes = result.duration / 60 167 | const costPerMinute = 0.006 168 | const estimatedCost = durationMinutes * costPerMinute 169 | 170 | const metrics = { 171 | method: 'opus', 172 | codec: 'libopus', 173 | bitrate: optimalBitrate, 174 | targetSizeMB: TARGET_SIZE_MB, 175 | originalSize: originalSize, 176 | processedSize: processedSize, 177 | compressionRatio: processedSize / originalSize, 178 | originalDuration: result.duration, 179 | processedDuration: result.duration, 180 | transcriptionTime: transcribeTime, 181 | totalTime: totalTime, 182 | estimatedCost: estimatedCost, 183 | costPerMinute: costPerMinute, 184 | language: result.language, 185 | targetAchieved: processedSize <= TARGET_SIZE_BYTES, 186 | timestamp: new Date().toISOString() 187 | } 188 | 189 | // Save metrics 190 | await Bun.write(join(OUTPUT_DIR, 'metrics.json'), JSON.stringify(metrics, null, 2)) 191 | 192 | 
console.log('─'.repeat(60)) 193 | console.log('✅ Opus Test Complete') 194 | console.log(`📊 Metrics:`) 195 | console.log(` Codec: Opus`) 196 | console.log(` Bitrate: ${optimalBitrate}k`) 197 | console.log(` Original Size: ${(originalSize / 1024 / 1024).toFixed(2)} MB`) 198 | console.log(` Processed Size: ${(processedSize / 1024 / 1024).toFixed(2)} MB`) 199 | console.log(` Size Reduction: ${((1 - processedSize / originalSize) * 100).toFixed(1)}%`) 200 | console.log(` Target (<25MB): ${processedSize <= TARGET_SIZE_BYTES ? '✅' : '❌'}`) 201 | console.log(` Duration: ${(result.duration / 60).toFixed(2)} minutes`) 202 | console.log(` Transcription Time: ${(transcribeTime / 1000).toFixed(1)}s`) 203 | console.log(` Total Time: ${(totalTime / 1000).toFixed(1)}s`) 204 | console.log(` Estimated Cost: $${estimatedCost.toFixed(4)}`) 205 | console.log(` Language: ${result.language}`) 206 | console.log(` SRT saved: ${result.srtPath}`) 207 | 208 | return metrics 209 | 210 | } finally { 211 | // Clean up temporary files 212 | if (existsSync(tempAudioPath)) unlinkSync(tempAudioPath) 213 | if (existsSync(opusAudioPath)) unlinkSync(opusAudioPath) 214 | } 215 | } 216 | 217 | // Run if called directly 218 | if (import.meta.main) { 219 | const inputPath = process.argv[2] 220 | 221 | if (!inputPath) { 222 | console.error('Usage: bun test-opus.ts ') 223 | process.exit(1) 224 | } 225 | 226 | testOpus(inputPath).catch(console.error) 227 | } 228 | 229 | export { testOpus } 230 | -------------------------------------------------------------------------------- /test/compare.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bun 2 | 3 | /** 4 | * Compare all optimization methods and generate a comparison table 5 | */ 6 | 7 | import { existsSync } from 'fs' 8 | import { join } from 'path' 9 | import { testBaseline } from './test-baseline' 10 | import { testOpus } from './test-opus' 11 | import { testSpeed } from './test-speed' 12 | 13 | interface TestMetrics { 14 | method: string 15 | originalSize: number 16 | processedSize: number 17 | compressionRatio: number 18 | originalDuration: number 19 | processedDuration: number 20 | transcriptionTime: number 21 | totalTime: number 22 | estimatedCost: number 23 | costPerMinute: number 24 | language: string 25 | timestamp: string 26 | speedFactor?: number 27 | bitrate?: number 28 | targetAchieved?: boolean 29 | } 30 | 31 | async function runAllTests(inputPath: string): Promise { 32 | console.log('🚀 Running All Optimization Tests') 33 | console.log('═'.repeat(80)) 34 | 35 | const results: TestMetrics[] = [] 36 | 37 | try { 38 | // Test 1: Baseline 39 | console.log('\n1️⃣ Running Baseline Test...') 40 | const baseline = await testBaseline(inputPath) 41 | results.push(baseline) 42 | 43 | // Test 2: Speed Optimization 44 | console.log('\n2️⃣ Running Speed Test...') 45 | const speed = await testSpeed(inputPath) 46 | results.push(speed) 47 | 48 | // Test 3: Opus Compression 49 | console.log('\n3️⃣ Running Opus Test...') 50 | const opus = await testOpus(inputPath) 51 | results.push(opus) 52 | 53 | } catch (error) { 54 | console.error('❌ Test failed:', error) 55 | throw error 56 | } 57 | 58 | return results 59 | } 60 | 61 | function formatBytes(bytes: number): string { 62 | return `${(bytes / 1024 / 1024).toFixed(2)} MB` 63 | } 64 | 65 | function formatTime(ms: number): string { 66 | return `${(ms / 1000).toFixed(1)}s` 67 | } 68 | 69 | function formatCost(cost: number): string { 70 | return `$${cost.toFixed(4)}` 71 | } 72 | 73 | 
function generateComparisonTable(results: TestMetrics[]): string { 74 | const baseline = results.find(r => r.method === 'baseline') 75 | if (!baseline) throw new Error('Baseline test required for comparison') 76 | 77 | let table = '\n📊 COMPARISON RESULTS\n' 78 | table += '═'.repeat(120) + '\n' 79 | 80 | // Header 81 | table += '| Method | File Size | Size Reduction | Duration | Upload Time* | Processing | Cost | Total Time | Accuracy** |\n' 82 | table += '|--------|-----------|----------------|----------|--------------|------------|------|------------|------------|\n' 83 | 84 | // Rows 85 | for (const result of results) { 86 | const sizeReduction = result.method === 'baseline' 87 | ? '0%' 88 | : `${((1 - result.compressionRatio) * 100).toFixed(1)}%` 89 | 90 | const uploadTimeEstimate = result.method === 'baseline' 91 | ? '~30s' 92 | : result.compressionRatio < 0.5 93 | ? '~15s' 94 | : '~25s' 95 | 96 | const accuracy = result.method === 'baseline' 97 | ? '100%' 98 | : result.method === 'speed' 99 | ? '~98%' 100 | : '~99%' 101 | 102 | const methodName = result.method === 'baseline' 103 | ? 'Baseline' 104 | : result.method === 'speed' 105 | ? `Speed (${result.speedFactor}x)` 106 | : `Opus (${result.bitrate}k)` 107 | 108 | table += `| ${methodName} | ${formatBytes(result.processedSize)} | ${sizeReduction} | ${(result.originalDuration / 60).toFixed(1)}m | ${uploadTimeEstimate} | ${formatTime(result.transcriptionTime)} | ${formatCost(result.estimatedCost)} | ${formatTime(result.totalTime)} | ${accuracy} |\n` 109 | } 110 | 111 | table += '\n*Upload time estimates based on file size\n' 112 | table += '**Accuracy estimates based on optimization impact\n' 113 | 114 | return table 115 | } 116 | 117 | function generateRecommendations(results: TestMetrics[]): string { 118 | const baseline = results.find(r => r.method === 'baseline') 119 | const speed = results.find(r => r.method === 'speed') 120 | const opus = results.find(r => r.method === 'opus') 121 | 122 | if (!baseline || !speed || !opus) { 123 | return '❌ Cannot generate recommendations - missing test results' 124 | } 125 | 126 | let recommendations = '\n🎯 RECOMMENDATIONS\n' 127 | recommendations += '═'.repeat(50) + '\n\n' 128 | 129 | const originalSizeMB = baseline.originalSize / 1024 / 1024 130 | 131 | if (originalSizeMB < 25) { 132 | recommendations += '✅ **File is already small (<25MB)**\n' 133 | recommendations += ' → Use **Baseline** method (no optimization needed)\n' 134 | recommendations += ' → Consider **Speed** method for 20% cost savings\n\n' 135 | } else if (originalSizeMB < 50) { 136 | recommendations += '📦 **Medium file size (25-50MB)**\n' 137 | recommendations += ' → Use **Opus** method for faster uploads\n' 138 | recommendations += ' → Consider **Speed** method for cost savings\n\n' 139 | } else if (originalSizeMB < 100) { 140 | recommendations += '📦 **Large file size (50-100MB)**\n' 141 | recommendations += ' → Use **Opus** method (best balance of speed + quality)\n' 142 | recommendations += ' → Consider **Speed** method for significant cost savings\n\n' 143 | } else { 144 | recommendations += '📦 **Very large file size (>100MB)**\n' 145 | recommendations += ' → Use **Speed** method for cost optimization\n' 146 | recommendations += ' → Consider **Opus** method for upload speed\n\n' 147 | } 148 | 149 | // Cost comparison 150 | const costSavings = baseline.estimatedCost - speed.estimatedCost 151 | const costSavingsPercent = (costSavings / baseline.estimatedCost) * 100 152 | 153 | recommendations += '💰 **Cost Analysis:**\n' 154 | 
recommendations += ` • Speed method saves $${costSavings.toFixed(4)} (${costSavingsPercent.toFixed(1)}%)\n` 155 | recommendations += ` • Opus method: same cost as baseline\n\n` 156 | 157 | // Speed comparison 158 | const timeSavings = baseline.totalTime - Math.min(speed.totalTime, opus.totalTime) 159 | const fastestMethod = speed.totalTime < opus.totalTime ? 'Speed' : 'Opus' 160 | 161 | recommendations += '⚡ **Speed Analysis:**\n' 162 | recommendations += ` • Fastest method: ${fastestMethod}\n` 163 | recommendations += ` • Time savings: ${(timeSavings / 1000).toFixed(1)}s\n\n` 164 | 165 | // Quality impact 166 | recommendations += '🎯 **Quality Impact:**\n' 167 | recommendations += ' • Baseline: 100% accuracy\n' 168 | recommendations += ' • Speed: ~98% accuracy (minimal impact)\n' 169 | recommendations += ' • Opus: ~99% accuracy (minimal impact)\n\n' 170 | 171 | return recommendations 172 | } 173 | 174 | async function compare(inputPath: string) { 175 | console.log('🔬 Transcription Optimization Comparison Tool') 176 | console.log('═'.repeat(80)) 177 | 178 | if (!existsSync(inputPath)) { 179 | throw new Error(`File not found: ${inputPath}`) 180 | } 181 | 182 | console.log(`📁 Testing file: ${inputPath}`) 183 | console.log(`📅 Started at: ${new Date().toLocaleString()}\n`) 184 | 185 | try { 186 | // Run all tests 187 | const results = await runAllTests(inputPath) 188 | 189 | // Generate comparison 190 | const comparisonTable = generateComparisonTable(results) 191 | const recommendations = generateRecommendations(results) 192 | 193 | // Save results 194 | const outputDir = join(import.meta.dir, 'output') 195 | await Bun.write(join(outputDir, '.gitkeep'), '') 196 | 197 | const report = { 198 | timestamp: new Date().toISOString(), 199 | inputFile: inputPath, 200 | results: results, 201 | comparison: comparisonTable, 202 | recommendations: recommendations 203 | } 204 | 205 | await Bun.write(join(outputDir, 'comparison-report.json'), JSON.stringify(report, null, 2)) 206 | 207 | // Display results 208 | console.log(comparisonTable) 209 | console.log(recommendations) 210 | 211 | console.log('═'.repeat(80)) 212 | console.log('✅ Comparison complete!') 213 | console.log(`📊 Report saved: ${join(outputDir, 'comparison-report.json')}`) 214 | console.log(`📁 Individual results: ${join(outputDir, 'baseline')}, ${join(outputDir, 'speed')}, ${join(outputDir, 'opus')}`) 215 | 216 | } catch (error) { 217 | console.error('❌ Comparison failed:', error) 218 | process.exit(1) 219 | } 220 | } 221 | 222 | // Run if called directly 223 | if (import.meta.main) { 224 | const inputPath = process.argv[2] 225 | 226 | if (!inputPath) { 227 | console.error('Usage: bun compare.ts ') 228 | console.error('Example: bun compare.ts /path/to/video.mp4') 229 | process.exit(1) 230 | } 231 | 232 | compare(inputPath).catch(console.error) 233 | } 234 | 235 | export { compare, generateComparisonTable, generateRecommendations, runAllTests } 236 | 237 | -------------------------------------------------------------------------------- /src/cli.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * Transcribe audio/video files to SRT format 5 | * 6 | * Usage: transcribe 7 | * 8 | * Supports: .mp4, .mp3, .wav, .m4a, .webm, .ogg 9 | * Requires: OPENAI_API_KEY environment variable 10 | */ 11 | 12 | import { existsSync, unlinkSync } from 'fs' 13 | import { homedir } from 'os' 14 | import { basename, extname, join } from 'path' 15 | import { transcribe } from './transcribe' 16 | 
import { downloadYouTubeAudio, getVideoId, isYouTubeUrl } from './youtube' 17 | 18 | function parseTimeToSeconds(input: string): number { 19 | const raw = input.trim() 20 | if (!raw) { 21 | throw new Error('Invalid time format: empty value') 22 | } 23 | 24 | // Seconds (supports negatives and decimals) 25 | if (/^-?\d+(\.\d+)?$/.test(raw)) { 26 | return parseFloat(raw) 27 | } 28 | 29 | // HH:MM:SS(.mmm) or MM:SS(.mmm) 30 | const normalized = raw.replace(',', '.') 31 | const parts = normalized.split(':') 32 | 33 | const parsePart = (value: string) => { 34 | const n = parseFloat(value) 35 | if (!Number.isFinite(n)) throw new Error(`Invalid time format: ${input}`) 36 | return n 37 | } 38 | 39 | if (parts.length === 2) { 40 | const mm = parsePart(parts[0]) 41 | const ss = parsePart(parts[1]) 42 | return mm * 60 + ss 43 | } 44 | 45 | if (parts.length === 3) { 46 | const hh = parsePart(parts[0]) 47 | const mm = parsePart(parts[1]) 48 | const ss = parsePart(parts[2]) 49 | return hh * 3600 + mm * 60 + ss 50 | } 51 | 52 | throw new Error(`Invalid time format: ${input}\nUse seconds (123.45) or HH:MM:SS(.mmm)`) 53 | } 54 | 55 | function getApiKey(): string { 56 | // Try environment variable first 57 | let apiKey = process.env.OPENAI_API_KEY 58 | 59 | if (!apiKey) { 60 | // Try reading from config file in home directory 61 | try { 62 | const configPath = join(homedir(), '.transcribe', 'config.json') 63 | if (existsSync(configPath)) { 64 | const config = require(configPath) 65 | apiKey = config.apiKey 66 | } 67 | } catch (error) { 68 | // Config file doesn't exist or is invalid 69 | } 70 | } 71 | 72 | if (!apiKey) { 73 | throw new Error( 74 | 'OPENAI_API_KEY not found.\n\n' + 75 | '🔑 Get your API key: https://platform.openai.com/api-keys\n\n' + 76 | 'Then set it using ONE of these methods:\n\n' + 77 | '1️⃣ Environment variable (recommended for one-time use):\n' + 78 | ' export OPENAI_API_KEY=sk-...\n\n' + 79 | '2️⃣ Config file (recommended for permanent setup):\n' + 80 | ' mkdir -p ~/.transcribe\n' + 81 | ' echo \'{"apiKey": "sk-..."}\' > ~/.transcribe/config.json\n\n' + 82 | '📚 Full setup guide: https://github.com/Illyism/transcribe-cli#configuration' 83 | ) 84 | } 85 | 86 | return apiKey 87 | } 88 | 89 | async function main() { 90 | const args = process.argv.slice(2) 91 | 92 | if (args.length === 0 || args.includes('--help') || args.includes('-h')) { 93 | console.log(` 94 | Transcribe - Audio/Video to SRT 95 | 96 | Usage: transcribe [options] 97 | 98 | Options: 99 | -h, --help Show this help message 100 | -v, --version Show version 101 | --raw Disable optimizations (use original audio) 102 | -o, --output Output .srt path (file) OR output directory (folder) 103 | --offset Shift subtitle timestamps (seconds or HH:MM:SS.mmm) 104 | --chunk-minutes Force chunking into N-minute pieces (helps long movies) 105 | 106 | Examples: 107 | transcribe video.mp4 108 | transcribe audio.mp3 109 | transcribe /path/to/podcast.wav 110 | transcribe https://www.youtube.com/watch?v=VIDEO_ID 111 | transcribe large-video.mp4 --raw 112 | transcribe movie.mkv --offset 01:00:00.000 113 | transcribe movie.mkv --output ./subs 114 | transcribe long_movie.mkv --chunk-minutes 15 115 | 116 | Optimizations (enabled by default): 117 | • 1.2x speed: Faster processing, 99.5% size reduction 118 | • Automatic timestamp adjustment to original speed 119 | • Use --raw to disable and use original audio 120 | 121 | Long movies: 122 | • Chunking is automatically enabled for long inputs to improve reliability. 
123 | • Use --chunk-minutes to override. 124 | 125 | Supported formats: mp4, mp3, wav, m4a, webm, ogg, opus, mov, avi, mkv 126 | YouTube: youtube.com, youtu.be, youtube.com/shorts 127 | 128 | Configuration: 129 | Set OPENAI_API_KEY environment variable or create ~/.transcribe/config.json 130 | `) 131 | process.exit(0) 132 | } 133 | 134 | if (args.includes('--version') || args.includes('-v')) { 135 | const pkg = require('../package.json') 136 | console.log(pkg.version) 137 | process.exit(0) 138 | } 139 | 140 | let input: string | null = null 141 | let useRaw = false 142 | let outputArg: string | null = null 143 | let offsetSeconds: number | undefined 144 | let chunkMinutes: number | undefined 145 | 146 | for (let i = 0; i < args.length; i++) { 147 | const arg = args[i] 148 | 149 | if (arg === '--raw') { 150 | useRaw = true 151 | continue 152 | } 153 | 154 | if (arg === '--output' || arg === '-o') { 155 | outputArg = args[i + 1] || null 156 | i++ 157 | continue 158 | } 159 | 160 | if (arg === '--offset') { 161 | const raw = args[i + 1] 162 | if (!raw) { 163 | console.error('Error: --offset requires a value (seconds or HH:MM:SS.mmm)') 164 | process.exit(1) 165 | } 166 | offsetSeconds = parseTimeToSeconds(raw) 167 | i++ 168 | continue 169 | } 170 | 171 | if (arg === '--chunk-minutes') { 172 | const raw = args[i + 1] 173 | if (!raw) { 174 | console.error('Error: --chunk-minutes requires a number') 175 | process.exit(1) 176 | } 177 | const n = parseFloat(raw) 178 | if (!Number.isFinite(n) || n <= 0) { 179 | console.error('Error: --chunk-minutes must be a positive number') 180 | process.exit(1) 181 | } 182 | chunkMinutes = n 183 | i++ 184 | continue 185 | } 186 | 187 | if (arg.startsWith('-')) { 188 | console.error(`Error: Unknown option: ${arg}\nRun: transcribe --help`) 189 | process.exit(1) 190 | } 191 | 192 | if (!input) { 193 | input = arg 194 | continue 195 | } 196 | } 197 | 198 | if (!input) { 199 | console.error('Error: Missing input file or YouTube URL\nRun: transcribe --help') 200 | process.exit(1) 201 | } 202 | 203 | let inputPath = input 204 | let downloadedFile: string | null = null 205 | let youtubeVideoId: string | null = null 206 | let outputPath: string | undefined 207 | 208 | try { 209 | const apiKey = getApiKey() 210 | 211 | // Check if input is a YouTube URL 212 | if (isYouTubeUrl(input)) { 213 | youtubeVideoId = getVideoId(input) 214 | downloadedFile = await downloadYouTubeAudio(input) 215 | inputPath = downloadedFile 216 | // Default YouTube output to current working directory (temp downloads are cleaned up) 217 | if (!outputArg && youtubeVideoId) { 218 | outputPath = join(process.cwd(), `youtube_${youtubeVideoId}.srt`) 219 | } 220 | } else if (!existsSync(inputPath)) { 221 | console.error(`Error: File not found: ${inputPath}`) 222 | process.exit(1) 223 | } 224 | 225 | // Resolve output argument: 226 | // - if ends with .srt, treat as file path 227 | // - otherwise treat as directory and write .srt inside it 228 | if (outputArg) { 229 | if (outputArg.toLowerCase().endsWith('.srt')) { 230 | outputPath = outputArg 231 | } else { 232 | const base = youtubeVideoId ? 
`youtube_${youtubeVideoId}` : basename(input, extname(input)) 233 | outputPath = join(outputArg, `${base}.srt`) 234 | } 235 | } 236 | 237 | const result = await transcribe({ 238 | inputPath, 239 | apiKey, 240 | optimize: !useRaw, 241 | outputPath, 242 | offsetSeconds, 243 | chunkMinutes, 244 | }) 245 | 246 | console.log(`\n✅ SRT file saved to: ${result.srtPath}`) 247 | console.log(`\nTranscription preview:`) 248 | console.log('─'.repeat(60)) 249 | console.log(result.text.substring(0, 500) + (result.text.length > 500 ? '...' : '')) 250 | console.log('─'.repeat(60)) 251 | console.log(`\nLanguage: ${result.language}`) 252 | console.log(`Duration: ${result.duration.toFixed(2)}s`) 253 | 254 | } catch (error) { 255 | console.error('Error:', error instanceof Error ? error.message : String(error)) 256 | process.exit(1) 257 | } finally { 258 | // Clean up downloaded YouTube file 259 | if (downloadedFile && existsSync(downloadedFile)) { 260 | unlinkSync(downloadedFile) 261 | console.log('🧹 Cleaned up downloaded file') 262 | } 263 | } 264 | } 265 | 266 | main().catch((error) => { 267 | console.error('Error:', error.message) 268 | process.exit(1) 269 | }) 270 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # @illyism/transcribe 2 | 3 | [![npm version](https://img.shields.io/npm/v/@illyism/transcribe.svg)](https://www.npmjs.com/package/@illyism/transcribe) 4 | [![npm downloads](https://img.shields.io/npm/dt/@illyism/transcribe.svg)](https://www.npmjs.com/package/@illyism/transcribe) 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 6 | 7 | Transcribe audio/video files to SRT subtitles in one command. Optimized for large files, long movies, and video editing workflows. 8 | 9 | ## Quick Start 10 | 11 | ```bash 12 | # 1. Try it instantly (no install needed) 13 | npx @illyism/transcribe video.mp4 14 | 15 | # 2. Set your OpenAI API key (one-time setup) 16 | export OPENAI_API_KEY=sk-... 17 | 18 | # 3. Transcribe anything 19 | npx @illyism/transcribe video.mp4 20 | npx @illyism/transcribe https://www.youtube.com/watch?v=VIDEO_ID 21 | ``` 22 | 23 | **That's it!** Get your [free API key here](https://platform.openai.com/api-keys) and start transcribing. 24 | 25 | --- 26 | 27 | ## Why Use This Instead of Whisper CLI? 
28 | 29 | While OpenAI's Whisper has multiple ways to use it, this tool provides a **simpler, more convenient** experience: 30 | 31 | | Feature | @illyism/transcribe | Official Whisper CLI | Local Whisper (whisper.cpp) | 32 | |---------|---------------------|---------------------|----------------------------| 33 | | **Setup** | Zero setup with `npx`/`bunx` | Install Python package | Download models (~1-5GB) | 34 | | **Video Support** | ✅ Automatic with FFmpeg | ❌ Audio only | ❌ Audio only | 35 | | **YouTube Support** | ✅ Built-in | ❌ Manual download | ❌ Manual download | 36 | | **SRT Output** | ✅ Built-in | ❌ Manual formatting | ✅ Available | 37 | | **Processing** | ☁️ Cloud (fast) | ☁️ Cloud (fast) | 💻 Local (slower) | 38 | | **Cost** | $0.006/min | $0.006/min | Free (after setup) | 39 | | **Internet Required** | ✅ Yes | ✅ Yes | ❌ No | 40 | | **Best For** | Quick tasks, videos, YouTube | API integration | Privacy, offline use | 41 | 42 | ### Key Advantages 43 | 44 | - 🎬 **Handles videos directly** - No need to manually extract audio 45 | - 🎥 **YouTube support** - Transcribe YouTube videos with just the URL 46 | - 📝 **SRT format ready** - Generates subtitles automatically 47 | - 🚀 **Zero installation** - Just run `npx @illyism/transcribe video.mp4` 48 | - 🔧 **Simple config** - One-time API key setup 49 | - 🌐 **Cross-platform** - Works on macOS, Linux, Windows 50 | 51 | **Perfect for**: Content creators, podcasters, and developers who need quick, accurate transcriptions with minimal setup. 52 | 53 | ### Real-World Use Case 54 | 55 | Got a 30-60 minute video that's 2-4GB? Other tools like Descript upload the **entire video** file, which takes forever and costs more. 56 | 57 | This tool: 58 | 1. 🎬 Extracts only the audio locally (takes seconds with FFmpeg) 59 | 2. ☁️ Uploads only ~20-40MB of audio to Whisper 60 | 3. 📝 Generates SRT subtitles 61 | 62 | **Result**: 10-100x faster than uploading multi-GB video files. Same quality, fraction of the time and bandwidth. 63 | 64 | ## Features 65 | 66 | - 🎬 **Video & Audio Support**: Works with MP4, MP3, WAV, M4A, WebM, OGG, MOV, AVI, and MKV 67 | - 🎥 **YouTube Support**: Download and transcribe YouTube videos directly 68 | - 🎯 **High Accuracy**: Powered by OpenAI's Whisper API 69 | - ⚡ **Smart Optimization**: Automatic 1.2x speed processing + mono/16kHz extraction (optimized for dialogue) 70 | - 📝 **SRT Format**: Generates standard SRT subtitle files with precise timestamps 71 | - 🎞️ **Long Movies**: Automatic chunking for feature-length content (45+ minutes) 72 | - 🎬 **Editor-Friendly**: Timecode offset, custom output paths, chunk size control 73 | - 🔧 **Simple Setup**: Easy configuration via environment variable or config file 74 | - 🌍 **Multi-language**: Automatically detects language 75 | - 🚀 **Lightning Fast**: Optimized for 2-4GB+ video files 76 | 77 | ## Installation & Setup 78 | 79 | ### Option 1: Use Instantly (No Install) 80 | 81 | ```bash 82 | npx @illyism/transcribe video.mp4 83 | ``` 84 | 85 | ### Option 2: Install Globally 86 | 87 | ```bash 88 | npm install -g @illyism/transcribe 89 | # or: bun install -g @illyism/transcribe 90 | ``` 91 | 92 | ### Prerequisites 93 | 94 |
95 | 📦 Install FFmpeg (required) 96 | 97 | ```bash 98 | # macOS 99 | brew install ffmpeg 100 | 101 | # Ubuntu/Debian 102 | sudo apt-get install ffmpeg 103 | 104 | # Windows 105 | choco install ffmpeg 106 | ``` 107 |
108 | 109 |
110 | 🎥 Install yt-dlp (optional, for YouTube) 111 | 112 | ```bash 113 | # macOS 114 | brew install yt-dlp 115 | 116 | # Ubuntu/Debian 117 | sudo apt install yt-dlp 118 | 119 | # Windows 120 | winget install yt-dlp 121 | 122 | # Or with pip 123 | pip install yt-dlp 124 | ``` 125 |
126 | 127 |
128 | 🔑 Get OpenAI API Key (required) 129 | 130 | 1. Go to [platform.openai.com/api-keys](https://platform.openai.com/api-keys) 131 | 2. Create a new API key 132 | 3. Copy it and set it up below ⬇️ 133 |
134 | 135 | ## API Key Setup (30 seconds) 136 | 137 | **One-time setup** - Choose your preferred method: 138 | 139 | ### Method 1: Config File (Recommended) 140 | 141 | ```bash 142 | mkdir -p ~/.transcribe && echo '{"apiKey": "sk-YOUR_KEY"}' > ~/.transcribe/config.json 143 | ``` 144 | 145 | ### Method 2: Environment Variable 146 | 147 | ```bash 148 | export OPENAI_API_KEY=sk-YOUR_KEY 149 | ``` 150 | 151 | **Don't have a key?** [Get one free here](https://platform.openai.com/api-keys) (takes 1 minute) 152 | 153 | ## Usage Examples 154 | 155 | ```bash 156 | # Local video file 157 | transcribe video.mp4 158 | 159 | # YouTube video 160 | transcribe https://www.youtube.com/watch?v=VIDEO_ID 161 | 162 | # Audio file 163 | transcribe podcast.mp3 164 | 165 | # Disable optimization (use original audio) 166 | transcribe video.mp4 --raw 167 | ``` 168 | 169 | **Outputs:** Creates `video.srt` in the same directory. 170 | 171 | ### Editor-Friendly Features 172 | 173 | Perfect for video editing workflows: 174 | 175 | ```bash 176 | # Custom output path (file or directory) 177 | transcribe movie.mkv --output ./subtitles 178 | transcribe movie.mkv --output ./subtitles/movie.srt 179 | 180 | # Timecode offset (for editorial timelines) 181 | transcribe movie.mkv --offset 01:00:00.000 # Start at 1 hour 182 | transcribe movie.mkv --offset 3600 # Same, in seconds 183 | 184 | # Force chunking for very long movies 185 | transcribe long_movie.mkv --chunk-minutes 15 186 | ``` 187 | 188 | **Why chunking?** Movies 45+ minutes are automatically split into ~20-minute chunks for reliability. Each chunk is transcribed separately, then merged seamlessly with correct timestamps. 189 | 190 | ### What Happens Automatically 191 | 192 | By default, the tool optimizes large files: 193 | 194 | ``` 195 | 2.7GB video → Extract audio (mono, 16kHz) → Speed up 1.2x → Chunk if >45min → Upload chunks → Transcribe → Merge & adjust timestamps 196 | ``` 197 | 198 | **For long movies (45+ minutes):** 199 | - Automatically splits into ~20-minute chunks 200 | - Transcribes each chunk separately 201 | - Merges results with correct timestamps 202 | - Handles 2+ hour movies reliably 203 | 204 | **Result:** 205 | - ⚡ 99.5% smaller uploads (2.7GB → 12.8MB) 206 | - 🚀 10-100x faster than uploading full video 207 | - 🎯 ~98% accuracy maintained 208 | - 💰 Same cost ($0.006/min) 209 | 210 | **Want original audio?** Add `--raw` flag. 211 | 212 | ### Use as a Library 213 | 214 | ```bash 215 | npm install @illyism/transcribe 216 | ``` 217 | 218 | ```typescript 219 | import { transcribe } from '@illyism/transcribe' 220 | 221 | const result = await transcribe({ 222 | inputPath: 'video.mp4', 223 | apiKey: process.env.OPENAI_API_KEY, 224 | optimize: true // default, set false to disable 225 | }) 226 | 227 | console.log(result.srtPath) // Path to generated SRT file 228 | console.log(result.text) // Full transcription text 229 | ``` 230 | 231 |
232 | Full API reference 233 | 234 | ```typescript 235 | interface TranscribeOptions { 236 | inputPath: string // Path to video/audio file 237 | apiKey?: string // OpenAI API key (or use env var) 238 | outputPath?: string // Custom output path (optional) 239 | optimize?: boolean // Enable optimization (default: true) 240 | } 241 | 242 | interface TranscribeResult { 243 | srtPath: string // Path to generated SRT file 244 | text: string // Full transcription text 245 | language: string // Detected language 246 | duration: number // Duration in seconds 247 | } 248 | ``` 249 |
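Beyond the options listed in that table, the CLI in this repo (src/cli.ts, included further down) also forwards `offsetSeconds` and `chunkMinutes` to `transcribe()`. The sketch below assumes those two extra fields are accepted programmatically as well; they are not in the interface table above, so treat them as unverified, and the file paths are purely illustrative.

```typescript
import { transcribe } from '@illyism/transcribe'

// outputPath and optimize are documented above; offsetSeconds and chunkMinutes
// mirror what src/cli.ts passes through, so treat them as a sketch of the
// extended options rather than a guaranteed stable API.
const result = await transcribe({
  inputPath: '/footage/interview.mp4',   // hypothetical path
  apiKey: process.env.OPENAI_API_KEY,
  outputPath: './subs/interview.srt',
  optimize: true,
  offsetSeconds: 3600,   // start subtitles at 01:00:00.000
  chunkMinutes: 15,      // force smaller chunks for very long inputs
})

console.log(`${result.language} · ${result.duration.toFixed(1)}s → ${result.srtPath}`)
```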
250 | 251 | --- 252 | 253 | ## Details 254 | 255 |
256 | 📋 Supported Formats 257 | 258 | - **Video**: MP4, WebM, MOV, AVI, MKV 259 | - **Audio**: MP3, WAV, M4A, OGG, Opus 260 | - **YouTube**: All videos, Shorts, youtu.be links 261 |
262 | 263 |
264 | 💰 Cost 265 | 266 | OpenAI Whisper API: **$0.006 per minute** 267 | 268 | Examples: 269 | - 5 min: $0.03 270 | - 30 min: $0.18 271 | - 2 hours: $0.72 272 |
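If you want to budget programmatically, the arithmetic behind those examples is just duration times the per-minute rate. A minimal sketch (the helper name is made up, not part of the package):

```typescript
// Whisper API pricing used throughout this README: $0.006 per minute of audio.
const COST_PER_MINUTE = 0.006

// Hypothetical helper, not exported by @illyism/transcribe.
function estimateWhisperCost(durationSeconds: number): number {
  return (durationSeconds / 60) * COST_PER_MINUTE
}

console.log(estimateWhisperCost(5 * 60).toFixed(2))   // "0.03"
console.log(estimateWhisperCost(30 * 60).toFixed(2))  // "0.18"
console.log(estimateWhisperCost(2 * 3600).toFixed(2)) // "0.72"
```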
273 | 274 |
275 | ⚙️ How It Works 276 | 277 | 1. Extract audio from video (mono, 16kHz - optimized for speech) 278 | 2. Optimize: 1.2x speed + compression if >24MB 279 | 3. Auto-chunk if >45 minutes (for reliability) 280 | 4. Upload chunks to Whisper API (or single file) 281 | 5. Generate SRT with timestamps 282 | 6. Merge chunks (if needed) and adjust timestamps to match original 283 | 7. Apply timecode offset (if specified) 284 | 8. Clean up temp files 285 |
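The subtle part of steps 6 and 7 is the timestamp bookkeeping: Whisper returns times relative to each sped-up chunk, so they have to be multiplied back by the speed factor and shifted by the chunk's position before any user-supplied offset is added. A self-contained sketch of that arithmetic (illustrative only, not the package's internal code):

```typescript
interface Segment { start: number; end: number; text: string }

// Map a segment from sped-up, chunk-relative time back to original-media time.
// speedFactor: e.g. 1.2 when the audio was sped up 1.2x before upload.
// chunkStartSec: where this chunk begins, expressed in original-media seconds.
// userOffsetSec: optional --offset shift for editorial timelines.
function restoreTimestamps(
  seg: Segment,
  speedFactor: number,
  chunkStartSec: number,
  userOffsetSec = 0
): Segment {
  return {
    ...seg,
    start: seg.start * speedFactor + chunkStartSec + userOffsetSec,
    end: seg.end * speedFactor + chunkStartSec + userOffsetSec,
  }
}

// Example: a segment 10s into the second 20-minute chunk of 1.2x audio.
// 1200s of sped-up audio corresponds to 1440s of original audio, so the
// segment lands at 10 * 1.2 + 1440 = 1452s on the original timeline.
console.log(restoreTimestamps({ start: 10, end: 12.5, text: 'example' }, 1.2, 1440))
```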
286 | 287 |
288 | 📄 SRT Output Example 289 | 290 | ```srt 291 | 1 292 | 00:00:00,000 --> 00:00:03,420 293 | Hey and thank you for getting the SEO roast. 294 | 295 | 2 296 | 00:00:03,420 --> 00:00:06,840 297 | I'll take a look at your website and see what things we can improve. 298 | ``` 299 |
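One detail worth calling out in that output: SRT uses a comma, not a period, before the milliseconds. If you post-process timestamps yourself, a formatter along the lines of the package's own (a small sketch, not something you import from the package) looks like this:

```typescript
// Convert seconds to an SRT timestamp: HH:MM:SS,mmm (comma before milliseconds).
function toSrtTime(seconds: number): string {
  const pad = (n: number, width = 2) => String(n).padStart(width, '0')
  const h = Math.floor(seconds / 3600)
  const m = Math.floor((seconds % 3600) / 60)
  const s = Math.floor(seconds % 60)
  const ms = Math.floor((seconds % 1) * 1000)
  return `${pad(h)}:${pad(m)}:${pad(s)},${pad(ms, 3)}`
}

console.log(toSrtTime(3.5))    // "00:00:03,500"
console.log(toSrtTime(83.25))  // "00:01:23,250"
```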
300 | 301 | ## Troubleshooting 302 | 303 |
304 | "OPENAI_API_KEY not found" 305 | 306 | Set up your API key using one of the methods in [API Key Setup](#api-key-setup-30-seconds). 307 |
308 | 309 |
310 | "FFmpeg not found" 311 | 312 | Install FFmpeg: 313 | ```bash 314 | brew install ffmpeg # macOS 315 | sudo apt install ffmpeg # Ubuntu 316 | choco install ffmpeg # Windows 317 | ``` 318 |
319 | 320 |
321 | "yt-dlp not found" (YouTube only) 322 | 323 | Install yt-dlp: 324 | ```bash 325 | brew install yt-dlp # macOS 326 | sudo apt install yt-dlp # Ubuntu 327 | pip install yt-dlp # Any platform 328 | ``` 329 |
330 | 331 |
332 | File not found error 333 | 334 | Use absolute paths: 335 | ```bash 336 | transcribe /full/path/to/video.mp4 337 | ``` 338 |
339 | 340 |
341 | API errors (502, timeout, etc.) 342 | 343 | The OpenAI API may be temporarily unavailable. Wait 30 seconds and try again; transient 502s and timeouts usually clear on retry. 344 |
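When you call the library from your own code, a small retry wrapper turns those transient failures into a non-event. This is a generic sketch (the package does not retry for you, as far as this README documents):

```typescript
import { transcribe } from '@illyism/transcribe'

// Generic retry-with-backoff wrapper, not built into the package.
async function transcribeWithRetry(inputPath: string, attempts = 3) {
  for (let i = 1; i <= attempts; i++) {
    try {
      return await transcribe({ inputPath, apiKey: process.env.OPENAI_API_KEY })
    } catch (err) {
      if (i === attempts) throw err
      const waitMs = 30_000 * i // 30s, then 60s, ...
      console.warn(`Attempt ${i} failed (${err}); retrying in ${waitMs / 1000}s`)
      await new Promise((resolve) => setTimeout(resolve, waitMs))
    }
  }
  throw new Error('unreachable')
}

await transcribeWithRetry('video.mp4')
```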
345 | 346 |
347 | "Could not parse multipart form" error 348 | 349 | If you're using Bun runtime, switch to Node.js: 350 | 351 | ```bash 352 | # Use Node.js instead of Bun 353 | node dist/cli.js video.mp4 354 | 355 | # Or install globally and use the transcribe command 356 | npm install -g @illyism/transcribe 357 | transcribe video.mp4 358 | ``` 359 | 360 | The CLI works best with Node.js 18+ due to OpenAI SDK compatibility. 361 |
362 | 363 | --- 364 | 365 | ## Links 366 | 367 | - 📦 [NPM Package](https://www.npmjs.com/package/@illyism/transcribe) 368 | - 🐙 [GitHub Repo](https://github.com/Illyism/transcribe-cli) 369 | - 📚 [Full Changelog](https://github.com/Illyism/transcribe-cli/blob/main/CHANGELOG.md) 370 | - 🧪 [A/B Test Results](https://github.com/Illyism/transcribe-cli/tree/main/test) 371 | - 🐛 [Report Issues](https://github.com/Illyism/transcribe-cli/issues) 372 | 373 | ## Contributing 374 | 375 | Pull requests welcome! See [GitHub repo](https://github.com/Illyism/transcribe-cli). 376 | 377 | ## License 378 | 379 | MIT © [Ilias Ismanalijev](https://github.com/Illyism) 380 | -------------------------------------------------------------------------------- /src/transcribe.ts: -------------------------------------------------------------------------------- 1 | import { spawn } from 'child_process' 2 | import { existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from 'fs' 3 | import { writeFile } from 'fs/promises' 4 | import { basename, dirname, extname, join } from 'path' 5 | import type { TranscribeOptions, TranscribeResult } from './index' 6 | import { optimizeAudio } from './optimize' 7 | import type { WhisperResponse, WhisperSegment, WhisperWord } from './types' 8 | 9 | const MAX_UPLOAD_MB = 24 // Keep under ~25MB Whisper API limit (with headroom) 10 | const MAX_UPLOAD_BYTES = MAX_UPLOAD_MB * 1024 * 1024 11 | const AUTO_CHUNK_MINUTES = 20 12 | const AUTO_CHUNK_THRESHOLD_MINUTES = 45 13 | 14 | function formatTime(seconds: number): string { 15 | const hours = Math.floor(seconds / 3600) 16 | const minutes = Math.floor((seconds % 3600) / 60) 17 | const secs = Math.floor(seconds % 60) 18 | const millis = Math.floor((seconds % 1) * 1000) 19 | 20 | return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(secs).padStart(2, '0')},${String(millis).padStart(3, '0')}` 21 | } 22 | 23 | function convertSegmentsToSRT(segments: Array>): string { 24 | let srt = '' 25 | 26 | segments.forEach((segment, index) => { 27 | srt += `${index + 1}\n` 28 | srt += `${formatTime(segment.start)} --> ${formatTime(segment.end)}\n` 29 | srt += `${segment.text.trim()}\n\n` 30 | }) 31 | 32 | return srt 33 | } 34 | 35 | function transformSegments( 36 | segments: WhisperSegment[], 37 | transform: (seconds: number) => number 38 | ): WhisperSegment[] { 39 | return segments.map((segment) => ({ 40 | ...segment, 41 | start: transform(segment.start), 42 | end: transform(segment.end), 43 | words: segment.words?.map((word: WhisperWord) => ({ 44 | ...word, 45 | start: transform(word.start), 46 | end: transform(word.end), 47 | })), 48 | })) 49 | } 50 | 51 | async function extractAudio(inputPath: string, outputPath: string): Promise { 52 | return new Promise((resolve, reject) => { 53 | let errorOutput = '' 54 | 55 | // Optimize for speech transcription: mono, 16kHz sample rate (Whisper's native) 56 | // This reduces file size significantly for movies while maintaining dialogue clarity 57 | const ffmpeg = spawn('ffmpeg', [ 58 | '-i', inputPath, 59 | '-vn', // No video 60 | '-ac', '1', // Mono (dialogue-focused) 61 | '-ar', '16000', // 16kHz sample rate (optimal for speech, reduces size) 62 | '-acodec', 'libmp3lame', 63 | '-q:a', '2', // High quality MP3 64 | '-y', 65 | outputPath 66 | ]) 67 | 68 | // Capture stderr for error messages 69 | ffmpeg.stderr.on('data', (data) => { 70 | errorOutput += data.toString() 71 | }) 72 | 73 | ffmpeg.on('close', (code) => { 74 | if (code === 0) { 75 | resolve() 76 | } else { 77 | // 
Provide more helpful error messages 78 | let errorMsg = `FFmpeg exited with code ${code}` 79 | 80 | if (errorOutput.includes('Permission denied')) { 81 | errorMsg += '\nPermission denied. Check file/folder permissions.' 82 | } else if (errorOutput.includes('No such file or directory')) { 83 | errorMsg += '\nInput file not found or output directory does not exist.' 84 | } else if (errorOutput.includes('Invalid data found')) { 85 | errorMsg += '\nInvalid or corrupted video file.' 86 | } else if (errorOutput.includes('does not contain any stream')) { 87 | errorMsg += '\nVideo file does not contain a valid audio or video stream.' 88 | } else { 89 | // Show last few lines of FFmpeg output for debugging 90 | const lines = errorOutput.trim().split('\n') 91 | const relevantLines = lines.slice(-5).join('\n') 92 | if (relevantLines) { 93 | errorMsg += '\n\nFFmpeg output:\n' + relevantLines 94 | } else { 95 | errorMsg += '\nFFmpeg conversion failed. Make sure FFmpeg is installed and the video file is valid.' 96 | } 97 | } 98 | 99 | reject(new Error(errorMsg)) 100 | } 101 | }) 102 | 103 | ffmpeg.on('error', (err) => { 104 | if (err.message.includes('ENOENT')) { 105 | reject(new Error('FFmpeg is not installed. Please install FFmpeg:\n macOS: brew install ffmpeg\n Ubuntu: sudo apt-get install ffmpeg\n Windows: choco install ffmpeg')) 106 | } else { 107 | reject(err) 108 | } 109 | }) 110 | }) 111 | } 112 | 113 | async function getMediaDurationSeconds(inputPath: string): Promise<number> { 114 | return new Promise((resolve, reject) => { 115 | let stdout = '' 116 | let stderr = '' 117 | 118 | const ffprobe = spawn('ffprobe', [ 119 | '-v', 'error', 120 | '-show_entries', 'format=duration', 121 | '-of', 'default=noprint_wrappers=1:nokey=1', 122 | inputPath, 123 | ]) 124 | 125 | ffprobe.stdout.on('data', (data) => { 126 | stdout += data.toString() 127 | }) 128 | 129 | ffprobe.stderr.on('data', (data) => { 130 | stderr += data.toString() 131 | }) 132 | 133 | ffprobe.on('close', (code) => { 134 | if (code === 0) { 135 | const duration = parseFloat(stdout.trim()) 136 | if (!Number.isFinite(duration)) { 137 | reject(new Error(`FFprobe returned invalid duration for: ${inputPath}`)) 138 | return 139 | } 140 | resolve(duration) 141 | } else { 142 | reject(new Error(`FFprobe failed with code ${code}${stderr ? `\n\nFFprobe output:\n${stderr.trim()}` : ''}`)) 143 | } 144 | }) 145 | 146 | ffprobe.on('error', (err) => { 147 | if (err.message.includes('ENOENT')) { 148 | reject(new Error('FFprobe is not installed.
Please install FFmpeg (includes ffprobe):\n macOS: brew install ffmpeg\n Ubuntu: sudo apt-get install ffmpeg\n Windows: choco install ffmpeg')) 149 | } else { 150 | reject(err) 151 | } 152 | }) 153 | }) 154 | } 155 | 156 | async function splitAudioIntoChunks(inputPath: string, chunkSeconds: number): Promise<string[]> { 157 | if (!Number.isFinite(chunkSeconds) || chunkSeconds <= 0) { 158 | throw new Error(`Invalid chunkSeconds: ${chunkSeconds}`) 159 | } 160 | 161 | const ext = inputPath.toLowerCase().split('.').pop() || 'mp3' 162 | const dir = dirname(inputPath) 163 | const prefix = `chunks_${Date.now()}` 164 | const outputPattern = join(dir, `${prefix}_%03d.${ext}`) 165 | 166 | await new Promise<void>((resolve, reject) => { 167 | let stderr = '' 168 | 169 | const ffmpeg = spawn('ffmpeg', [ 170 | '-i', inputPath, 171 | '-f', 'segment', 172 | '-segment_time', String(chunkSeconds), 173 | '-reset_timestamps', '1', 174 | '-c', 'copy', 175 | '-y', 176 | outputPattern, 177 | ]) 178 | 179 | ffmpeg.stderr.on('data', (data) => { 180 | stderr += data.toString() 181 | }) 182 | 183 | ffmpeg.on('close', (code) => { 184 | if (code === 0) { 185 | resolve() 186 | } else { 187 | reject(new Error(`FFmpeg chunking failed with code ${code}${stderr ? `\n\nFFmpeg output:\n${stderr.trim().split('\n').slice(-8).join('\n')}` : ''}`)) 188 | } 189 | }) 190 | 191 | ffmpeg.on('error', (err) => { 192 | if (err.message.includes('ENOENT')) { 193 | reject(new Error('FFmpeg is not installed. Please install FFmpeg:\n macOS: brew install ffmpeg\n Ubuntu: sudo apt-get install ffmpeg\n Windows: choco install ffmpeg')) 194 | } else { 195 | reject(err) 196 | } 197 | }) 198 | }) 199 | 200 | const created = readdirSync(dir) 201 | .filter((name) => name.startsWith(`${prefix}_`) && name.toLowerCase().endsWith(`.${ext}`)) 202 | .sort() 203 | .map((name) => join(dir, name)) 204 | 205 | if (created.length === 0) { 206 | throw new Error('Chunking produced no output files. Please try again.') 207 | } 208 | 209 | // Sanity check: if any chunk is still too large, give actionable guidance 210 | const tooLarge = created.find((p) => statSync(p).size > MAX_UPLOAD_BYTES) 211 | if (tooLarge) { 212 | throw new Error( 213 | `Audio chunk is still too large for Whisper API (~${MAX_UPLOAD_MB}MB).\n\n` + 214 | `Chunk: ${tooLarge}\n\n` + 215 | `Try:\n` + 216 | `- removing --raw (use default optimization)\n` + 217 | `- or using a smaller chunk size (e.g.
--chunk-minutes 10)\n` 218 | ) 219 | } 220 | 221 | return created 222 | } 223 | 224 | async function transcribeWithWhisper(audioPath: string, apiKey: string): Promise<WhisperResponse> { 225 | const { default: OpenAI, toFile } = await import('openai') 226 | const openai = new OpenAI({ apiKey }) 227 | 228 | // Read file as buffer and use SDK's toFile helper to create proper File object 229 | const fs = await import('fs/promises') 230 | const { basename } = await import('path') 231 | const fileBuffer = await fs.readFile(audioPath) 232 | const fileName = basename(audioPath) 233 | 234 | const ext = fileName.toLowerCase().split('.').pop() 235 | const mimeTypes: Record<string, string> = { 236 | mp3: 'audio/mpeg', 237 | mp4: 'audio/mp4', 238 | m4a: 'audio/mp4', 239 | wav: 'audio/wav', 240 | ogg: 'audio/ogg', 241 | webm: 'audio/webm', 242 | flac: 'audio/flac', 243 | } 244 | const mimeType = (ext && mimeTypes[ext]) || 'application/octet-stream' 245 | 246 | // Use SDK's toFile helper to create a proper File object 247 | const audioFile = await toFile(fileBuffer, fileName, { type: mimeType }) 248 | 249 | const transcription = await openai.audio.transcriptions.create({ 250 | file: audioFile, 251 | model: 'whisper-1', 252 | response_format: 'verbose_json', 253 | timestamp_granularities: ['segment'] 254 | }) 255 | 256 | return transcription as WhisperResponse 257 | } 258 | 259 | /** 260 | * Transcribe an audio or video file to SRT format 261 | * 262 | * @param options - Transcription options 263 | * @returns Transcription result with path to SRT file and transcription details 264 | * 265 | * @example 266 | * ```typescript 267 | * import { transcribe } from '@illyism/transcribe' 268 | * 269 | * const result = await transcribe({ 270 | * inputPath: '/path/to/video.mp4', 271 | * apiKey: 'sk-...' 272 | * }) 273 | * 274 | * console.log('SRT saved to:', result.srtPath) 275 | * console.log('Language:', result.language) 276 | * console.log('Duration:', result.duration) 277 | * ``` 278 | */ 279 | export async function transcribe(options: TranscribeOptions): Promise<TranscribeResult> { 280 | const { inputPath, apiKey, outputPath, optimize = true, offsetSeconds = 0, chunkMinutes } = options 281 | 282 | if (!existsSync(inputPath)) { 283 | throw new Error(`File not found: ${inputPath}`) 284 | } 285 | 286 | if (!apiKey) { 287 | throw new Error('API key is required. Provide it in options or set OPENAI_API_KEY environment variable.') 288 | } 289 | 290 | const ext = inputPath.toLowerCase().split('.').pop() 291 | const supportedFormats = ['mp4', 'mp3', 'wav', 'm4a', 'webm', 'ogg', 'opus', 'mov', 'avi', 'mkv'] 292 | 293 | if (!ext || !supportedFormats.includes(ext)) { 294 | throw new Error(`Unsupported format.
Supported formats: ${supportedFormats.join(', ')}`) 295 | } 296 | 297 | let audioPath = inputPath 298 | let tempAudioPath: string | null = null 299 | let optimizedPath: string | null = null 300 | let chunkPaths: string[] = [] 301 | let speedFactor = 1.0 302 | 303 | // Extract audio if it's a video file 304 | if (['mp4', 'webm', 'mov', 'avi', 'mkv'].includes(ext)) { 305 | console.log('🎬 Extracting audio from video...') 306 | const dir = dirname(inputPath) 307 | const baseName = basename(inputPath, extname(inputPath)) 308 | tempAudioPath = join(dir, `${baseName}_temp.mp3`) 309 | 310 | await extractAudio(inputPath, tempAudioPath) 311 | console.log('✅ Audio extraction complete!') 312 | audioPath = tempAudioPath 313 | } 314 | 315 | try { 316 | // Optimize audio if enabled 317 | if (optimize) { 318 | const optimized = await optimizeAudio(audioPath) 319 | if (optimized.path !== audioPath) { 320 | optimizedPath = optimized.path 321 | audioPath = optimized.path 322 | } 323 | speedFactor = optimized.speedFactor 324 | } 325 | 326 | const fileSizeBytes = statSync(audioPath).size 327 | const durationOptimized = await getMediaDurationSeconds(audioPath) 328 | const durationOriginal = durationOptimized * speedFactor 329 | 330 | const chunkMinutesToUse = chunkMinutes ?? AUTO_CHUNK_MINUTES 331 | const shouldChunk = 332 | chunkMinutes !== undefined || 333 | fileSizeBytes > MAX_UPLOAD_BYTES || 334 | durationOriginal > AUTO_CHUNK_THRESHOLD_MINUTES * 60 335 | 336 | if (offsetSeconds !== 0) { 337 | console.log(`🕒 Applying timestamp offset: ${offsetSeconds}s`) 338 | } 339 | 340 | let mergedSegments: WhisperSegment[] = [] 341 | let mergedText = '' 342 | let language = 'unknown' 343 | let originalDurationSeconds = durationOriginal 344 | 345 | if (shouldChunk) { 346 | const chunkSecondsOriginal = Math.max(60, chunkMinutesToUse * 60) 347 | const chunkSecondsOptimized = chunkSecondsOriginal / speedFactor 348 | 349 | console.log(`🧩 Chunking for reliability: ~${chunkMinutesToUse} min chunks (${chunkSecondsOriginal}s)`) 350 | chunkPaths = await splitAudioIntoChunks(audioPath, chunkSecondsOptimized) 351 | console.log(`✅ Created ${chunkPaths.length} chunks`) 352 | 353 | let offsetOptimizedSeconds = 0 354 | let totalOptimizedSeconds = 0 355 | 356 | for (let i = 0; i < chunkPaths.length; i++) { 357 | const chunkPath = chunkPaths[i] 358 | console.log(`🎙️ Transcribing chunk ${i + 1}/${chunkPaths.length}...`) 359 | 360 | const chunkDuration = await getMediaDurationSeconds(chunkPath) 361 | const chunkTranscription = await transcribeWithWhisper(chunkPath, apiKey) 362 | 363 | if (i === 0) { 364 | language = chunkTranscription.language 365 | } 366 | 367 | mergedText += chunkTranscription.text + '\n' 368 | 369 | const transformed = transformSegments(chunkTranscription.segments, (t) => { 370 | // chunk audio timestamps are in optimized time; map to global original timeline: 371 | // (localChunkTime + chunkOffsetOptimized) * speedFactor + userOffsetSeconds 372 | return (t + offsetOptimizedSeconds) * speedFactor + offsetSeconds 373 | }) 374 | 375 | mergedSegments.push(...transformed) 376 | 377 | offsetOptimizedSeconds += chunkDuration 378 | totalOptimizedSeconds += chunkDuration 379 | } 380 | 381 | originalDurationSeconds = totalOptimizedSeconds * speedFactor 382 | console.log(`✅ Transcription complete! 
Language: ${language}, Duration: ${originalDurationSeconds.toFixed(2)}s`) 383 | } else { 384 | // Transcribe with Whisper 385 | console.log('🎙️ Transcribing with OpenAI Whisper API...') 386 | const transcription = await transcribeWithWhisper(audioPath, apiKey) 387 | language = transcription.language 388 | mergedText = transcription.text 389 | 390 | mergedSegments = transformSegments(transcription.segments, (t) => t * speedFactor + offsetSeconds) 391 | originalDurationSeconds = transcription.duration * speedFactor 392 | 393 | console.log(`✅ Transcription complete! Language: ${language}, Duration: ${originalDurationSeconds.toFixed(2)}s`) 394 | } 395 | 396 | // Sort segments by start time (important for chunked transcriptions) 397 | mergedSegments.sort((a, b) => a.start - b.start) 398 | 399 | // Convert to SRT format 400 | const srt = convertSegmentsToSRT(mergedSegments) 401 | 402 | // Save SRT file (ensure directory exists) 403 | const defaultSrtPath = join(dirname(inputPath), `${basename(inputPath, extname(inputPath))}.srt`) 404 | const srtPath = outputPath || defaultSrtPath 405 | mkdirSync(dirname(srtPath), { recursive: true }) 406 | await writeFile(srtPath, srt, 'utf-8') 407 | 408 | return { 409 | srtPath, 410 | text: mergedText.trim(), 411 | language, 412 | duration: originalDurationSeconds 413 | } 414 | } finally { 415 | // Clean up temporary files 416 | for (const chunkPath of chunkPaths) { 417 | if (chunkPath && existsSync(chunkPath)) { 418 | unlinkSync(chunkPath) 419 | } 420 | } 421 | if (tempAudioPath && existsSync(tempAudioPath)) { 422 | unlinkSync(tempAudioPath) 423 | } 424 | if (optimizedPath && existsSync(optimizedPath)) { 425 | unlinkSync(optimizedPath) 426 | } 427 | if (chunkPaths.length || tempAudioPath || optimizedPath) { 428 | console.log('🧹 Cleaned up temporary files') 429 | } 430 | } 431 | } 432 | --------------------------------------------------------------------------------
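For reference, a hedged usage sketch of the chunking and offset options handled above (`chunkMinutes` and `offsetSeconds` in `TranscribeOptions`); the file name and values are placeholders, not recommendations:

```typescript
import { transcribe } from '@illyism/transcribe'

// Force ~10-minute chunks and shift every subtitle 2 seconds later,
// e.g. to line the SRT up with an editor timeline offset.
const result = await transcribe({
  inputPath: 'lecture.mkv',
  apiKey: process.env.OPENAI_API_KEY,
  chunkMinutes: 10,
  offsetSeconds: 2,
})

console.log(result.srtPath, result.language, `${result.duration.toFixed(1)}s`)
```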