├── .python-version
├── .gitignore
├── LICENSE
├── pyproject.toml
├── .dockerignore
├── README.md
├── Dockerfile
├── tts.py
├── main.py
├── app.py
└── templates
    └── index.html


/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pth
2 | *.onnx
3 | __pycache__/
4 | .venv/
5 | .DS_Store
6 | sample_output.wav
7 | output_unvocalized.wav
8 | output_*.wav


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | License:
2 | This dataset is licensed under CC BY-NC 4.0, with an additional restriction:
3 | It is intended only for academic research and educational use.
4 | 
5 | Commercial use and non-academic non-commercial use are not permitted.
6 | For any other use, please contact the dataset creators.
7 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "phonikud-styletts2-dockerized"
 3 | version = "0.1.0"
 4 | description = "Add your description here"
 5 | readme = "README.md"
 6 | requires-python = ">=3.11"
 7 | dependencies = [
 8 |     "flask>=3.0.0",
 9 |     "flask-restx>=1.3.0",
10 |     "phonikud",
11 |     "phonikud-onnx>=1.0.4",
12 |     "soundfile>=0.13.1",
13 |     "stts2-light",
14 | ]
15 | 
16 | [tool.uv.sources]
17 | stts2-light = { path = "StyleTTS2-lite" }
18 | phonikud = { git = "https://github.com/thewh1teagle/phonikud" }
19 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Git and version control
 2 | .git
 3 | .gitignore
 4 | .github
 5 | 
 6 | # Python cache and virtual environments
 7 | __pycache__
 8 | *.pyc
 9 | *.pyo
10 | *.pyd
11 | .Python
12 | .venv
13 | venv/
14 | env/
15 | ENV/
16 | 
17 | # IDE and editor files
18 | .vscode/
19 | .idea/
20 | *.swp
21 | *.swo
22 | *~
23 | 
24 | # OS generated files
25 | .DS_Store
26 | .DS_Store?
27 | ._*
28 | .Spotlight-V100
29 | .Trashes
30 | ehthumbs.db
31 | Thumbs.db
32 | 
33 | # Documentation and readme files
34 | README.md
35 | *.md
36 | docs/
37 | 
38 | # Build artifacts
39 | build/
40 | dist/
41 | *.egg-info/
42 | 
43 | # Development and testing
44 | .pytest_cache/
45 | .coverage
46 | .tox/
47 | .mypy_cache/
48 | .ruff_cache/
49 | 
50 | # Docker files
51 | Dockerfile*
52 | docker-compose*
53 | .dockerignore 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # phonikud-StyleTTS2-dockerized
 2 | 
 3 | 
 4 | https://github.com/user-attachments/assets/bd6aae78-feb5-4896-923a-4fe77e1b5f61
 5 | 
 6 | 
 7 | 
 8 | 
 9 | ## Prepare models
10 | 
11 | ```console
12 | wget https://huggingface.co/thewh1teagle/phonikud-onnx/resolve/main/phonikud-1.0.int8.onnx
13 | wget https://huggingface.co/thewh1teagle/phonikud-tts-checkpoints/resolve/main/saspeech_automatic_stts2-light_epoch_00010.pth
14 | ```
15 | 
16 | ## Setup without Docker
17 | 
18 | 1. Install https://docs.astral.sh/uv/getting-started/installation
19 | 2. Run
20 | ```console
21 | uv sync
22 | uv run main.py
23 | ```
24 | 
25 | ## Setup with Docker
26 | 
27 | ```console
 28 | git clone --branch hebrew2 https://github.com/thewh1teagle/StyleTTS2-lite
29 | docker build --platform linux/amd64 -t phonikud-styletts2-app .
30 | docker run -p 7860:7860 phonikud-styletts2-app
31 | ```
32 | 
33 | ## License
34 | 
35 | Non commercial. See [LICENSE](LICENSE)
36 | 
37 | Trained on data from OpenSLR Dataset 134, released under CC BY-NC-SA 4.0
38 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use uv base image with Python 3.11
 2 | FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim AS builder
 3 | 
 4 | # Install git and build tools for dependencies that need compilation
 5 | RUN apt-get update && apt-get install -y \
 6 |     git \
 7 |     build-essential \
 8 |     gcc \
 9 |     g++ \
10 |     && rm -rf /var/lib/apt/lists/*
11 | 
12 | # Set working directory
13 | WORKDIR /app
14 | 
15 | # Set environment variables for optimal uv performance
16 | ENV UV_COMPILE_BYTECODE=1 \
17 |     UV_LINK_MODE=copy
18 | 
19 | # Install dependencies first (better caching)
20 | RUN --mount=type=cache,target=/root/.cache/uv \
21 |     --mount=type=bind,source=uv.lock,target=uv.lock \
22 |     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
23 |     uv sync --frozen --no-install-project
24 | 
25 | # Copy source code
26 | COPY . /app
27 | 
28 | # Install the project
29 | RUN --mount=type=cache,target=/root/.cache/uv \
30 |     uv sync --frozen
31 | 
32 | # Production stage
33 | FROM python:3.11-slim-bookworm
34 | 
35 | # Copy the application and virtual environment
36 | COPY --from=builder /app /app
37 | 
38 | # Set environment variables
39 | ENV PATH="/app/.venv/bin:$PATH" \
40 |     PYTHONPATH="/app" \
41 |     PYTHONDONTWRITEBYTECODE=1 \
42 |     PYTHONUNBUFFERED=1
43 | 
44 | # Set working directory
45 | WORKDIR /app
46 | 
 47 | # Expose the port the Flask app listens on
48 | EXPOSE 7860
49 | 
50 | # Run the application
51 | CMD ["python", "app.py"]
52 | 


--------------------------------------------------------------------------------
/tts.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import torch
 3 | import numpy as np
 4 | from pathlib import Path
 5 | from functools import lru_cache
 6 | 
 7 | # Add parent directory to path to import StyleTTS2
 8 | root_dir = Path(__file__).parent / 'StyleTTS2-lite'
 9 | sys.path.append(str(root_dir))
10 | from inference import StyleTTS2
11 | 
12 | 
class TextToSpeech:
    """Convenience wrapper around a StyleTTS2 model.

    Loads the model once, caches style extraction per reference speaker, and
    returns peak-normalized audio for a phoneme string.
    """

    def __init__(self, config_path, models_path):
        """Load the StyleTTS2 model in eval mode on CUDA when available, else CPU.

        Args:
            config_path: Path to the StyleTTS2 config file.
            models_path: Path to the model checkpoint.
        """
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.config_path = config_path
        self.models_path = models_path
        self.model = StyleTTS2(config_path, models_path).eval().to(self.device)

    # NOTE: lru_cache on a method also keys on ``self`` and therefore keeps the
    # instance alive for the cache's lifetime. The callers in this repository
    # create a single long-lived instance, so this is left as is.
    @lru_cache(maxsize=128)
    def get_styles(self, speaker_path, speed, denoise, avg_style):
        """Get styles from speaker audio with LRU caching.

        All arguments must be hashable because they form the cache key.

        Args:
            speaker_path: Path to the reference speaker audio.
            speed: Speed value stored alongside the path in the speaker dict.
            denoise: Forwarded to ``model.get_styles``.
            avg_style: Forwarded to ``model.get_styles``.

        Returns:
            Whatever ``model.get_styles`` returns for this speaker.
        """
        speaker = {
            "path": speaker_path,
            "speed": speed
        }
        with torch.no_grad():
            return self.model.get_styles(speaker, denoise, avg_style)

    def _create(self, phonemes, styles, stabilize=True, alpha=18):
        """Generate audio from phonemes and styles, scaled to a peak of 1.0.

        Silent (all-zero) or empty output is returned unchanged: dividing by a
        zero peak would fill the signal with NaN, and taking the maximum of an
        empty array raises.
        """
        with torch.no_grad():
            audio = self.model.generate(phonemes, styles, stabilize, alpha)
            if np.size(audio) == 0:
                return audio
            # Normalize audio only when there is a non-zero peak to scale by
            peak = np.max(np.abs(audio))
            if peak > 0:
                audio = audio / peak
            return audio

    def create(self, phonemes, speaker_path, speed=0.82, denoise=0.2, avg_style=True, stabilize=True, alpha=18):
        """Complete synthesis pipeline from phonemes to audio with cached styles.

        Args:
            phonemes: Phoneme string to synthesize.
            speaker_path: Path to the reference speaker audio.
            speed, denoise, avg_style: Style-extraction settings (see ``get_styles``).
            stabilize, alpha: Forwarded to ``model.generate``.

        Returns:
            The peak-normalized audio produced by ``_create``.
        """
        # Use cached style extraction
        styles = self.get_styles(speaker_path, speed, denoise, avg_style)
        return self._create(phonemes, styles, stabilize, alpha)


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | """
 2 | wget https://huggingface.co/thewh1teagle/phonikud-onnx/resolve/main/phonikud-1.0.int8.onnx
 3 | wget https://huggingface.co/thewh1teagle/phonikud-tts-checkpoints/resolve/main/saspeech_automatic_stts2-light_epoch_00010.pth
 4 | git clone --branch hebrew2 https://github.com/thewh1teagle/StyleTTS2-lite
 5 | """
 6 | 
 7 | """
 8 | Script to create one sample audio file using a reference WAV from the Demo/Audio directory
 9 | """
10 | import soundfile as sf
11 | from pathlib import Path
12 | from phonikud_onnx import Phonikud
13 | import phonikud
14 | from tts import TextToSpeech
15 | 
# Diacritization model, loaded once at import time from the ONNX file in the
# working directory (see the download commands at the top of this file).
phonikud_model = Phonikud('phonikud-1.0.int8.onnx')
17 | 
18 | default_text = """
19 | ירושלים היא עיר עתיקה וחשובה במיוחד, שמכילה בתוכה שכבות רבות של היסטוריה, תרבות ורוחניות שנמשכות אלפי שנים, והיא מהווה מוקד מרכזי לשלושת הדתות הגדולות, יהדות, נצרות, ואסלאם. שמתחברות יחד במקום אחד ייחודי, מלא אנרגיה ומורכבות, שם אפשר למצוא אתרים קדושים, שכונות עתיקות ושווקים צבעוניים, וכל פינה מספרת סיפור של תקופות שונות, אנשים שונים ואירועים שהשפיעו על ההיסטוריה של העולם כולו, מה שהופך את ירושלים לא רק לעיר גאוגרפית, אלא גם למרכז של זהות, אמונה, וזיכרון קולקטיבי שממשיך לעורר השראה ולחבר בין אנשים מרקע שונה מכל קצוות תבל.
20 | """.strip()
21 | 
def phonemize(vocalized):
    """Return phonikud's phonemization of the given vocalized text."""
    return phonikud.phonemize(vocalized)
25 | 
def main():
    """Synthesize the default text with one reference voice and save it as a WAV.

    The text is diacritized, phonemized, and rendered with the hard-coded
    reference speaker; the result is written to ``samples/sample_output.wav``.
    If synthesis or saving fails, the error is printed and the function
    returns without claiming that a sample was created.
    """
    # Create samples directory
    samples_dir = Path("samples")
    samples_dir.mkdir(exist_ok=True)

    # Setup TTS model
    config_path = str(Path("StyleTTS2-lite") / "Configs" / "config.yaml")
    models_path = 'saspeech_automatic_stts2-light_epoch_00010.pth'
    tts = TextToSpeech(config_path, models_path)

    # Sample text -> diacritized text -> phonemes
    vocalized = phonikud_model.add_diacritics(default_text)
    phonemes = phonemize(vocalized)

    # Use hardcoded reference audio file
    ref_audio_path = "StyleTTS2-lite/Demo/Audio/10_michael.wav"
    output_name = "sample_output.wav"
    output_path = samples_dir / output_name

    print(f"Processing reference audio: {ref_audio_path}")

    try:
        audio = tts.create(
            phonemes=phonemes,
            speaker_path=ref_audio_path,
            speed=0.82,
            denoise=0.2,
            avg_style=True,
            stabilize=True,
            alpha=18
        )

        # Save audio at the 24 kHz rate used throughout this project
        sf.write(str(output_path), audio, 24000)
    except Exception as e:
        # Top-level script boundary: report the failure and stop here so the
        # success messages below are not printed for a sample that was never written.
        print(f"Error processing {ref_audio_path}: {e}")
        return

    print(f"Created {output_name}")
    print(f"\nSample created in {samples_dir} directory")
78 | 
79 | if __name__ == "__main__":
80 |     main() 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | from flask import Flask, render_template, request, jsonify, send_file
  2 | from flask_restx import Api, Resource, fields
  3 | from pathlib import Path
  4 | import soundfile as sf
  5 | from phonikud_onnx import Phonikud
  6 | import phonikud
  7 | from tts import TextToSpeech
  8 | import os
  9 | 
# Flask application object; routes below are registered on it directly or via the RESTX Api.
app = Flask(__name__)

# Configure Flask-RESTX
# REST endpoints are mounted under /api and the generated documentation is served at /api/.
api = Api(
    app,
    version='1.0',
    title='StyleTTS2 Hebrew TTS API',
    description='Hebrew Text-to-Speech API using StyleTTS2 and Phonikud',
    doc='/api/',
    prefix='/api'
)

# Initialize models
# Loaded once at import time; the ONNX path is relative (presumably to the working directory).
phonikud_model = Phonikud('phonikud-1.0.int8.onnx')

# Setup TTS model
# Config comes from the StyleTTS2-lite checkout; the checkpoint path is relative as well.
config_path = str(Path("StyleTTS2-lite") / "Configs" / "config.yaml")
models_path = 'saspeech_automatic_stts2-light_epoch_00010.pth'
tts = TextToSpeech(config_path, models_path)

# Create samples directory
# Generated WAV files are written here and served back by the /audio/<filename> route.
samples_dir = Path("samples")
samples_dir.mkdir(exist_ok=True)

# API Models
# Request body schema for the generate endpoint.
generate_model = api.model('GenerateRequest', {
    'text': fields.String(required=True, description='The input text'),
    'type': fields.String(required=False, description='Input type: phonemes, unvocalized, or vocalized (default: unvocalized)', 
                         enum=['phonemes', 'unvocalized', 'vocalized'], default='unvocalized'),
    'ref_audio': fields.String(required=False, description='Reference audio filename (default: 10_michael.wav)', default='10_michael.wav')
})

# Success payload schema for the generate endpoint.
generate_response = api.model('GenerateResponse', {
    'success': fields.Boolean(description='Whether generation was successful'),
    'filename': fields.String(description='Generated audio filename'),
    'phonemes': fields.String(description='Generated phonemes'),
    'vocalized_text': fields.String(description='Vocalized Hebrew text (if applicable)')
})

# Error payload schema: a single human-readable message.
error_response = api.model('ErrorResponse', {
    'error': fields.String(description='Error message')
})
 52 | 
 53 | def get_reference_audio_files():
 54 |     """Get all WAV files from StyleTTS2-lite/Demo/Audio/"""
 55 |     audio_dir = Path("StyleTTS2-lite/Demo/Audio")
 56 |     if audio_dir.exists():
 57 |         return sorted([f.name for f in audio_dir.glob("*.wav")])
 58 |     return []
 59 | 
 60 | def phonemize_text(text):
 61 |     """Convert text to phonemes"""
 62 |     vocalized = phonikud_model.add_diacritics(text)
 63 |     phonemes = phonikud.phonemize(vocalized)
 64 |     return phonemes
 65 | 
 66 | def vocalize_and_phonemize(text):
 67 |     """Vocalize text and convert to phonemes"""
 68 |     vocalized = phonikud_model.add_diacritics(text)
 69 |     phonemes = phonikud.phonemize(vocalized)
 70 |     return phonemes
 71 | 
 72 | @app.route('/')
 73 | def index():
 74 |     """Serve the main page"""
 75 |     audio_files = get_reference_audio_files()
 76 |     return render_template('index.html', audio_files=audio_files)
 77 | 
 78 | # API namespace
 79 | ns = api.namespace('tts', description='Text-to-Speech operations')
 80 | 
 81 | @ns.route('/generate')
 82 | class GenerateAudio(Resource):
 83 |     @api.expect(generate_model)
 84 |     @api.marshal_with(generate_response, code=200)
 85 |     @api.marshal_with(error_response, code=400)
 86 |     @api.marshal_with(error_response, code=500)
 87 |     def post(self):
 88 |         """Generate audio from text input"""
 89 |         try:
 90 |             data = request.json
 91 |             if not data:
 92 |                 return {'error': 'No JSON data provided'}, 400
 93 |                 
 94 |             input_text = data.get('text', '').strip()
 95 |             input_type = data.get('type', 'unvocalized')
 96 |             ref_audio = data.get('ref_audio', '10_michael.wav')
 97 |             
 98 |             if not input_text:
 99 |                 return {'error': 'Text input is required'}, 400
100 |             
101 |             if not ref_audio:
102 |                 return {'error': 'Reference audio file is required'}, 400
103 |             
104 |             # Process input based on type
105 |             vocalized_text = None
106 |             if input_type == 'phonemes':
107 |                 phonemes = input_text
108 |             elif input_type == 'unvocalized':
109 |                 vocalized_text = phonikud_model.add_diacritics(input_text)
110 |                 phonemes = phonikud.phonemize(vocalized_text)
111 |             else:  # vocalized
112 |                 vocalized_text = input_text
113 |                 phonemes = phonikud.phonemize(input_text)
114 |             
115 |             # Reference audio path
116 |             ref_audio_path = str(Path("StyleTTS2-lite/Demo/Audio") / ref_audio)
117 |             
118 |             # Generate audio
119 |             audio = tts.create(
120 |                 phonemes=phonemes,
121 |                 speaker_path=ref_audio_path,
122 |                 speed=0.82,
123 |                 denoise=0.2,
124 |                 avg_style=True,
125 |                 stabilize=True,
126 |                 alpha=18
127 |             )
128 |             
129 |             # Save audio
130 |             output_filename = f"output_{input_type}.wav"
131 |             output_path = samples_dir / output_filename
132 |             sr = 24000
133 |             sf.write(str(output_path), audio, sr)
134 |             
135 |             return {
136 |                 'success': True,
137 |                 'filename': output_filename,
138 |                 'phonemes': phonemes,
139 |                 'vocalized_text': vocalized_text
140 |             }
141 |             
142 |         except Exception as e:
143 |             return {'error': str(e)}, 500
144 | 
145 | @ns.route('/voices')
146 | class GetVoices(Resource):
147 |     @api.marshal_with(api.model('VoicesResponse', {
148 |         'voices': fields.List(fields.String, description='Available voice files')
149 |     }))
150 |     def get(self):
151 |         """Get list of available reference voices"""
152 |         voices = get_reference_audio_files()
153 |         return {'voices': voices}
154 | 
155 | # Keep the original route for the web interface
156 | @app.route('/generate', methods=['POST'])
157 | def generate_audio_web():
158 |     """Generate audio based on input type (for web interface)"""
159 |     try:
160 |         data = request.json
161 |         if not data:
162 |             return jsonify({'error': 'No JSON data provided'}), 400
163 |             
164 |         input_text = data.get('text', '').strip()
165 |         input_type = data.get('type', 'vocalized')
166 |         ref_audio = data.get('ref_audio', '')
167 |         
168 |         if not input_text:
169 |             return jsonify({'error': 'Text input is required'}), 400
170 |         
171 |         if not ref_audio:
172 |             return jsonify({'error': 'Reference audio file is required'}), 400
173 |         
174 |         # Process input based on type
175 |         vocalized_text = None
176 |         if input_type == 'phonemes':
177 |             phonemes = input_text
178 |         elif input_type == 'unvocalized':
179 |             vocalized_text = phonikud_model.add_diacritics(input_text)
180 |             phonemes = phonikud.phonemize(vocalized_text)
181 |         else:  # vocalized
182 |             vocalized_text = input_text
183 |             phonemes = phonikud.phonemize(input_text)
184 |         
185 |         # Reference audio path
186 |         ref_audio_path = str(Path("StyleTTS2-lite/Demo/Audio") / ref_audio)
187 |         
188 |         # Generate audio
189 |         audio = tts.create(
190 |             phonemes=phonemes,
191 |             speaker_path=ref_audio_path,
192 |             speed=0.82,
193 |             denoise=0.2,
194 |             avg_style=True,
195 |             stabilize=True,
196 |             alpha=18
197 |         )
198 |         
199 |         # Save audio
200 |         output_filename = f"output_{input_type}.wav"
201 |         output_path = samples_dir / output_filename
202 |         sr = 24000
203 |         sf.write(str(output_path), audio, sr)
204 |         
205 |         return jsonify({
206 |             'success': True,
207 |             'filename': output_filename,
208 |             'phonemes': phonemes,
209 |             'vocalized_text': vocalized_text
210 |         })
211 |         
212 |     except Exception as e:
213 |         return jsonify({'error': str(e)}), 500
214 | 
@app.route('/audio/<filename>')
def serve_audio(filename):
    """Serve a generated audio file from the samples directory for playback.

    Returns the file as ``audio/wav``, or a JSON 404 when ``filename`` does not
    name a regular file in the samples directory.
    """
    file_path = samples_dir / filename
    # is_file() rather than exists(): a name that resolves to a directory must
    # produce a 404 instead of reaching send_file and failing there.
    if file_path.is_file():
        return send_file(file_path, mimetype='audio/wav')
    return jsonify({'error': 'File not found'}), 404
222 | 
if __name__ == '__main__':
    # The server binds to all interfaces, so the interactive debugger (which
    # allows arbitrary code execution) must be off unless explicitly requested
    # with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)


--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |     <meta charset="UTF-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6 |     <title>StyleTTS2 - Hebrew TTS</title>
  7 |     <style>
  8 |         body {
  9 |             font-family: Arial, sans-serif;
 10 |             max-width: 800px;
 11 |             margin: 0 auto;
 12 |             padding: 20px;
 13 |             background-color: #f5f5f5;
 14 |         }
 15 |         .container {
 16 |             background-color: white;
 17 |             padding: 30px;
 18 |             border-radius: 10px;
 19 |             box-shadow: 0 2px 10px rgba(0,0,0,0.1);
 20 |         }
 21 |         h1 {
 22 |             color: #333;
 23 |             text-align: center;
 24 |             margin-bottom: 30px;
 25 |         }
 26 |         .form-group {
 27 |             margin-bottom: 20px;
 28 |         }
 29 |         label {
 30 |             display: block;
 31 |             margin-bottom: 5px;
 32 |             font-weight: bold;
 33 |             color: #555;
 34 |         }
 35 |         select, textarea {
 36 |             width: 100%;
 37 |             padding: 10px;
 38 |             border: 1px solid #ddd;
 39 |             border-radius: 5px;
 40 |             font-size: 16px;
 41 |         }
 42 |         textarea {
 43 |             height: 120px;
 44 |             resize: vertical;
 45 |             font-family: monospace;
 46 |         }
 47 | 
 48 |         button {
 49 |             padding: 12px 20px;
 50 |             border: none;
 51 |             border-radius: 5px;
 52 |             font-size: 16px;
 53 |             cursor: pointer;
 54 |             transition: background-color 0.3s;
 55 |         }
 56 |         .btn-generate {
 57 |             background-color: #007bff;
 58 |             color: white;
 59 |         }
 60 |         .btn-generate:hover {
 61 |             background-color: #0056b3;
 62 |         }
 63 |         .result {
 64 |             margin-top: 20px;
 65 |             padding: 15px;
 66 |             border-radius: 5px;
 67 |             display: none;
 68 |         }
 69 |         .result.success {
 70 |             background-color: #d4edda;
 71 |             color: #155724;
 72 |             border: 1px solid #c3e6cb;
 73 |         }
 74 |         .result.error {
 75 |             background-color: #f8d7da;
 76 |             color: #721c24;
 77 |             border: 1px solid #f5c6cb;
 78 |         }
 79 |         .loading {
 80 |             text-align: center;
 81 |             color: #666;
 82 |             font-style: italic;
 83 |         }
 84 |         .phonemes-display {
 85 |             background-color: #f8f9fa;
 86 |             padding: 10px;
 87 |             border-radius: 5px;
 88 |             margin-top: 10px;
 89 |             font-family: monospace;
 90 |             font-size: 14px;
 91 |         }
 92 |         .audio-player {
 93 |             margin-top: 10px;
 94 |             width: 100%;
 95 |         }
 96 |         .rtl-input {
 97 |             direction: rtl;
 98 |             text-align: right;
 99 |         }
100 |     </style>
101 | </head>
102 | <body>
103 |     <div class="container">
104 |         <div style="text-align: center; margin-bottom: 20px;">
105 |             <a href="https://phonikud.github.io" target="_blank" style="color: #007bff; text-decoration: none; font-size: 16px;">
106 |                 🔗 See Phonikud Project
107 |             </a>
108 |         </div>
109 |         <h1>StyleTTS2 - Hebrew Text-to-Speech</h1>
110 |         
111 |         <div class="form-group">
112 |             <label for="ref-audio">Reference Audio:</label>
113 |             <select id="ref-audio" required>
114 |                 {% for audio_file in audio_files %}
115 |                 <option value="{{ audio_file }}" {% if audio_file == "10_michael.wav" %}selected{% endif %}>{{ audio_file }}</option>
116 |                 {% endfor %}
117 |             </select>
118 |         </div>
119 |         
120 |         <div class="form-group">
121 |             <label for="unvocalized-input">Unvocalized Hebrew Text:</label>
122 |             <textarea id="unvocalized-input" class="rtl-input" placeholder="Enter unvocalized Hebrew text..." style="height: 80px;">ירושלים היא עיר עתיקה וחשובה במיוחד, שמכילה בתוכה שכבות רבות של היסטוריה, תרבות ורוחניות שנמשכות אלפי שנים, והיא מהווה מוקד מרכזי לשלושת הדתות הגדולות, יהדות, נצרות, ואסלאם. שמתחברות יחד במקום אחד ייחודי, מלא אנרגיה ומורכבות, שם אפשר למצוא אתרים קדושים, שכונות עתיקות ושווקים צבעוניים, וכל פינה מספרת סיפור של תקופות שונות, אנשים שונים ואירועים שהשפיעו על ההיסטוריה של העולם כולו, מה שהופך את ירושלים לא רק לעיר גאוגרפית, אלא גם למרכז של זהות, אמונה, וזיכרון קולקטיבי שממשיך לעורר השראה ולחבר בין אנשים מרקע שונה מכל קצוות תבל.</textarea>
123 |             <button class="btn-generate" onclick="generateAudio('unvocalized')" style="margin-top: 10px; width: 100%;">
124 |                 Generate from Unvocalized Text
125 |             </button>
126 |         </div>
127 |         
128 |         <div class="form-group">
129 |             <label for="vocalized-input">Vocalized Hebrew Text:</label>
130 |             <textarea id="vocalized-input" class="rtl-input" placeholder="Enter vocalized Hebrew text..." style="height: 80px;"></textarea>
131 |             <button class="btn-generate" onclick="generateAudio('vocalized')" style="margin-top: 10px; width: 100%;">
132 |                 Generate from Vocalized Text
133 |             </button>
134 |         </div>
135 |         
136 |         <div class="form-group">
137 |             <label for="phonemes-input">Phonemes Input:</label>
138 |             <textarea id="phonemes-input" placeholder="Enter phonemes directly..." style="height: 80px;"></textarea>
139 |             <button class="btn-generate" onclick="generateAudio('phonemes')" style="margin-top: 10px; width: 100%;">
140 |                 Generate from Phonemes
141 |             </button>
142 |         </div>
143 |         
144 |         <div id="result" class="result"></div>
145 |         
146 |         <div style="text-align: center; margin-top: 30px; padding-top: 20px; border-top: 1px solid #eee; color: #666;">
147 |             <a href="/api/" target="_blank" style="color: #007bff; text-decoration: none; font-size: 14px;">
148 |                 📖 API Documentation (Swagger)
149 |             </a>
150 |         </div>
151 |     </div>
152 | 
153 |     <script>
154 |         async function generateAudio(type) {
155 |             let textInput;
156 |             if (type === 'phonemes') {
157 |                 textInput = document.getElementById('phonemes-input').value.trim();
158 |             } else if (type === 'unvocalized') {
159 |                 textInput = document.getElementById('unvocalized-input').value.trim();
160 |             } else {
161 |                 textInput = document.getElementById('vocalized-input').value.trim();
162 |             }
163 |             
164 |             const refAudio = document.getElementById('ref-audio').value;
165 |             const resultDiv = document.getElementById('result');
166 |             
167 |             if (!textInput) {
168 |                 showResult('Please enter some text.', 'error');
169 |                 return;
170 |             }
171 |             
172 |             if (!refAudio) {
173 |                 showResult('Please select a reference audio file.', 'error');
174 |                 return;
175 |             }
176 |             
177 |             // Show loading
178 |             resultDiv.className = 'result';
179 |             resultDiv.style.display = 'block';
180 |             resultDiv.innerHTML = '<div class="loading">Generating audio...</div>';
181 |             
182 |             try {
183 |                 const response = await fetch('/generate', {
184 |                     method: 'POST',
185 |                     headers: {
186 |                         'Content-Type': 'application/json',
187 |                     },
188 |                     body: JSON.stringify({
189 |                         text: textInput,
190 |                         type: type,
191 |                         ref_audio: refAudio
192 |                     })
193 |                 });
194 |                 
195 |                 const data = await response.json();
196 |                 
197 |                 if (data.success) {
198 |                     // Update the other input fields based on generation
199 |                     if (type === 'unvocalized' && data.vocalized_text) {
200 |                         document.getElementById('vocalized-input').value = data.vocalized_text;
201 |                     }
202 |                     
203 |                     if (data.phonemes) {
204 |                         document.getElementById('phonemes-input').value = data.phonemes;
205 |                     }
206 |                     
207 |                     let html = `
208 |                         <strong>Success!</strong> Audio generated successfully.
209 |                         <audio controls class="audio-player" autoplay>
210 |                             <source src="/audio/${data.filename}" type="audio/wav">
211 |                             Your browser does not support the audio element.
212 |                         </audio>
213 |                     `;
214 |                     
215 |                     if (data.phonemes) {
216 |                         html += `
217 |                             <div class="phonemes-display">
218 |                                 <strong>Phonemes:</strong><br>
219 |                                 ${data.phonemes}
220 |                             </div>
221 |                         `;
222 |                     }
223 |                     
224 |                     showResult(html, 'success');
225 |                 } else {
226 |                     showResult(`Error: ${data.error}`, 'error');
227 |                 }
228 |             } catch (error) {
229 |                 showResult(`Error: ${error.message}`, 'error');
230 |             }
231 |         }
232 |         
233 |         function showResult(message, type) {
234 |             const resultDiv = document.getElementById('result');
235 |             resultDiv.className = `result ${type}`;
236 |             resultDiv.style.display = 'block';
237 |             resultDiv.innerHTML = message;
238 |         }
239 |     </script>
240 | </body>
241 | </html> 


--------------------------------------------------------------------------------