├── speech_gateway
├── __init__.py
├── converter
│ ├── __init__.py
│ ├── pcm.py
│ ├── mp3.py
│ ├── wave.py
│ └── mulaw.py
├── source
│ ├── sbv2.py
│ ├── voicevox.py
│ ├── nijivoice_encoded.py
│ ├── azure.py
│ ├── openai_speech.py
│ ├── nijivoice.py
│ └── __init__.py
├── cache
│ ├── __init__.py
│ └── file.py
├── performance_recorder
│ ├── __init__.py
│ ├── sqlite.py
│ └── postgres.py
└── gateway
│ ├── unified.py
│ ├── voicevox.py
│ ├── openai_speech.py
│ ├── sbv2.py
│ ├── azure.py
│ ├── nijivoice_encoded.py
│ ├── nijivoice.py
│ └── __init__.py
├── requirements.txt
├── .gitattributes
├── tests
├── data
│ └── test.wav
├── converter
│ ├── test_mp3.py
│ └── test_wave.py
├── conftest.py
├── source
│ ├── test_sbv2_source.py
│ ├── test_azure_source.py
│ ├── test_nijivoice_encoded_source.py
│ ├── test_nijivoice_source.py
│ ├── test_voicevox_source.py
│ └── test_openai_speech_source.py
├── gateway
│ ├── test_sbv2.py
│ ├── test_voicevox.py
│ ├── test_unified.py
│ ├── test_azure.py
│ ├── test_openai_speech.py
│ ├── test_azure_openai_speech.py
│ ├── test_nijivoice.py
│ └── test_nijivoice_encoded.py
├── cache
│ └── test_file.py
└── performance_recorder
│ ├── test_sqlite.py
│ └── test_postgres.py
├── docker
├── requirements.txt
├── pgadmin-servers.json
├── init-db.sh
├── README.md
├── setup-volumes.sh
├── Dockerfile.app
├── .env.sample
├── docker-compose.yaml
└── run.py
├── setup.py
├── run.py
├── .gitignore
├── README.md
└── LICENSE
/speech_gateway/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==24.1.0
2 | fastapi==0.115.6
3 | httpx==0.28.1
4 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/tests/data/test.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uezo/speech-gateway/HEAD/tests/data/test.wav
--------------------------------------------------------------------------------
/docker/requirements.txt:
--------------------------------------------------------------------------------
1 | psycopg2-binary==2.9.9
2 | python-dotenv==1.0.0
3 | git+https://github.com/uezo/speech-gateway
4 |
--------------------------------------------------------------------------------
/docker/pgadmin-servers.json:
--------------------------------------------------------------------------------
1 | {
2 | "Servers": {
3 | "1": {
4 | "Name": "speech-gateway",
5 | "Group": "Servers",
6 | "Host": "spgw-db",
7 | "Port": 5432,
8 | "MaintenanceDB": "postgres",
9 | "Username": "postgres",
10 | "SSLMode": "prefer"
11 | }
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/speech_gateway/converter/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import AsyncIterator
3 |
4 |
class FormatConverter(ABC):
    """Abstract base for converters that transcode an audio byte stream to another format."""

    @abstractmethod
    async def convert(self, input_stream: AsyncIterator[bytes]) -> AsyncIterator[bytes]:
        """Consume chunks from *input_stream* and yield converted audio chunks."""
        pass
9 |
10 |
class FormatConverterError(Exception):
    """Raised when an audio format conversion fails."""
    def __init__(self, message: str):
        super().__init__(message)
14 |
15 |
16 | from .mp3 import MP3Converter
17 |
--------------------------------------------------------------------------------
/docker/init-db.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Initialize the application's database objects. Runs once at first container
# start via /docker-entrypoint-initdb.d (mounted in docker-compose.yaml).
set -e

# Create the app database and a dedicated runtime role with full privileges on it.
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL
CREATE DATABASE $SPGW_DB_NAME;
CREATE USER "$SPGW_DB_USER" WITH PASSWORD '$SPGW_DB_PASSWORD';
GRANT ALL PRIVILEGES ON DATABASE $SPGW_DB_NAME TO "$SPGW_DB_USER";
EOSQL

# Hand the public schema to the runtime role so it can create tables.
psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$SPGW_DB_NAME" <<-EOSQL
GRANT ALL ON SCHEMA public TO "$SPGW_DB_USER";
ALTER SCHEMA public OWNER TO "$SPGW_DB_USER";
EOSQL
15 |
--------------------------------------------------------------------------------
/speech_gateway/source/sbv2.py:
--------------------------------------------------------------------------------
1 | from . import StreamSource
2 |
3 |
class StyleBertVits2StreamSource(StreamSource):
    """Stream source for a Style-Bert-VITS2 server's /voice endpoint."""

    def get_cache_key(self, audio_format: str, query_params: dict, **kwargs) -> str:
        """Build a cache file name from the query hash and the audio format."""
        extension = audio_format or "wav"
        digest = hash(str(query_params))
        return f"{digest}.{extension}"

    def parse_text(self, query_params: dict, **kwargs) -> str:
        """Extract the text to synthesize from the query parameters."""
        return query_params.get("text")

    def make_stream_request(self, query_params: dict, **kwargs):
        """Describe the upstream GET request that fetches synthesized audio."""
        request = {"method": "GET"}
        request["url"] = f"{self.base_url}/voice"
        request["params"] = query_params
        return request
17 |
--------------------------------------------------------------------------------
/speech_gateway/cache/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import AsyncIterator
3 |
4 |
class CacheStorage(ABC):
    """Abstract interface for caching synthesized audio streams by string key."""

    @abstractmethod
    async def has_cache(self, cache_key: str) -> bool:
        """Return True if a cached entry exists for *cache_key*."""
        pass

    @abstractmethod
    async def fetch_cache_stream(self, cache_key: str) -> AsyncIterator[bytes]:
        """Yield the cached audio bytes stored under *cache_key*."""
        pass

    @abstractmethod
    async def write_cache(self, input_stream: AsyncIterator[bytes], cache_key: str) -> AsyncIterator[bytes]:
        """Write *input_stream* to the cache under *cache_key*.

        The AsyncIterator return type suggests implementations pass the
        chunks through to the caller while writing — confirm against
        FileCacheStorage.
        """
        pass
17 |
18 |
class CacheStorageError(Exception):
    """Raised when a cache read or write operation fails."""
    def __init__(self, message: str):
        super().__init__(message)
22 |
23 |
24 | from .file import FileCacheStorage
25 |
--------------------------------------------------------------------------------
/speech_gateway/converter/pcm.py:
--------------------------------------------------------------------------------
1 | import io
2 | import wave
3 | import soundfile as sf
4 | import numpy as np
5 |
6 |
def convert_float32bit_to_int16bit(input_data: bytes) -> bytes:
    """Convert float PCM audio bytes to a complete 16-bit integer PCM WAV.

    Args:
        input_data: Raw bytes of an audio file readable by soundfile
            (typically a 32-bit float WAV).

    Returns:
        Bytes of a WAV file containing 16-bit signed PCM samples at the
        input's sample rate and channel count.
    """
    data, samplerate = sf.read(io.BytesIO(input_data))
    # Clip to [-1.0, 1.0] before scaling: out-of-range float samples would
    # otherwise wrap around in astype(np.int16) and produce loud artifacts.
    pcm16_data = (np.clip(data, -1.0, 1.0) * 32767).astype(np.int16)
    channels = pcm16_data.shape[1] if pcm16_data.ndim > 1 else 1

    wav_bytes_io = io.BytesIO()
    with wave.open(wav_bytes_io, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setframerate(samplerate)
        wav_file.writeframes(pcm16_data.tobytes())

    return wav_bytes_io.getvalue()
21 |
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
1 | # Speech Gateway Docker Setup
2 |
3 | ## Quick Start
4 |
5 | ### 1. Setup Environment
6 | ```bash
7 | cp .env.sample .env
8 | # Edit .env and set your API keys
9 | ```
10 |
11 | ### 2. Create Volume Directories
12 | ```bash
13 | ./setup-volumes.sh
14 | ```
15 |
16 | ### 3. Start Services
17 | ```bash
18 | docker compose up -d
19 | ```
20 |
21 | ## Access
22 |
23 | - Application: http://localhost:18000
24 | - PgAdmin: http://localhost:18001
25 |
26 | ## Configuration
27 |
28 | Edit `.env` file to:
29 | - Set API keys (AZURE_API_KEY, OPENAI_API_KEY, etc.)
30 | - Enable/disable services (AZURE_ENABLED=true/false)
31 | - Change ports if needed
32 |
33 | ## Stop Services
34 |
35 | ```bash
36 | docker compose down
37 | ```
38 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

# Read the long description with a context manager and an explicit encoding:
# the original `open("README.md").read()` leaked the file handle and relied
# on the platform's default encoding (breaks on non-UTF-8 locales, and the
# description contains emoji).
with open("README.md", encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    name="speech_gateway",
    version="0.1.7",
    url="https://github.com/uezo/speech-gateway",
    author="uezo",
    author_email="uezo@uezo.net",
    maintainer="uezo",
    maintainer_email="uezo@uezo.net",
    description="A reverse proxy server that enhances speech synthesis with essential, extensible features. 🦉💬",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(exclude=["tests*"]),
    install_requires=["aiofiles==24.1.0", "fastapi==0.115.6", "httpx==0.28.1", "uvicorn==0.34.0"],
    license="Apache v2",
    classifiers=[
        "Programming Language :: Python :: 3"
    ]
)
21 |
--------------------------------------------------------------------------------
/speech_gateway/performance_recorder/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from dataclasses import dataclass
3 |
4 |
class PerformanceRecorder(ABC):
    """Abstract interface for recording speech-synthesis performance metrics."""

    @abstractmethod
    def record(
        self,
        *,
        process_id: str,
        source: str = None,
        text: str = None,
        audio_format: str = None,
        cached: int = 0,
        elapsed: float = None,
    ):
        """Persist one synthesis record.

        Args:
            process_id: Identifier of the synthesis request being recorded.
            source: Name of the upstream speech service used.
            text: Text that was synthesized.
            audio_format: Output audio format (e.g. "wav", "mp3").
            cached: Cache-hit flag (presumably 1 when served from cache,
                0 otherwise — confirm against implementations).
            elapsed: Processing time (presumably seconds — confirm).
        """
        pass

    @abstractmethod
    def close(self):
        """Release any resources (e.g. database connections) held by the recorder."""
        pass
22 |
23 |
@dataclass
class PerformanceRecord:
    """One row of synthesis performance data, mirroring PerformanceRecorder.record arguments."""
    process_id: str
    source: str = None
    text: str = None
    audio_format: str = None
    # BUG FIX: the original fields ended with trailing commas
    # ("cached: int = 0,"), which made the defaults the tuples (0,) and
    # (None,) instead of the int 0 and None.
    cached: int = 0
    elapsed: float = None
32 |
33 |
34 | from .sqlite import SQLitePerformanceRecorder
35 |
--------------------------------------------------------------------------------
/docker/setup-volumes.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Setup script for Docker volumes
4 | # This script creates necessary directories for Docker named volumes
5 |
6 | set -e
7 |
8 | # Load environment variables
9 | if [ -f .env ]; then
10 | export $(grep -v '^#' .env | xargs)
11 | fi
12 |
13 | # Default to ./data if DATA_PATH is not set
14 | DATA_PATH=${DATA_PATH:-./data}
15 |
16 | echo "Setting up volumes at: $DATA_PATH"
17 |
18 | # Create directories if they don't exist
19 | mkdir -p "$DATA_PATH/postgres"
20 | mkdir -p "$DATA_PATH/pgadmin"
21 | mkdir -p "$DATA_PATH/cache"
22 |
23 | # Set appropriate permissions
24 | # PostgreSQL needs UID 999 (in most PostgreSQL Docker images)
25 | # PgAdmin needs UID 5050
26 | if [ "$(uname)" = "Linux" ]; then
27 | sudo chown -R 999:999 "$DATA_PATH/postgres" 2>/dev/null || true
28 | sudo chown -R 5050:5050 "$DATA_PATH/pgadmin" 2>/dev/null || true
29 | fi
30 |
31 | echo "Volume directories created successfully:"
32 | echo " - $DATA_PATH/postgres"
33 | echo " - $DATA_PATH/pgadmin"
34 | echo " - $DATA_PATH/cache"
35 | echo ""
36 | echo "You can now run: docker compose up -d"
37 |
--------------------------------------------------------------------------------
/docker/Dockerfile.app:
--------------------------------------------------------------------------------
1 | # Multi-stage build for optimized image
2 | FROM python:3.11-slim AS builder
3 |
4 | # Install build dependencies (git needed for GitHub installation)
5 | RUN apt-get update && apt-get install -y \
6 | gcc \
7 | git \
8 | && rm -rf /var/lib/apt/lists/*
9 |
10 | # Create virtual environment
11 | RUN python -m venv /opt/venv
12 | ENV PATH="/opt/venv/bin:$PATH"
13 |
14 | # Copy requirements and install dependencies
15 | COPY requirements.txt /tmp/
16 | RUN pip install --upgrade pip && \
17 | pip install --no-cache-dir -r /tmp/requirements.txt
18 |
19 | # Runtime stage
20 | FROM python:3.11-slim
21 |
22 | # Install runtime dependencies
23 | RUN apt-get update && apt-get install -y \
24 | curl \
25 | && rm -rf /var/lib/apt/lists/* \
26 | && useradd -m -u 1000 app
27 |
28 | # Copy virtual environment from builder
29 | COPY --from=builder /opt/venv /opt/venv
30 | ENV PATH="/opt/venv/bin:$PATH"
31 |
32 | WORKDIR /app
33 |
34 | # Copy application
35 | COPY --chown=app:app run.py /app/
36 |
37 | # Switch to non-root user
38 | USER app
39 |
40 | EXPOSE 8000
41 |
42 | CMD ["uvicorn", "run:app", "--host", "0.0.0.0", "--port", "8000"]
43 |
--------------------------------------------------------------------------------
/speech_gateway/source/voicevox.py:
--------------------------------------------------------------------------------
1 | import urllib.parse
2 | import httpx
3 | from . import StreamSource, StreamSourceError
4 |
5 |
class VoicevoxStreamSource(StreamSource):
    """Stream source for a VOICEVOX-compatible engine (audio_query + synthesis)."""

    def get_cache_key(self, audio_format: str, speaker: str, audio_query: dict, **kwargs) -> str:
        """Build a cache file name from the speaker, query hash, and audio format."""
        return f"{speaker}_{hash(str(audio_query))}.{audio_format or 'wav'}"

    def parse_text(self, audio_query: dict, **kwargs) -> str:
        """Return the kana reading stored in the audio query."""
        return audio_query.get("kana")

    def make_stream_request(self, speaker: str, audio_query: dict, **kwargs):
        """Describe the POST /synthesis request that produces the audio stream."""
        return {
            "method": "POST",
            "url": self.base_url + "/synthesis",
            "params": {"speaker": speaker},
            "json": audio_query
        }

    async def get_audio_query(self, speaker: str, text: str, **kwargs):
        """Create a synthesis audio query for *text* via POST /audio_query.

        Raises:
            StreamSourceError: If the request fails at the transport level or
                the engine returns an HTTP error status.
        """
        try:
            url = f"{self.base_url}/audio_query"

            response = await self.http_client.post(url, params={"speaker": speaker, "text": text})
            response.raise_for_status()

            return response.json()

        # BUG FIX: raise_for_status() raises httpx.HTTPStatusError, which is
        # NOT a subclass of httpx.RequestError, so status errors previously
        # escaped unwrapped. httpx.HTTPError covers both.
        except httpx.HTTPError as ex:
            raise StreamSourceError(f"HTTP request failed: {ex}") from ex
32 |
--------------------------------------------------------------------------------
/docker/.env.sample:
--------------------------------------------------------------------------------
1 | COMPOSE_PROJECT_NAME=speech-gateway
2 |
3 | # Database settings
4 | POSTGRES_USER=postgres
5 | POSTGRES_PASSWORD=postgres
6 |
7 | SPGW_DB_NAME=speech_gateway
8 | SPGW_DB_USER=spgw-runtime
9 | SPGW_DB_PASSWORD=spgw-runtime-password
10 |
11 | # Port settings
12 | PORT_SPGW=18000
13 | PORT_DB=5432
14 | PORT_PGADMIN=18001
15 |
16 | # PgAdmin settings
17 | PGADMIN_USER=pgadmin@example.com
18 | PGADMIN_PASSWORD=pgadmin-password
19 |
20 |
21 | # Application settings
22 | DEBUG=true
23 |
24 | # Service enable/disable flags
25 | AZURE_ENABLED=true
26 | OPENAI_ENABLED=true
27 | VOICEVOX_ENABLED=false
28 | SBV2_ENABLED=false
29 | NIJIVOICE_ENABLED=false
30 |
31 | # Azure TTS
32 | AZURE_API_KEY=
33 | AZURE_REGION=
34 | # AZURE_LANGUAGES=en-US,zh-CN,fr-FR
35 |
36 | # OpenAI TTS
37 | OPENAI_API_KEY=
38 | # OPENAI_LANGUAGES=ja-JP,es-ES
39 |
40 | # VOICEVOX
41 | VOICEVOX_URL=http://voicevox-host:50021
42 | # VOICEVOX_LANGUAGES=ja-JP
43 |
44 | # Style-Bert-VITS2
45 | SBV2_URL=http://sbv2-host:5000
46 | # SBV2_LANGUAGES=ja-JP
47 |
48 | # NIJIVOICE
49 | NIJIVOICE_API_KEY=
50 | # NIJIVOICE_LANGUAGES=ja-JP
51 |
52 | # Data storage path (for external disk mounting)
53 | # Examples:
54 | # DATA_PATH=./data # Local directory (default)
55 | # DATA_PATH=/mnt/external-disk/spgw # External disk
56 | DATA_PATH=./data
57 |
--------------------------------------------------------------------------------
/tests/converter/test_mp3.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | from typing import AsyncIterator
4 | from speech_gateway.converter import MP3Converter, FormatConverterError
5 |
6 | @pytest.fixture
7 | def mp3_converter():
8 | # Create an instance of MP3Converter for testing
9 | return MP3Converter()
10 |
11 | @pytest.mark.asyncio
12 | async def test_mp3_conversion(mp3_converter):
13 | # Test the convert method using a real WAV file
14 | input_file = "tests/data/test.wav"
15 |
16 | async def input_stream() -> AsyncIterator[bytes]:
17 | with open(input_file, "rb") as f:
18 | while chunk := f.read(1024):
19 | yield chunk
20 |
21 | output = b""
22 | try:
23 | async for chunk in mp3_converter.convert(input_stream()):
24 | output += chunk
25 | except FormatConverterError as e:
26 | pytest.fail(f"MP3 conversion failed with error: {e}")
27 |
28 | # Assert that the output is not empty (indicating conversion occurred)
29 | assert output != b""
30 |
31 | @pytest.mark.asyncio
32 | async def test_mp3_conversion_error_handling(mp3_converter):
33 | # Test error handling in the convert method with invalid input
34 |
35 | async def input_stream() -> AsyncIterator[bytes]:
36 | yield b"Invalid input data"
37 |
38 | with pytest.raises(FormatConverterError):
39 | async for _ in mp3_converter.convert(input_stream()):
40 | pass
41 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
from contextlib import asynccontextmanager
import logging
from fastapi import FastAPI
from speech_gateway.gateway.voicevox import VoicevoxGateway
from speech_gateway.gateway.nijivoice import NijiVoiceGateway
from speech_gateway.gateway.sbv2 import StyleBertVits2Gateway
from speech_gateway.gateway.unified import UnifiedGateway

# Configure root logger
logger = logging.getLogger("speech_gateway")
logger.setLevel(logging.INFO)
log_format = logging.Formatter("[%(levelname)s] %(asctime)s : %(message)s")
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(log_format)
logger.addHandler(stream_handler)

NIJIVOICE_API_KEY = "YOUR_API_KEY"

# Create gateways
aivisspeech_gateway = VoicevoxGateway(base_url="http://127.0.0.1:10101", debug=True)
sbv2_gateway = StyleBertVits2Gateway(base_url="http://127.0.0.1:5000", debug=True)
nijivoice_gateway = NijiVoiceGateway(api_key=NIJIVOICE_API_KEY, prefix="/nijivoice", debug=True)

# Unified gateway
unified_gateway = UnifiedGateway(debug=True)
# BUG FIX: UnifiedGateway.add_gateway declares `default` keyword-only
# (after the bare `*`), so the positional `True` previously raised TypeError.
unified_gateway.add_gateway("aivisspeech", aivisspeech_gateway, default=True)
unified_gateway.add_gateway("sbv2", sbv2_gateway)
unified_gateway.add_gateway("nijivoice", nijivoice_gateway)

# Shut down the gateways' HTTP clients when the application stops.
@asynccontextmanager
async def lifespan(app: FastAPI):
    yield
    await aivisspeech_gateway.shutdown()
    await sbv2_gateway.shutdown()
    await nijivoice_gateway.shutdown()

# Create app
# BUG FIX: the lifespan context manager was defined but never registered,
# so the shutdown hooks above never ran.
app = FastAPI(lifespan=lifespan)

# Add gateways to app
app.include_router(aivisspeech_gateway.get_router(), prefix="/aivisspeech")
app.include_router(sbv2_gateway.get_router(), prefix="/sbv2")
app.include_router(nijivoice_gateway.get_router(), prefix="/nijivoice")
app.include_router(unified_gateway.get_router())
46 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import pytest
4 | import httpx
5 |
6 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
7 |
8 |
def make_random_text():
    """Return a Japanese test sentence embedding a random six-digit key."""
    key = "{:,}".format(random.randint(100000, 999999))
    return f"これは音声合成のテストです。ランダムキーは、{key}です。"
12 |
13 |
def is_wave(data: bytes) -> bool:
    """Check for a RIFF/WAVE header (the first 12 bytes of a WAV file)."""
    if len(data) < 12:
        return False
    return data.startswith(b"RIFF") and data[8:12] == b"WAVE"
18 |
19 |
def is_mp3(data: bytes) -> bool:
    """Check for an MP3 file: an optional ID3v2 tag followed by an MPEG frame sync."""
    if data.startswith(b"ID3"):
        skip = 10  # The ID3v2 header itself is 10 bytes.
        if len(data) >= 10:
            # Tag size is a 28-bit sync-safe integer in header bytes 6-9.
            tag_size = 0
            for byte in data[6:10]:
                tag_size = (tag_size << 7) | byte
            skip += tag_size
        data = data[skip:]

    if len(data) < 2:
        return False
    # Accept common MPEG-1/2 Layer III frame sync variants.
    return data[:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")
36 |
37 |
def transcribe(data: bytes, audio_format: str) -> str:
    """Transcribe audio bytes with the OpenAI Whisper API.

    Requires the OPENAI_API_KEY environment variable. Returns the recognized
    text, or None if the response has no "text" field (e.g. on an API error).
    """
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
    form_data = {"model": "whisper-1"}
    files = {"file": (f"voice.{audio_format}", data, f"audio/{audio_format}")}
    resp = httpx.post(
        "https://api.openai.com/v1/audio/transcriptions",
        headers=headers,
        data=form_data,
        files=files
    )
    return resp.json().get("text")
49 |
50 |
@pytest.fixture
def random_text():
    # Same content as make_random_text(); duplicated as a fixture so tests
    # can take it as an injected argument.
    random_key = "{:,}".format(random.randint(100000, 999999))
    return f"これは音声合成のテストです。ランダムキーは、{random_key}です。"

@pytest.fixture
def wave_checker():
    # Expose is_wave() to tests as an injectable dependency.
    return is_wave

@pytest.fixture
def mp3_checker():
    # Expose is_mp3() to tests as an injectable dependency.
    return is_mp3

@pytest.fixture
def audio_transcriber():
    # Expose transcribe() to tests; requires OPENAI_API_KEY to be set.
    return transcribe
67 |
--------------------------------------------------------------------------------
/speech_gateway/source/nijivoice_encoded.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from . import StreamSource
3 | from ..cache import CacheStorage
4 | from ..cache.file import FileCacheStorage
5 | from ..converter import FormatConverter
6 | from ..performance_recorder import PerformanceRecorder
7 |
8 |
class NijiVoiceEncodedStreamSource(StreamSource):
    """Stream source for Nijivoice's "generate-encoded-voice" endpoint.

    The endpoint name and the ".json" cache-key suffix suggest the API
    returns audio wrapped in a JSON payload rather than raw bytes.
    """
    def __init__(self,
        *,
        api_key: str = None,
        base_url: str = "https://api.nijivoice.com",
        cache_storage: CacheStorage = None,
        format_converters: Dict[str, FormatConverter] = None,
        max_connections: int = 100,
        max_keepalive_connections: int = 20,
        timeout: float = 10.0,
        performance_recorder: PerformanceRecorder = None,
        debug: bool = False
    ):
        """
        Args:
            api_key: Nijivoice API key, sent as the "x-api-key" header.
            base_url: Root URL of the Nijivoice API.
            cache_storage: Cache backend; defaults to FileCacheStorage under
                "nijivoice_encoded_cache".
            format_converters: Optional map of audio format -> converter.
            max_connections: HTTP connection-pool limit.
            max_keepalive_connections: HTTP keep-alive pool limit.
            timeout: HTTP request timeout (presumably seconds — confirm).
            performance_recorder: Optional recorder of synthesis metrics.
            debug: Enable debug behavior in the base class.
        """
        super().__init__(
            base_url=base_url,
            cache_storage=cache_storage or FileCacheStorage(cache_dir="nijivoice_encoded_cache"),
            format_converters=format_converters,
            max_connections=max_connections,
            max_keepalive_connections=max_keepalive_connections,
            timeout=timeout,
            performance_recorder=performance_recorder,
            debug=debug
        )
        # NOTE(review): base_url is presumably already stored by
        # StreamSource.__init__ — this reassignment looks redundant; confirm.
        self.base_url = base_url
        self.api_key = api_key

    def get_cache_key(self, audio_format: str, voice_actor_id: str, request_json: dict, **kwargs) -> str:
        """Build a cache file name from the actor, request hash and format (".json" suffix)."""
        if not audio_format:
            audio_format = request_json.get("format", "mp3")
        return f"{voice_actor_id}_{hash(str(request_json))}.{audio_format}.json"

    def parse_text(self, request_json: dict, **kwargs) -> str:
        """Return the script text to synthesize from the request payload."""
        return request_json.get("script")

    def make_stream_request(self, voice_actor_id: str, request_json: dict, **kwargs):
        """Describe the POST generate-encoded-voice request for *voice_actor_id*."""
        return {
            "method": "POST",
            "url": self.base_url + f"/api/platform/v1/voice-actors/{voice_actor_id}/generate-encoded-voice",
            "headers": {"x-api-key": self.api_key},
            "json": request_json
        }
50 |
--------------------------------------------------------------------------------
/speech_gateway/converter/mp3.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from typing import AsyncIterator
3 | from . import FormatConverter, FormatConverterError
4 |
5 |
class MP3Converter(FormatConverter):
    """Converts an audio byte stream to MP3 by piping it through an ffmpeg subprocess."""

    def __init__(self, ffmpeg_path: str = "ffmpeg", bitrate: str = "64k", output_chunksize: int = 1024):
        """
        Args:
            ffmpeg_path: Path to the ffmpeg executable.
            bitrate: Target MP3 bitrate (ffmpeg's -b:a value).
            output_chunksize: Size in bytes of chunks yielded to the caller.
        """
        self.ffmpeg_path = ffmpeg_path
        self.bitrate = bitrate
        self.output_chunksize = output_chunksize

    async def convert(self, input_stream: AsyncIterator[bytes]) -> AsyncIterator[bytes]:
        """Stream *input_stream* through ffmpeg and yield MP3-encoded chunks.

        Raises:
            FormatConverterError: If ffmpeg cannot be started, exits with a
                non-zero status, or feeding input to it fails.
        """
        try:
            ffmpeg_proc = await asyncio.create_subprocess_exec(
                self.ffmpeg_path,
                "-y",
                "-i", "-",  # Read from stdin
                "-f", "mp3",
                "-b:a", self.bitrate,
                "-",  # Write to stdout
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            async def feed_ffmpeg_stdin():
                # Pump the input stream into ffmpeg's stdin, then close it so
                # ffmpeg sees EOF and can flush the encoded output.
                try:
                    async for chunk in input_stream:
                        ffmpeg_proc.stdin.write(chunk)
                        await ffmpeg_proc.stdin.drain()
                    ffmpeg_proc.stdin.close()
                except Exception as ex:
                    ffmpeg_proc.stdin.close()
                    raise FormatConverterError(f"Error feeding data to ffmpeg: {str(ex)}") from ex

            # BUG FIX: keep a reference to the feeder task and await it below.
            # The original fire-and-forget create_task() meant feeder failures
            # were never retrieved and silently lost.
            feed_task = asyncio.create_task(feed_ffmpeg_stdin())

            while True:
                chunk = await ffmpeg_proc.stdout.read(self.output_chunksize)
                if not chunk:
                    break
                yield chunk

            # Surface any error raised while feeding stdin.
            await feed_task

            await ffmpeg_proc.wait()

            if ffmpeg_proc.returncode != 0:
                stderr = await ffmpeg_proc.stderr.read()
                raise FormatConverterError(f"FFmpeg conversion error: {stderr.decode('utf-8')}")

        except FormatConverterError:
            # Already wrapped — don't double-wrap the message.
            raise
        except Exception as ex:
            raise FormatConverterError(f"Error during MP3 conversion: {str(ex)}") from ex
53 |
--------------------------------------------------------------------------------
/speech_gateway/source/azure.py:
--------------------------------------------------------------------------------
1 | from . import StreamSource
2 | from typing import Dict
3 | from ..cache import CacheStorage
4 | from ..cache.file import FileCacheStorage
5 | from ..converter import FormatConverter
6 | from ..performance_recorder import PerformanceRecorder
7 |
8 |
class AzureStreamSource(StreamSource):
    """Stream source for the Azure Cognitive Services text-to-speech REST API."""
    def __init__(self,
        *,
        api_key: str = None,
        region: str = None,
        base_url: str = "https://{region}.tts.speech.microsoft.com/cognitiveservices/v1",
        cache_storage: CacheStorage = None,
        format_converters: Dict[str, FormatConverter] = None,
        max_connections: int = 100,
        max_keepalive_connections: int = 20,
        timeout: float = 10.0,
        performance_recorder: PerformanceRecorder = None,
        debug: bool = False
    ):
        """
        Args:
            api_key: Azure Speech subscription key (Ocp-Apim-Subscription-Key).
            region: Azure region, substituted into base_url's {region} placeholder.
            base_url: Endpoint URL template containing a {region} placeholder.
            cache_storage: Cache backend; defaults to FileCacheStorage under
                "azure_cache".
            format_converters: Optional map of audio format -> converter.
            max_connections: HTTP connection-pool limit.
            max_keepalive_connections: HTTP keep-alive pool limit.
            timeout: HTTP request timeout (presumably seconds — confirm).
            performance_recorder: Optional recorder of synthesis metrics.
            debug: Enable debug behavior in the base class.
        """
        super().__init__(
            base_url=base_url,
            cache_storage=cache_storage or FileCacheStorage(cache_dir="azure_cache"),
            format_converters=format_converters,
            max_connections=max_connections,
            max_keepalive_connections=max_keepalive_connections,
            timeout=timeout,
            performance_recorder=performance_recorder,
            debug=debug
        )
        self.api_key = api_key
        self.region = region

    def get_cache_key(self, audio_format: str, encoded_ssml: bytes, **kwargs) -> str:
        """Build a cache file name from the SSML hash and the audio format."""
        return f"{hash(encoded_ssml)}.{audio_format or 'wav'}"

    def parse_text(self, encoded_ssml: bytes, **kwargs) -> str:
        """Return the SSML payload decoded as UTF-8."""
        return encoded_ssml.decode("utf-8")

    def make_stream_request(self, encoded_ssml: bytes, azure_audio_format: str, **kwargs):
        """Describe the POST request to the regional Azure TTS endpoint.

        *azure_audio_format* is Azure's output-format identifier sent in the
        X-Microsoft-OutputFormat header.
        """
        return {
            "method": "POST",
            "url": self.base_url.format(region=self.region),
            "headers": {
                "X-Microsoft-OutputFormat": azure_audio_format,
                "Content-Type": "application/ssml+xml",
                "Ocp-Apim-Subscription-Key": self.api_key
            },
            "data": encoded_ssml
        }
53 |
--------------------------------------------------------------------------------
/speech_gateway/source/openai_speech.py:
--------------------------------------------------------------------------------
1 | from . import StreamSource
2 | from typing import Dict
3 | from ..cache import CacheStorage
4 | from ..cache.file import FileCacheStorage
5 | from ..converter import FormatConverter
6 | from ..performance_recorder import PerformanceRecorder
7 |
8 |
class OpenAIStreamSource(StreamSource):
    """Stream source for OpenAI (and Azure OpenAI) speech synthesis endpoints."""
    def __init__(self,
        *,
        api_key: str = None,
        base_url: str = "https://api.openai.com/v1",
        cache_storage: CacheStorage = None,
        format_converters: Dict[str, FormatConverter] = None,
        max_connections: int = 100,
        max_keepalive_connections: int = 20,
        timeout: float = 10.0,
        performance_recorder: PerformanceRecorder = None,
        debug: bool = False
    ):
        """
        Args:
            api_key: OpenAI API key (Bearer token) or Azure OpenAI api-key.
            base_url: API root for OpenAI, or the full endpoint URL for
                Azure OpenAI (see make_stream_request).
            cache_storage: Cache backend; defaults to FileCacheStorage under
                "openai_cache".
            format_converters: Optional map of audio format -> converter.
            max_connections: HTTP connection-pool limit.
            max_keepalive_connections: HTTP keep-alive pool limit.
            timeout: HTTP request timeout (presumably seconds — confirm).
            performance_recorder: Optional recorder of synthesis metrics.
            debug: Enable debug behavior in the base class.
        """
        super().__init__(
            base_url=base_url,
            cache_storage=cache_storage or FileCacheStorage(cache_dir="openai_cache"),
            format_converters=format_converters,
            max_connections=max_connections,
            max_keepalive_connections=max_keepalive_connections,
            timeout=timeout,
            performance_recorder=performance_recorder,
            debug=debug
        )
        # NOTE(review): base_url is presumably already stored by
        # StreamSource.__init__ — this reassignment looks redundant; confirm.
        self.base_url = base_url
        self.api_key = api_key

    def get_cache_key(self, audio_format: str, request_json: dict, **kwargs) -> str:
        """Build a cache file name from the request hash; format falls back to response_format."""
        if not audio_format:
            audio_format = request_json.get("response_format", "mp3")
        return f"{hash(str(request_json))}.{audio_format}"

    def parse_text(self, request_json: dict, **kwargs) -> str:
        """Return the input text to synthesize from the request payload."""
        return request_json.get("input")

    def make_stream_request(self, request_json: dict, **kwargs):
        """Describe the POST request, switching URL shape and auth header for Azure OpenAI."""
        # NOTE(review): substring matching on "azure" is fragile — any base
        # URL containing "azure" anywhere takes this path; consider an
        # explicit flag instead.
        if "azure" in self.base_url:
            url = self.base_url
            headers = {"api-key": self.api_key}
        else:
            url = f"{self.base_url}/audio/speech"
            headers = {"Authorization": f"Bearer {self.api_key}"}

        return {
            "method": "POST",
            "url": url,
            "headers": headers,
            "json": request_json
        }
57 |
--------------------------------------------------------------------------------
/tests/source/test_sbv2_source.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | from speech_gateway.source.sbv2 import StyleBertVits2StreamSource
4 |
5 | SBV2_URL = os.getenv("SBV2_URL")
6 |
7 |
8 | @pytest.fixture
9 | def source():
10 | # Create an instance of StyleBertVits2StreamSource
11 | return StyleBertVits2StreamSource(base_url=SBV2_URL)
12 |
13 | @pytest.mark.asyncio
14 | async def test_get_cache_key(source):
15 | # Test get_cache_key method
16 | query_params = {"text": "こんにちは。これはテストです。", "voice": "test"}
17 | cache_key = source.get_cache_key("mp3", query_params)
18 | assert cache_key.endswith(".mp3")
19 |
20 | cache_key = source.get_cache_key("wav", query_params)
21 | assert cache_key.endswith(".wav")
22 |
23 | @pytest.mark.asyncio
24 | async def test_parse_text(source):
25 | # Test parse_text method
26 | query_params = {"text": "こんにちは。これはテストです。", "voice": "test"}
27 | text = source.parse_text(query_params)
28 | assert text == "こんにちは。これはテストです。"
29 |
30 | @pytest.mark.asyncio
31 | async def test_make_stream_request(source):
32 | # Test make_stream_request method
33 | query_params = {"text": "こんにちは。これはテストです。", "voice": "test"}
34 | request = source.make_stream_request(query_params)
35 | assert request["method"] == "GET"
36 | assert request["url"] == f"{SBV2_URL}/voice"
37 | assert request["params"] == query_params
38 |
39 | @pytest.mark.asyncio
40 | async def test_fetch_stream_raw(source):
41 | # Test fetch_stream_raw with a real request (ensure server is running locally)
42 | query_params = {"text": "こんにちは。これはテストです。", "voice": "test"}
43 | http_request = source.make_stream_request(query_params)
44 |
45 | try:
46 | # Replace this part with a live test against the actual service
47 | async for chunk in source.fetch_stream_raw(http_request):
48 | assert isinstance(chunk, bytes)
49 | except Exception as e:
50 | pytest.fail(f"fetch_stream_raw failed: {e}")
51 |
52 | @pytest.mark.asyncio
53 | async def test_fetch_stream(source):
54 | # Test fetch_stream method with conversion and caching
55 | query_params = {"text": "こんにちは。", "voice": "test"}
56 | audio_format = "mp3"
57 |
58 | try:
59 | async for chunk in await source.fetch_stream(audio_format, query_params=query_params):
60 | assert isinstance(chunk, bytes)
61 | except Exception as e:
62 | pytest.fail(f"fetch_stream failed: {e}")
63 |
--------------------------------------------------------------------------------
/docker/docker-compose.yaml:
--------------------------------------------------------------------------------
services:
  # Speech gateway application (FastAPI), built from Dockerfile.app.
  app:
    container_name: spgw-app
    build:
      context: .
      dockerfile: Dockerfile.app
    env_file:
      - .env
    environment:
      # Connection string pointing at the sibling "db" service below.
      - DATABASE_URL=postgresql://${SPGW_DB_USER}:${SPGW_DB_PASSWORD}@db:5432/${SPGW_DB_NAME}
    ports:
      - "${PORT_SPGW}:8000"
    volumes:
      # Persist the generated-audio cache across container restarts.
      - spgw-app-cache:/app/cache
    depends_on:
      db:
        condition: service_healthy
    healthcheck:
      # FastAPI's /docs page doubles as a liveness probe.
      test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped

  # PostgreSQL backing store.
  db:
    container_name: spgw-db
    image: postgres:16
    environment:
      - POSTGRES_USER=${POSTGRES_USER}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
      # SPGW_* variables are presumably consumed by init-db.sh (mounted
      # below into docker-entrypoint-initdb.d) to create the app database.
      - SPGW_DB_NAME=${SPGW_DB_NAME}
      - SPGW_DB_USER=${SPGW_DB_USER}
      - SPGW_DB_PASSWORD=${SPGW_DB_PASSWORD}
    ports:
      - "${PORT_DB}:5432"
    volumes:
      - spgw-postgres-data:/var/lib/postgresql/data
      # First-boot initialization script (read-only mount).
      - ./init-db.sh:/docker-entrypoint-initdb.d/init-db.sh:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${SPGW_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    restart: unless-stopped

  # pgAdmin web UI for inspecting the database.
  pgadmin4:
    container_name: spgw-pgadmin4
    image: dpage/pgadmin4:8.14
    environment:
      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_USER}
      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_PASSWORD}
      PGADMIN_CONFIG_SERVER_MODE: "True"
    ports:
      - "${PORT_PGADMIN}:80"
    volumes:
      - spgw-pgadmin-data:/var/lib/pgadmin
      # Pre-registers the "db" server in pgAdmin.
      - ./pgadmin-servers.json:/pgadmin4/servers.json
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

# All named volumes are bind mounts under ${DATA_PATH} (default ./data);
# the host directories must exist beforehand (see setup-volumes.sh).
volumes:
  spgw-postgres-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${DATA_PATH:-./data}/postgres
  spgw-pgadmin-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${DATA_PATH:-./data}/pgadmin
  spgw-app-cache:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${DATA_PATH:-./data}/cache
84 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/unified.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List
2 | from fastapi import HTTPException
3 | from fastapi import Request, APIRouter
4 | from . import SpeechGateway, UnifiedTTSRequest
5 |
6 |
class UnifiedGateway(SpeechGateway):
    """Routes unified TTS requests to registered per-service gateways.

    Resolution order: explicit service name, then language, then the
    configured default gateway.
    """

    def __init__(self, *, default_gateway: SpeechGateway = None, default_language: str = "ja-JP", debug = False):
        super().__init__(stream_source=None, debug=debug)
        # service name -> gateway
        self.service_map: Dict[str, SpeechGateway] = {}
        # language code -> gateway
        self.language_map: Dict[str, SpeechGateway] = {}
        # gateway -> speaker used when a request omits one
        self.default_speakers: Dict[SpeechGateway, str] = {}
        self.default_gateway: SpeechGateway = default_gateway
        self.default_language = default_language

    def add_gateway(self, service_name: str, gateway: SpeechGateway, *, languages: List[str] = None, default_speaker: str = None, default: bool = False):
        """Register a gateway under a service name and optional languages."""
        self.service_map[service_name] = gateway
        for lang in (languages or []):
            self.language_map[lang] = gateway
        if default:
            # The default gateway also serves the default language.
            self.default_gateway = gateway
            self.language_map[self.default_language] = gateway
        self.default_speakers[gateway] = default_speaker

    def get_gateway(self, tts_request: UnifiedTTSRequest):
        """Resolve the gateway for a request; None when nothing matches."""
        if tts_request.service_name:
            return self.service_map.get(tts_request.service_name)
        if tts_request.language:
            return self.language_map.get(tts_request.language)
        return self.default_gateway if self.default_gateway else None

    def get_router(self) -> APIRouter:
        """Build a router exposing the unified /tts endpoint."""
        router = APIRouter()
        self.register_endpoint(router)
        return router

    def register_endpoint(self, router: APIRouter):
        @router.post("/tts")
        async def post_tts(request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
            gateway = self.get_gateway(tts_request)
            if not gateway:
                raise HTTPException(status_code=404, detail="No gateway found.")

            # Fall back to the gateway's configured default speaker.
            if not tts_request.speaker:
                tts_request.speaker = self.default_speakers.get(gateway)

            return await gateway.unified_tts_handler(request, tts_request, x_audio_format)

    async def shutdown(self):
        # Nothing to release here; member gateways manage their own resources.
        pass
55 |
--------------------------------------------------------------------------------
/speech_gateway/converter/wave.py:
--------------------------------------------------------------------------------
1 | import audioop
2 | import io
3 | import wave
4 | from typing import AsyncIterator
5 | from . import FormatConverter, FormatConverterError
6 |
7 |
class WaveConverter(FormatConverter):
    """Re-encodes WAV audio to a target sample rate and sample width."""

    def __init__(self, output_sample_rate: int = 16000, output_sample_width: int = 2):
        # Target frame rate in Hz and sample width in bytes.
        self.output_sample_rate = output_sample_rate
        self.output_sample_width = output_sample_width

    def convert_wave_bytes(self, input_bytes, output_sample_rate, output_sample_width):
        """Return new WAV bytes resampled to the requested rate/width."""
        with wave.open(io.BytesIO(input_bytes), 'rb') as src:
            src_rate = src.getframerate()
            src_width = src.getsampwidth()
            channels = src.getnchannels()
            frames = src.readframes(src.getnframes())

        # Resample first, while the width still matches the source data.
        if src_rate != output_sample_rate:
            frames, _ = audioop.ratecv(frames, src_width, channels, src_rate, output_sample_rate, None)

        if src_width != output_sample_width:
            if src_width == 2 and output_sample_width == 1:
                # 16-bit signed -> 8-bit; WAV 8-bit samples are unsigned.
                frames = audioop.lin2lin(frames, 2, 1)
                frames = audioop.bias(frames, 1, 128)
            elif src_width == 1 and output_sample_width == 2:
                # 8-bit unsigned -> 16-bit signed.
                frames = audioop.bias(frames, 1, -128)
                frames = audioop.lin2lin(frames, 1, 2)
            else:
                frames = audioop.lin2lin(frames, src_width, output_sample_width)

        out_io = io.BytesIO()
        with wave.open(out_io, "wb") as dst:
            dst.setframerate(output_sample_rate)
            dst.setsampwidth(output_sample_width)
            dst.setnchannels(channels)
            dst.writeframes(frames)

        return out_io.getvalue()

    async def convert(self, input_stream: AsyncIterator[bytes]) -> AsyncIterator[bytes]:
        """Buffer the whole input stream, convert it, and yield one chunk.

        Raises:
            FormatConverterError: on any parsing or conversion failure.
        """
        try:
            buffer = bytearray()
            async for chunk in input_stream:
                buffer.extend(chunk)

            yield self.convert_wave_bytes(bytes(buffer), self.output_sample_rate, self.output_sample_width)

        except Exception as ex:
            # NOTE(review): message says "Mu-Law" (copy-paste from MuLawConverter);
            # kept verbatim because tests assert on this exact text.
            raise FormatConverterError(f"Error during Mu-Law conversion: {str(ex)}")
57 |
--------------------------------------------------------------------------------
/speech_gateway/cache/file.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import AsyncIterator
3 | import aiofiles
4 | from . import CacheStorage, CacheStorageError
5 |
6 |
class FileCacheStorage(CacheStorage):
    """Cache storage persisting audio bytes as files in a local directory.

    Cache keys are used directly as file names inside ``cache_dir``.
    """

    def __init__(self, cache_dir: str = "voice_cache"):
        self.cache_dir = Path(cache_dir)
        # exist_ok avoids the exists()/mkdir() race of the previous version.
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    async def has_cache(self, cache_key: str) -> bool:
        """Return True if a non-empty cache file exists for cache_key.

        Zero-byte files (e.g. left by an interrupted write) are deleted
        and reported as a cache miss.
        """
        file_path = self.cache_dir / cache_key
        if not file_path.exists():
            return False

        if file_path.stat().st_size == 0:
            await self.delete_cache(cache_key)
            return False

        return True

    async def fetch_cache_stream(self, cache_key: str) -> AsyncIterator[bytes]:
        """Yield the cached file for cache_key in 1 KiB chunks.

        Raises:
            IOError: if the file cannot be read.
                NOTE(review): sibling methods raise CacheStorageError; kept
                IOError here so existing callers catching it still work.
        """
        # Computed outside the try so the except clause can always format it.
        file_path = self.cache_dir / cache_key
        try:
            async with aiofiles.open(file_path, mode="rb") as file:
                while chunk := await file.read(1024):
                    yield chunk

        except Exception as ex:
            raise IOError(f"Error reading file {file_path}: {str(ex)}") from ex

    async def write_cache(self, input_stream: AsyncIterator[bytes], cache_key: str) -> AsyncIterator[bytes]:
        """Tee input_stream into the cache file while re-yielding every chunk.

        Raises:
            CacheStorageError: if writing fails; the partial file is removed.
        """
        file_path = self.cache_dir / cache_key
        try:
            async with aiofiles.open(file_path, "wb") as file:
                async for chunk in input_stream:
                    await file.write(chunk)
                    # Flush per chunk so a concurrent reader sees data early.
                    await file.flush()
                    yield chunk

        except Exception as ex:
            # Clean up the partial file; never let cleanup mask the real error.
            if file_path.exists():
                try:
                    file_path.unlink()
                except OSError:  # was a bare except; don't swallow e.g. KeyboardInterrupt
                    pass
            raise CacheStorageError(f"Error during file save operation: {str(ex)}") from ex

    async def delete_cache(self, cache_key: str) -> None:
        """Delete the cache file for cache_key if it exists."""
        file_path = self.cache_dir / cache_key
        try:
            if file_path.exists():
                file_path.unlink()

        except Exception as ex:
            raise CacheStorageError(f"Error deleting cache file {file_path}: {str(ex)}") from ex

    async def clear_all_cache(self) -> None:
        """Delete every regular file directly under the cache directory."""
        try:
            for file_path in self.cache_dir.iterdir():
                if file_path.is_file():
                    file_path.unlink()

        except Exception as ex:
            raise CacheStorageError(f"Error clearing cache directory {self.cache_dir}: {str(ex)}") from ex
69 |
--------------------------------------------------------------------------------
/tests/source/test_azure_source.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | from speech_gateway.source.azure import AzureStreamSource
4 |
# Azure Speech credentials read from the environment; live tests below
# will fail if these are unset.
AZURE_API_KEY = os.getenv("AZURE_API_KEY")
AZURE_REGION = os.getenv("AZURE_REGION")
7 |
8 |
@pytest.fixture
def source():
    """Provide an AzureStreamSource wired to env-supplied credentials."""
    return AzureStreamSource(api_key=AZURE_API_KEY, region=AZURE_REGION)
13 |
@pytest.mark.asyncio
async def test_get_cache_key(source):
    # Cache keys must carry the requested audio format as their extension.
    for fmt in ("mp3", "wav"):
        key = source.get_cache_key(fmt, b"dummy")
        assert key.endswith(f".{fmt}")
21 |
@pytest.mark.asyncio
async def test_parse_text(source):
    # parse_text should decode the raw SSML bytes back into a string.
    assert source.parse_text(encoded_ssml=b"dummy") == "dummy"
26 |
@pytest.mark.asyncio
async def test_make_stream_request(source):
    # Verify the HTTP request skeleton built for the Azure TTS endpoint.
    req = source.make_stream_request(encoded_ssml=b"dummy", azure_audio_format="dummy_mp3")
    headers = req["headers"]
    assert req["method"] == "POST"
    assert req["url"] == f"https://{AZURE_REGION}.tts.speech.microsoft.com/cognitiveservices/v1"
    assert headers["X-Microsoft-OutputFormat"] == "dummy_mp3"
    assert headers["Content-Type"] == "application/ssml+xml"
    assert headers["Ocp-Apim-Subscription-Key"] == source.api_key
    assert req["data"] == b"dummy"
37 |
@pytest.mark.asyncio
async def test_fetch_stream_raw(source):
    """fetch_stream_raw should stream raw bytes from the live Azure endpoint."""
    # Plain literal: the f-prefix was useless (no placeholders, ruff F541).
    ssml_text = "こんにちは。これは音声合成のテストです。"
    http_request = source.make_stream_request(ssml_text.encode("utf-8"), "riff-16khz-16bit-mono-pcm")

    try:
        # Replace this part with a live test against the actual service
        async for chunk in source.fetch_stream_raw(http_request):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream_raw failed: {e}")
50 |
@pytest.mark.asyncio
async def test_fetch_stream(source):
    """fetch_stream should yield bytes after conversion and caching."""
    # Plain literal: the f-prefix was useless (no placeholders, ruff F541).
    ssml_text = "こんにちは。これは音声合成のテストです。"
    audio_format = "mp3"

    try:
        async for chunk in await source.fetch_stream(audio_format, azure_audio_format="audio-16khz-32kbitrate-mono-mp3", encoded_ssml=ssml_text.encode("utf-8")):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream failed: {e}")
62 |
--------------------------------------------------------------------------------
/tests/source/test_nijivoice_encoded_source.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | from speech_gateway.source.nijivoice_encoded import NijiVoiceEncodedStreamSource
4 |
# Live NijiVoice API endpoint and the local gateway placed in front of it.
BASE_URL = "https://api.nijivoice.com"
GATEWAY_BASE_URL = "http://127.0.0.1:8000/nijivoice"
# API key must be supplied via the environment for the live tests below.
NIJIVOICE_API_KEY = os.getenv("NIJIVOICE_API_KEY")
# Voice actor id used by all tests in this module.
VOICE_ACTOR_ID = "a192db5f-bd8b-4fc7-bc08-af5ca5957c12"
# Generation request body; note the numeric fields are sent as strings.
PAYLOAD = {
    "script": "こんにちは。これはテストです。",
    "speed": "1.0",
    "emotionalLevel": "0.1",
    "soundDuration": "0.1",
    "format": "mp3",
}
16 |
17 |
@pytest.fixture
def source():
    """NijiVoiceEncodedStreamSource against the live API, debug enabled."""
    return NijiVoiceEncodedStreamSource(base_url=BASE_URL, api_key=NIJIVOICE_API_KEY, debug=True)
22 |
@pytest.mark.asyncio
async def test_get_cache_key(source):
    # Keys embed the voice actor id and end with "<format>.json".
    for fmt in ("mp3", "wav"):
        key = source.get_cache_key(fmt, VOICE_ACTOR_ID, PAYLOAD)
        assert key.endswith(f".{fmt}.json")
        assert VOICE_ACTOR_ID in key
33 |
@pytest.mark.asyncio
async def test_parse_text(source):
    # The spoken text is taken from the payload's "script" field.
    assert source.parse_text(request_json=PAYLOAD) == PAYLOAD["script"]
39 |
@pytest.mark.asyncio
async def test_make_stream_request(source):
    # Verify the generate-encoded-voice request skeleton.
    req = source.make_stream_request(VOICE_ACTOR_ID, PAYLOAD)
    expected_url = f"{BASE_URL}/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice"
    assert req["method"] == "POST"
    assert req["url"] == expected_url
    assert req["headers"]["x-api-key"] == NIJIVOICE_API_KEY
    assert req["json"] == PAYLOAD
48 |
@pytest.mark.asyncio
async def test_fetch_stream_raw(source):
    """Raw streaming against the live NijiVoice API should yield bytes."""
    http_request = source.make_stream_request(VOICE_ACTOR_ID, PAYLOAD)

    try:
        async for chunk in source.fetch_stream_raw(http_request):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream_raw failed: {e}")
60 |
@pytest.mark.asyncio
async def test_fetch_stream(source):
    """End-to-end fetch with conversion and caching should yield bytes."""
    try:
        stream = await source.fetch_stream(
            audio_format="mp3",
            voice_actor_id=VOICE_ACTOR_ID,
            request_json=PAYLOAD,
        )
        async for chunk in stream:
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream failed: {e}")
73 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/voicevox.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from fastapi import APIRouter, Request
3 | from fastapi.responses import StreamingResponse
4 | from . import SpeechGateway, UnifiedTTSRequest
5 | from ..cache.file import FileCacheStorage
6 | from ..converter.mp3 import MP3Converter
7 | from ..performance_recorder import SQLitePerformanceRecorder
8 | from ..source.voicevox import VoicevoxStreamSource
9 |
10 |
class VoicevoxGateway(SpeechGateway):
    """Gateway in front of a VOICEVOX engine, with optional style mapping."""

    def __init__(self, *, stream_source: VoicevoxStreamSource = None, base_url: str = None, cache_dir: str = None, style_mapper: Dict[str, Dict[str, str]] = None, debug = False):
        self.stream_source: VoicevoxStreamSource = None
        if stream_source:
            super().__init__(stream_source=stream_source, debug=debug)
        else:
            # Default wiring: local engine, file cache, 64k MP3 conversion.
            default_source = VoicevoxStreamSource(
                base_url=base_url or "http://127.0.0.1:50021",
                cache_storage=FileCacheStorage(cache_dir=cache_dir or "voicevox_cache"),
                format_converters={"mp3": MP3Converter(bitrate="64k")},
                performance_recorder=SQLitePerformanceRecorder(),
                debug=debug,
            )
            super().__init__(stream_source=default_source, debug=debug)
        self.style_mapper = style_mapper or {}

    def register_endpoint(self, router: APIRouter):
        @router.post("/synthesis")
        async def synthesis_handler(speaker: str, request: Request, x_audio_format: str = "wav"):
            # Only mp3 triggers conversion; every other value is served as wav.
            audio_format = "mp3" if x_audio_format == "mp3" else "wav"
            audio_query = await request.json()
            stream_resp = await self.stream_source.fetch_stream(
                audio_format=audio_format,
                speaker=speaker,
                audio_query=audio_query,
            )
            return StreamingResponse(stream_resp, media_type=f"audio/{audio_format}")

    async def unified_tts_handler(self, request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
        """Serve a unified TTS request via the VOICEVOX source."""
        speaker = tts_request.speaker

        # Map a logical style name (case-insensitive) to a concrete speaker id.
        if tts_request.style is not None:
            styles_for_speaker = self.style_mapper.get(tts_request.speaker)
            if styles_for_speaker:
                wanted = tts_request.style.lower()
                for style_name, mapped_speaker in styles_for_speaker.items():
                    if style_name.lower() == wanted:
                        speaker = mapped_speaker
                        break

        audio_query = await self.stream_source.get_audio_query(speaker, tts_request.text)

        if tts_request.speed:
            audio_query["speedScale"] = tts_request.speed

        stream_resp = await self.stream_source.fetch_stream(
            audio_format=x_audio_format,
            speaker=speaker,
            audio_query=audio_query,
        )
        return StreamingResponse(stream_resp, media_type=f"audio/{x_audio_format}")
61 |
--------------------------------------------------------------------------------
/tests/gateway/test_sbv2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import httpx
4 |
5 |
@pytest.mark.asyncio
async def test_sbv2(random_text, wave_checker, audio_transcriber):
    """Default request (no x_audio_format) should return WAV audio."""
    resp = httpx.get(
        "http://127.0.0.1:8000/sbv2/voice",
        params={"text": random_text, "model_id": "0", "speaker_id": "0"},
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")
17 |
18 |
@pytest.mark.asyncio
async def test_sbv2_wav(random_text, wave_checker, audio_transcriber):
    """Explicit x_audio_format=wav should return WAV audio."""
    params = {"text": random_text, "model_id": "0", "speaker_id": "0", "x_audio_format": "wav"}
    resp = httpx.get("http://127.0.0.1:8000/sbv2/voice", params=params)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")
31 |
32 |
@pytest.mark.asyncio
async def test_sbv2_mp3(random_text, mp3_checker, audio_transcriber):
    """Explicit x_audio_format=mp3 should return MP3 audio."""
    params = {"text": random_text, "model_id": "0", "speaker_id": "0", "x_audio_format": "mp3"}
    resp = httpx.get("http://127.0.0.1:8000/sbv2/voice", params=params)
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")
45 |
46 |
@pytest.mark.asyncio
async def test_sbv2_unified(random_text, wave_checker, audio_transcriber):
    """Unified /tts endpoint defaults to WAV for the sbv2 service."""
    body = {"text": random_text, "speaker": "0-0", "service_name": "sbv2"}
    resp = httpx.post("http://127.0.0.1:8000/tts", json=body)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")
58 |
59 |
@pytest.mark.asyncio
async def test_sbv2_unified_wav(random_text, wave_checker, audio_transcriber):
    """Unified /tts with x_audio_format=wav returns WAV."""
    body = {"text": random_text, "speaker": "0-0", "service_name": "sbv2"}
    resp = httpx.post("http://127.0.0.1:8000/tts", params={"x_audio_format": "wav"}, json=body)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")
74 |
75 |
@pytest.mark.asyncio
async def test_sbv2_unified_mp3(random_text, mp3_checker, audio_transcriber):
    """Unified /tts with x_audio_format=mp3 returns MP3."""
    body = {"text": random_text, "speaker": "0-0", "service_name": "sbv2"}
    resp = httpx.post("http://127.0.0.1:8000/tts", params={"x_audio_format": "mp3"}, json=body)
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")
90 |
--------------------------------------------------------------------------------
/tests/cache/test_file.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from speech_gateway.cache import FileCacheStorage
3 |
4 |
@pytest.fixture
def temp_cache_dir(tmp_path):
    """Fresh cache directory under pytest's tmp_path."""
    path = tmp_path / "test_cache"
    path.mkdir()
    return path
11 |
12 |
@pytest.fixture
def file_cache_storage(temp_cache_dir):
    """FileCacheStorage rooted at the temporary cache directory."""
    return FileCacheStorage(cache_dir=str(temp_cache_dir))
17 |
18 |
@pytest.mark.asyncio
async def test_has_cache(file_cache_storage, temp_cache_dir):
    """has_cache: missing/empty files are misses; empty files get deleted."""
    cache_key = "test_file"
    target = temp_cache_dir / cache_key

    # No file yet -> miss.
    assert not await file_cache_storage.has_cache(cache_key)

    # Non-empty file -> hit.
    target.write_text("test content")
    assert await file_cache_storage.has_cache(cache_key)

    # Zero-byte file -> miss, and the stale file must be removed.
    target.write_text("")
    assert not await file_cache_storage.has_cache(cache_key)
    assert not target.exists()
36 |
37 |
@pytest.mark.asyncio
async def test_fetch_cache_stream(file_cache_storage, temp_cache_dir):
    """Streaming a cached file returns its exact contents."""
    cache_key = "test_file"
    payload = b"This is test content."
    (temp_cache_dir / cache_key).write_bytes(payload)

    collected = b""
    async for chunk in file_cache_storage.fetch_cache_stream(cache_key):
        collected += chunk

    assert collected == payload
51 |
52 |
@pytest.mark.asyncio
async def test_write_cache(file_cache_storage, temp_cache_dir):
    """write_cache persists the stream and re-yields every chunk."""
    cache_key = "test_file"
    target = temp_cache_dir / cache_key

    async def chunks():
        yield b"Part 1 "
        yield b"Part 2"

    passed_through = b""
    async for chunk in file_cache_storage.write_cache(chunks(), cache_key):
        passed_through += chunk

    assert target.exists()
    assert target.read_bytes() == b"Part 1 Part 2"
    assert passed_through == b"Part 1 Part 2"
70 |
71 |
@pytest.mark.asyncio
async def test_delete_cache(file_cache_storage, temp_cache_dir):
    """delete_cache removes an existing cache file."""
    cache_key = "test_file"
    target = temp_cache_dir / cache_key
    target.write_text("test content")

    await file_cache_storage.delete_cache(cache_key)

    assert not target.exists()
81 |
82 |
@pytest.mark.asyncio
async def test_clear_all_cache(file_cache_storage, temp_cache_dir):
    """clear_all_cache empties the cache directory."""
    for name, body in (("file1", "content1"), ("file2", "content2")):
        (temp_cache_dir / name).write_text(body)

    await file_cache_storage.clear_all_cache()

    assert list(temp_cache_dir.iterdir()) == []
92 |
--------------------------------------------------------------------------------
/tests/converter/test_wave.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | import wave
4 | import io
5 | from typing import AsyncIterator
6 | from speech_gateway.converter.wave import WaveConverter, FormatConverterError
7 |
8 |
@pytest.fixture
def wave_converter():
    """Converter with default output settings (16 kHz, 16-bit)."""
    return WaveConverter()
12 |
13 |
@pytest.fixture
def wave_converter_custom():
    """Converter targeting 8 kHz, 8-bit output."""
    return WaveConverter(output_sample_rate=8000, output_sample_width=1)
17 |
18 |
@pytest.mark.asyncio
async def test_wave_conversion(wave_converter):
    """Default converter should emit 16 kHz / 16-bit WAV."""
    input_file = "tests/data/test.wav"

    async def input_stream() -> AsyncIterator[bytes]:
        with open(input_file, "rb") as f:
            while data := f.read(1024):
                yield data

    output = b""
    try:
        async for piece in wave_converter.convert(input_stream()):
            output += piece
    except FormatConverterError as e:
        pytest.fail(f"Wave conversion failed with error: {e}")

    assert output != b""

    with wave.open(io.BytesIO(output), 'rb') as wf:
        assert wf.getframerate() == 16000
        assert wf.getsampwidth() == 2
40 |
41 |
@pytest.mark.asyncio
async def test_wave_conversion_custom_params(wave_converter_custom):
    """Custom converter should emit 8 kHz / 8-bit WAV."""
    input_file = "tests/data/test.wav"

    async def input_stream() -> AsyncIterator[bytes]:
        with open(input_file, "rb") as f:
            while data := f.read(1024):
                yield data

    output = b""
    try:
        async for piece in wave_converter_custom.convert(input_stream()):
            output += piece
    except FormatConverterError as e:
        pytest.fail(f"Wave conversion failed with error: {e}")

    assert output != b""

    with wave.open(io.BytesIO(output), 'rb') as wf:
        assert wf.getframerate() == 8000
        assert wf.getsampwidth() == 1
63 |
64 |
@pytest.mark.asyncio
async def test_wave_conversion_error_handling(wave_converter):
    """Non-WAV input must raise FormatConverterError."""
    async def bad_stream() -> AsyncIterator[bytes]:
        yield b"Invalid wave data"

    with pytest.raises(FormatConverterError) as exc_info:
        async for _ in wave_converter.convert(bad_stream()):
            pass

    # The converter's message text is (confusingly) shared with MuLawConverter.
    assert "Error during Mu-Law conversion" in str(exc_info.value)
75 |
76 |
@pytest.mark.asyncio
async def test_convert_wave_bytes():
    """Direct byte-level conversion: 16 kHz/16-bit mono -> 8 kHz/8-bit."""
    converter = WaveConverter(output_sample_rate=8000, output_sample_width=1)

    src_io = io.BytesIO()
    with wave.open(src_io, 'wb') as wf:
        wf.setframerate(16000)
        wf.setsampwidth(2)
        wf.setnchannels(1)
        wf.writeframes(b'\x00\x00' * 1000)

    converted = converter.convert_wave_bytes(src_io.getvalue(), 8000, 1)

    assert converted != b""

    with wave.open(io.BytesIO(converted), 'rb') as wf:
        assert wf.getframerate() == 8000
        assert wf.getsampwidth() == 1
        assert wf.getnchannels() == 1
97 |
--------------------------------------------------------------------------------
/speech_gateway/performance_recorder/sqlite.py:
--------------------------------------------------------------------------------
1 | from dataclasses import fields
2 | from datetime import datetime, timezone
3 | import queue
4 | import sqlite3
5 | import threading
6 | from . import PerformanceRecorder, PerformanceRecord
7 |
8 |
class SQLitePerformanceRecorder(PerformanceRecorder):
    """Asynchronously persists PerformanceRecord rows into a SQLite database.

    Records are queued and written by a dedicated daemon thread so that
    callers of record() never block on disk I/O.
    """

    def __init__(self, db_path="performance.db"):
        self.db_path = db_path
        self.record_queue = queue.Queue()
        self.stop_event = threading.Event()

        self.init_db()

        # The worker owns its own connection: sqlite3 connections are not
        # shareable across threads by default.
        self.worker_thread = threading.Thread(target=self.start_worker, daemon=True)
        self.worker_thread.start()

    def init_db(self):
        """Create the performance_records table if it does not exist."""
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.execute(
                    """
                    CREATE TABLE IF NOT EXISTS performance_records (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        process_id TEXT NOT NULL,
                        created_at TEXT NOT NULL,
                        source TEXT,
                        text TEXT,
                        audio_format TEXT,
                        cached INTEGER,
                        elapsed REAL
                    )
                    """
                )
        finally:
            conn.close()

    def start_worker(self):
        """Drain the queue until close() is called and the queue is empty."""
        conn = sqlite3.connect(self.db_path)
        try:
            while not self.stop_event.is_set() or not self.record_queue.empty():
                try:
                    record = self.record_queue.get(timeout=0.5)
                except queue.Empty:
                    continue

                try:
                    self.insert_record(conn, record)
                except sqlite3.Error:
                    # BUGFIX: a failed insert must not kill the worker thread;
                    # close() joins the queue and would deadlock if task_done()
                    # were never called for this item.
                    pass
                finally:
                    self.record_queue.task_done()
        finally:
            conn.close()

    def insert_record(self, conn: sqlite3.Connection, record: PerformanceRecord):
        """Insert one record; created_at is stamped here in UTC."""
        columns = [field.name for field in fields(PerformanceRecord)] + ["created_at"]
        placeholders = ["?"] * len(columns)
        # Store ISO-8601 text explicitly: the implicit datetime adapter is
        # deprecated since Python 3.12, and the column is declared TEXT.
        values = [getattr(record, field.name) for field in fields(PerformanceRecord)] + [
            datetime.now(timezone.utc).isoformat()
        ]
        sql = f"INSERT INTO performance_records ({', '.join(columns)}) VALUES ({', '.join(placeholders)})"
        conn.execute(sql, values)
        conn.commit()

    def record(
        self,
        *,
        process_id: str,
        source: str = None,
        text: str = None,
        audio_format: str = None,
        cached: int = 0,
        elapsed: float = None,
    ):
        """Queue a performance record for asynchronous insertion."""
        self.record_queue.put(PerformanceRecord(
            process_id=process_id,
            source=source,
            text=text,
            audio_format=audio_format,
            cached=cached,
            elapsed=elapsed,
        ))

    def close(self):
        """Flush all pending records and stop the worker thread."""
        self.stop_event.set()
        self.record_queue.join()
        self.worker_thread.join()
88 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/openai_speech.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Request
2 | from fastapi.responses import StreamingResponse
3 | from . import SpeechGateway, UnifiedTTSRequest
4 | from ..cache.file import FileCacheStorage
5 | from ..performance_recorder import PerformanceRecorder, SQLitePerformanceRecorder
6 | from ..source.openai_speech import OpenAIStreamSource
7 |
8 |
class OpenAIGateway(SpeechGateway):
    """Gateway for OpenAI's /audio/speech TTS API."""

    def __init__(self, *, stream_source: OpenAIStreamSource = None, api_key: str = None, model: str = "tts-1", speed: float = 1.0, instructions: str = None, base_url: str = None, cache_dir: str = None, performance_recorder: PerformanceRecorder = None, debug = False):
        self.stream_source: OpenAIStreamSource = None
        if stream_source:
            super().__init__(stream_source=stream_source, debug=debug)
        else:
            # Assemble a default source pointing at the public OpenAI API.
            default_source = OpenAIStreamSource(
                api_key=api_key,
                base_url=base_url or "https://api.openai.com/v1",
                cache_storage=FileCacheStorage(cache_dir=cache_dir or "openai_cache"),
                format_converters={},
                performance_recorder=performance_recorder or SQLitePerformanceRecorder(),
                debug=debug,
            )
            super().__init__(stream_source=default_source, debug=debug)
        self.model = model
        self.speed = speed
        self.instructions = instructions

    def register_endpoint(self, router: APIRouter):
        @router.post("/audio/speech")
        async def synthesis_handler(request: Request, x_audio_format: str = None):
            request_json = await request.json()

            if not x_audio_format:
                # No override: honor the body's response_format (mp3 default).
                x_audio_format = request_json.get("response_format", "mp3")
            elif x_audio_format in ["mp3", "opus", "aac", "flac", "wav", "pcm"]:
                # Natively supported by OpenAI: request it directly.
                request_json["response_format"] = x_audio_format
            else:
                # Unsupported target: fetch wav and convert downstream.
                request_json["response_format"] = "wav"

            stream_resp = await self.stream_source.fetch_stream(
                request_json=request_json,
                audio_format=x_audio_format
            )
            return StreamingResponse(stream_resp, media_type=f"audio/{x_audio_format}")

    async def unified_tts_handler(self, request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
        """Translate a unified TTS request into an OpenAI speech request."""
        payload = {
            "model": self.model,
            "voice": tts_request.speaker,
            "input": tts_request.text,
            "speed": tts_request.speed or self.speed,
            "instructions": self.instructions,
            "response_format": x_audio_format
        }

        stream_resp = await self.stream_source.fetch_stream(
            audio_format=x_audio_format,
            request_json=payload,
        )
        return StreamingResponse(stream_resp, media_type=f"audio/{x_audio_format}")
65 |
--------------------------------------------------------------------------------
/speech_gateway/converter/mulaw.py:
--------------------------------------------------------------------------------
1 | import audioop
2 | import io
3 | import struct
4 | from typing import AsyncIterator
5 | import wave
6 | from . import FormatConverter, FormatConverterError
7 |
8 |
class MuLawConverter(FormatConverter):
    """Converts WAV audio to raw (or .au-wrapped) 8-bit G.711 mu-law."""

    def __init__(self, rate: int = 8000, include_header: bool = False, to_linear16: callable = None):
        # Target output sample rate in Hz.
        self.rate = rate
        # Prepend a Sun .au header when True; otherwise emit raw mu-law bytes.
        self.include_header = include_header
        # Optional hook converting the buffered input to linear16 WAV first.
        self.to_linear16 = to_linear16

    def create_au_header(self, data_size: int, sample_rate: int, channels: int) -> bytes:
        """Build a Sun .au header for 8-bit mu-law data.

        Layout: 4-byte magic ".snd" followed by six big-endian uint32
        fields — data offset, data size, encoding, sample rate, channels —
        plus a 4-byte zero-filled annotation, 28 bytes in total.
        """
        magic_number = b".snd"
        # BUGFIX: this field is the byte offset where audio data begins.
        # 28 bytes are packed below (24-byte fixed header + 4-byte
        # annotation), so the offset must be 28; the previous value of 24
        # made spec-honoring players treat 4 header bytes as audio.
        data_offset = 28
        encoding = 1            # 1 = 8-bit G.711 mu-law
        annotation = 0          # minimal zero-filled annotation field

        header = struct.pack(
            ">4sIIIIII",        # Big-endian: 4-char string, 6 unsigned integers
            magic_number,
            data_offset,
            data_size,
            encoding,
            sample_rate,
            channels,
            annotation,
        )
        return header

    async def convert(self, input_stream: AsyncIterator[bytes]) -> AsyncIterator[bytes]:
        """Buffer the WAV input, downmix/resample, and yield mu-law bytes once.

        Raises:
            FormatConverterError: on any parsing or conversion failure.
        """
        try:
            # Load whole wave data; conversion is done in a single pass.
            wav_data = b""
            async for chunk in input_stream:
                wav_data += chunk

            if self.to_linear16:
                wav_data = self.to_linear16(wav_data)

            # Parse wave info
            with wave.open(io.BytesIO(wav_data), "rb") as wf:
                nchannels = wf.getnchannels()
                sampwidth = wf.getsampwidth()
                framerate = wf.getframerate()
                nframes = wf.getnframes()
                raw_frames = wf.readframes(nframes)

            # Downmix to mono (equal-weight average of the channels).
            if nchannels > 1:
                mono_frames = audioop.tomono(raw_frames, sampwidth, 0.5, 0.5)
            else:
                mono_frames = raw_frames

            # Resample to the target rate if needed.
            if framerate != self.rate:
                converted_frames, _ = audioop.ratecv(
                    mono_frames,
                    sampwidth,
                    1,
                    framerate,
                    self.rate,
                    None
                )
            else:
                converted_frames = mono_frames

            # Companding: linear PCM -> 8-bit mu-law.
            mulaw_data = audioop.lin2ulaw(converted_frames, sampwidth)

            if self.include_header:
                # Wrap in a Sun .au container (mono).
                header = self.create_au_header(len(mulaw_data), self.rate, 1)
                mulaw_data = header + mulaw_data

            # Return whole data at once
            yield mulaw_data

        except Exception as ex:
            raise FormatConverterError(f"Error during Mu-Law conversion: {str(ex)}")
84 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/sbv2.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from fastapi import APIRouter, Request
3 | from fastapi.responses import StreamingResponse
4 | from . import SpeechGateway, UnifiedTTSRequest
5 | from ..cache.file import FileCacheStorage
6 | from ..converter.mp3 import MP3Converter
7 | from ..performance_recorder import PerformanceRecorder, SQLitePerformanceRecorder
8 | from ..source.sbv2 import StyleBertVits2StreamSource
9 |
10 |
class StyleBertVits2Gateway(SpeechGateway):
    """Gateway that proxies Style-Bert-VITS2 speech synthesis."""

    def __init__(self, *, stream_source: StyleBertVits2StreamSource = None, base_url: str = None, cache_dir: str = None, performance_recorder: PerformanceRecorder = None, style_mapper: Dict[str, Dict[str, str]] = None, debug = False):
        """Initialize with the given source, or build a default one from *base_url*."""
        self.stream_source: StyleBertVits2StreamSource = None
        if stream_source:
            super().__init__(stream_source=stream_source, debug=debug)
        else:
            default_source = StyleBertVits2StreamSource(
                base_url=base_url or "http://127.0.0.1:5000",
                cache_storage=FileCacheStorage(cache_dir=cache_dir or "sbv2_cache"),
                format_converters={"mp3": MP3Converter(bitrate="64k")},
                performance_recorder=performance_recorder or SQLitePerformanceRecorder(),
                debug=debug,
            )
            super().__init__(stream_source=default_source, debug=debug)
        # Maps "<model>-<speaker>" -> {style name (any case): SBV2 style value}
        self.style_mapper = style_mapper or {}

    def register_endpoint(self, router: APIRouter):
        """Register the native /voice passthrough endpoint on *router*."""
        @router.get("/voice")
        async def get_voice_handler(request: Request):
            params = dict(request.query_params)
            audio_format = params.get("x_audio_format", "wav")
            # Forward everything except the gateway-only format switch
            upstream_params = {
                key: value
                for key, value in params.items()
                if value is not None and key not in {"x_audio_format"}
            }

            stream_resp = await self.stream_source.fetch_stream(
                audio_format=audio_format,
                query_params=upstream_params,
            )
            return StreamingResponse(stream_resp, media_type=f"audio/{audio_format}")

    async def unified_tts_handler(self, request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
        """Translate a unified TTS request into Style-Bert-VITS2 query params."""
        # Speaker is encoded as "<model_id>-<speaker_id>"
        model_id, speaker_id = tts_request.speaker.split("-")
        params = {
            "text": tts_request.text,
            "model_id": model_id,
            "speaker_id": speaker_id,
        }

        # SBV2's "length" parameter is the inverse of playback speed
        if tts_request.speed:
            params["length"] = 1 / tts_request.speed

        # Map the requested style (case-insensitive) to this speaker's style value
        if tts_request.style is not None:
            styles_for_speaker = self.style_mapper.get(tts_request.speaker)
            if styles_for_speaker:
                wanted = tts_request.style.lower()
                for name, mapped_value in styles_for_speaker.items():
                    if name.lower() == wanted:
                        params["style"] = mapped_value
                        break

        # Pass through any extra query parameters except the format switch
        for key, value in dict(request.query_params).items():
            if value is not None and key not in {"x_audio_format"}:
                params[key] = value

        stream_resp = await self.stream_source.fetch_stream(
            audio_format=x_audio_format,
            query_params=params,
        )

        return StreamingResponse(stream_resp, media_type=f"audio/{x_audio_format}")
74 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/azure.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Request
2 | from fastapi.responses import StreamingResponse
3 | from . import SpeechGateway, UnifiedTTSRequest
4 | from ..cache.file import FileCacheStorage
5 | from ..performance_recorder import PerformanceRecorder, SQLitePerformanceRecorder
6 | from ..source.azure import AzureStreamSource
7 |
8 |
class AzureGateway(SpeechGateway):
    """Gateway that proxies Azure Cognitive Services text-to-speech."""

    def __init__(self, *, stream_source: AzureStreamSource = None, api_key: str = None, region: str = None, base_url: str = None, language: str = "ja-JP", cache_dir: str = None, performance_recorder: PerformanceRecorder = None, debug = False):
        """Initialize with the given source, or build a default AzureStreamSource."""
        self.stream_source: AzureStreamSource = None
        if stream_source:
            super().__init__(stream_source=stream_source, debug=debug)
        else:
            super().__init__(
                stream_source=AzureStreamSource(
                    api_key=api_key,
                    region=region,
                    base_url=base_url or "https://{region}.tts.speech.microsoft.com/cognitiveservices/v1",
                    cache_storage=FileCacheStorage(cache_dir=cache_dir or "azure_cache"),
                    format_converters={},
                    performance_recorder=performance_recorder or SQLitePerformanceRecorder(),
                    debug=debug
                ),
                debug=debug
            )
        # xml:lang used when composing SSML for the unified endpoint
        self.default_language = language

    def register_endpoint(self, router: APIRouter):
        """Register the native Azure-compatible synthesis passthrough endpoint."""
        @router.post("/cognitiveservices/v1")
        async def synthesis_handler(request: Request, x_audio_format: str = None):
            # x_audio_format (query) overrides the X-Microsoft-OutputFormat
            # header; otherwise the header decides and x_audio_format is
            # derived from it for the response media type.
            if x_audio_format == "wav":
                azure_audio_format = "riff-16khz-16bit-mono-pcm"
            elif x_audio_format == "mp3":
                azure_audio_format = "audio-16khz-32kbitrate-mono-mp3"
            else:
                azure_audio_format = request.headers["X-Microsoft-OutputFormat"]
                if "pcm" in azure_audio_format:
                    x_audio_format = "wav"
                else:
                    x_audio_format = "mp3"

            stream_resp = await self.stream_source.fetch_stream(
                encoded_ssml=await request.body(),
                azure_audio_format=azure_audio_format,
                audio_format=x_audio_format
            )
            return StreamingResponse(stream_resp, media_type=f"audio/{x_audio_format}")

    async def unified_tts_handler(self, request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
        """Build SSML from the unified request and stream Azure's synthesis result.

        Raises:
            Whatever the underlying stream source raises on HTTP failure.
        """
        if x_audio_format == "mp3":
            azure_audio_format = "audio-16khz-32kbitrate-mono-mp3"
        else:
            # FIX: default to RIFF PCM. Previously any value other than
            # "wav"/"mp3" left azure_audio_format unbound (UnboundLocalError).
            azure_audio_format = "riff-16khz-16bit-mono-pcm"

        # Azure prosody rate is a percentage delta from normal speed
        if tts_request.speed:
            speed_percentage = (tts_request.speed - 1.0) * 100
        else:
            speed_percentage = 0

        # FIX: wrap the text in SSML. The body is sent as application/ssml+xml,
        # so the voice (speaker) and prosody rate must be expressed in markup;
        # previously speed_percentage and the speaker were silently ignored.
        ssml_text = (
            f"<speak version='1.0' xml:lang='{self.default_language}'>"
            f"<voice name='{tts_request.speaker}'>"
            f"<prosody rate='{speed_percentage:+.0f}%'>{tts_request.text}</prosody>"
            f"</voice>"
            f"</speak>"
        )

        stream_resp = await self.stream_source.fetch_stream(
            encoded_ssml=ssml_text.encode("utf-8"),
            azure_audio_format=azure_audio_format,
            audio_format=x_audio_format
        )
        return StreamingResponse(stream_resp, media_type=f"audio/{x_audio_format}")
68 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/nijivoice_encoded.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import io
3 | import json
4 | from typing import Dict
5 | from fastapi import APIRouter, Request
6 | from fastapi.responses import StreamingResponse, Response
7 | from . import SpeechGateway, UnifiedTTSRequest
8 | from ..cache.file import FileCacheStorage
9 | from ..performance_recorder import PerformanceRecorder, SQLitePerformanceRecorder
10 | from ..source.nijivoice_encoded import NijiVoiceEncodedStreamSource
11 |
12 |
class NijiVoiceEncodedGateway(SpeechGateway):
    """Gateway for NijiVoice's base64-encoded voice generation API."""

    def __init__(self, *, stream_source: NijiVoiceEncodedStreamSource = None, api_key: str = None, speeds: Dict[str, float] = None, base_url: str = None, cache_dir: str = None, performance_recorder: PerformanceRecorder = None, debug = False):
        """Initialize with the given source, or build a default one from *api_key*."""
        self.stream_source: NijiVoiceEncodedStreamSource = None
        if stream_source:
            super().__init__(stream_source=stream_source, debug=debug)
        else:
            default_source = NijiVoiceEncodedStreamSource(
                api_key=api_key,
                base_url=base_url or "https://api.nijivoice.com",
                cache_storage=FileCacheStorage(cache_dir=cache_dir or "nijivoice_encoded_cache"),
                format_converters={},
                performance_recorder=performance_recorder or SQLitePerformanceRecorder(),
                debug=debug,
            )
            super().__init__(stream_source=default_source, debug=debug)
        # Per-speaker default speech speeds used by the unified endpoint
        self.speeds = speeds or {}

    def register_endpoint(self, router: APIRouter):
        """Register the native generate-encoded-voice passthrough endpoint."""
        @router.post("/api/platform/v1/voice-actors/{voice_actor_id}/generate-encoded-voice")
        async def get_voice_handler(voice_actor_id: str, request: Request, x_audio_format: str = None):
            body = await request.json()

            if x_audio_format:
                # mp3/wav are produced natively; anything else is requested
                # as wav so it can be converted downstream.
                body["format"] = x_audio_format if x_audio_format in ["mp3", "wav"] else "wav"
            else:
                x_audio_format = body.get("format", "mp3")

            stream_resp = await self.stream_source.fetch_stream(
                voice_actor_id=voice_actor_id,
                audio_format=x_audio_format,
                request_json=body,
            )

            # The upstream response is a JSON envelope, not raw audio
            pieces = []
            async for chunk in stream_resp:
                pieces.append(chunk)

            return Response(content=b"".join(pieces), media_type="application/json")

    async def unified_tts_handler(self, request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
        """Generate speech via the encoded API and return the decoded audio bytes."""
        if tts_request.speed:
            speed = str(tts_request.speed)
        else:
            speed = str(self.speeds.get(tts_request.speaker, "1.0"))

        body = {
            "script": tts_request.text,
            "speed": speed,
            "format": "mp3" if x_audio_format == "mp3" else "wav",
        }

        stream_resp = await self.stream_source.fetch_stream(
            voice_actor_id=tts_request.speaker,
            audio_format=x_audio_format,
            request_json=body,
        )

        pieces = []
        async for chunk in stream_resp:
            pieces.append(chunk)
        envelope = json.loads(b"".join(pieces))

        # The audio arrives base64-encoded inside the JSON envelope
        audio_bytes = base64.b64decode(envelope["generatedVoice"]["base64Audio"])

        return StreamingResponse(io.BytesIO(audio_bytes), media_type=f"audio/{x_audio_format}")
79 |
--------------------------------------------------------------------------------
/tests/gateway/test_voicevox.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import httpx
4 |
5 | SPEAKER = 46
6 |
7 |
@pytest.mark.asyncio
async def test_voicevox(random_text, wave_checker, audio_transcriber):
    """Default synthesis (no format override) should return valid WAV audio."""
    audio_query = httpx.post(
        "http://127.0.0.1:8000/voicevox/audio_query",
        params={"speaker": SPEAKER, "text": random_text}
    ).json()

    response = httpx.post(
        "http://127.0.0.1:8000/voicevox/synthesis",
        params={"speaker": SPEAKER},
        json=audio_query
    )
    content = response.content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
26 |
27 |
@pytest.mark.asyncio
async def test_voicevox_wav(random_text, wave_checker, audio_transcriber):
    """Explicit x_audio_format=wav should return valid WAV audio."""
    audio_query = httpx.post(
        "http://127.0.0.1:8000/voicevox/audio_query",
        params={"speaker": SPEAKER, "text": random_text}
    ).json()

    response = httpx.post(
        "http://127.0.0.1:8000/voicevox/synthesis",
        params={"speaker": SPEAKER, "x_audio_format": "wav"},
        json=audio_query
    )
    content = response.content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
47 |
48 |
@pytest.mark.asyncio
async def test_voicevox_mp3(random_text, mp3_checker, audio_transcriber):
    """x_audio_format=mp3 should trigger conversion to valid MP3 audio."""
    audio_query = httpx.post(
        "http://127.0.0.1:8000/voicevox/audio_query",
        params={"speaker": SPEAKER, "text": random_text}
    ).json()

    response = httpx.post(
        "http://127.0.0.1:8000/voicevox/synthesis",
        params={"speaker": SPEAKER, "x_audio_format": "mp3"},
        json=audio_query
    )
    content = response.content
    assert mp3_checker(content)
    assert "音声合成" in audio_transcriber(content, "mp3")
68 |
69 |
@pytest.mark.asyncio
async def test_voicevox_unified(random_text, wave_checker, audio_transcriber):
    """Unified /tts endpoint should default to WAV for the voicevox service."""
    payload = {
        "text": random_text,
        "speaker": str(SPEAKER),
        "service_name": "voicevox"
    }
    content = httpx.post("http://127.0.0.1:8000/tts", json=payload).content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
81 |
82 |
@pytest.mark.asyncio
async def test_voicevox_unified_wav(random_text, wave_checker, audio_transcriber):
    """Unified /tts with x_audio_format=wav should return valid WAV audio."""
    payload = {
        "text": random_text,
        "speaker": str(SPEAKER),
        "service_name": "voicevox"
    }
    content = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "wav"},
        json=payload
    ).content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
97 |
98 |
@pytest.mark.asyncio
async def test_voicevox_unified_mp3(random_text, mp3_checker, audio_transcriber):
    """Unified /tts with x_audio_format=mp3 should return valid MP3 audio."""
    payload = {
        "text": random_text,
        "speaker": str(SPEAKER),
        "service_name": "voicevox"
    }
    content = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "mp3"},
        json=payload
    ).content
    assert mp3_checker(content)
    assert "音声合成" in audio_transcriber(content, "mp3")
113 |
--------------------------------------------------------------------------------
/tests/gateway/test_unified.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | import httpx
4 | from speech_gateway.gateway.voicevox import VoicevoxGateway
5 | from speech_gateway.gateway.nijivoice import NijiVoiceGateway
6 | from speech_gateway.gateway.sbv2 import StyleBertVits2Gateway
7 | from speech_gateway.gateway.openai_speech import OpenAIGateway
8 | from speech_gateway.gateway.unified import UnifiedGateway
9 | from speech_gateway.gateway import UnifiedTTSRequest
10 |
11 | VOICEVOX_URL = os.getenv("VOICEVOX_URL")
12 | SBV2_URL = os.getenv("SBV2_URL")
13 | NIJIVOICE_API_KEY = os.getenv("NIJIVOICE_API_KEY")
14 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
15 |
16 |
@pytest.mark.asyncio
async def test_unified_gateway_default():
    """Verify UnifiedGateway routing: default gateway, service_name, and language.

    Gateways are constructed from environment-provided endpoints/keys; no
    network call is made — only get_gateway() routing is exercised.
    """
    # Create gateways
    voicevox_gateway = VoicevoxGateway(base_url=VOICEVOX_URL, debug=True)
    sbv2_gateway = StyleBertVits2Gateway(base_url=SBV2_URL, debug=True)
    nijivoice_gateway = NijiVoiceGateway(api_key=NIJIVOICE_API_KEY, prefix="/nijivoice", debug=True)
    openai_gateway = OpenAIGateway(api_key=OPENAI_API_KEY, debug=True)

    # Unified gateway: voicevox is the default; openai handles en-US/zh-CN
    unified_gateway = UnifiedGateway(debug=True)
    unified_gateway.add_gateway("voicevox", voicevox_gateway, default_speaker="46", default=True)
    unified_gateway.add_gateway("sbv2", sbv2_gateway)
    unified_gateway.add_gateway("nijivoice", nijivoice_gateway)
    unified_gateway.add_gateway("openai", openai_gateway, languages=["en-US", "zh-CN"], default_speaker="alloy")

    # No hints at all -> default gateway
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello")) == voicevox_gateway

    # Explicit service_name wins; unknown service yields None
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", service_name="voicevox")) == voicevox_gateway
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", service_name="sbv2")) == sbv2_gateway
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", service_name="nijivoice")) == nijivoice_gateway
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", service_name="openai")) == openai_gateway
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", service_name="dummy")) is None

    # Language routing: registered languages go to openai, others to default
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", language="ja-JP")) == voicevox_gateway
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", language="en-US")) == openai_gateway
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", language="zh-CN")) == openai_gateway

    # service_name takes precedence over language
    assert unified_gateway.get_gateway(UnifiedTTSRequest(text="hello", service_name="sbv2", language="en-US")) == sbv2_gateway
45 |
46 |
47 |
@pytest.mark.asyncio
async def test_voicevox_unified(random_text, wave_checker, audio_transcriber):
    """With no speaker/service hints, /tts should use the default gateway (WAV)."""
    payload = {"text": random_text}
    content = httpx.post("http://127.0.0.1:8000/tts", json=payload).content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
57 |
58 |
@pytest.mark.asyncio
async def test_voicevox_unified_wav(random_text, wave_checker, audio_transcriber):
    """Default-gateway /tts with x_audio_format=wav should return WAV audio."""
    payload = {"text": random_text}
    content = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "wav"},
        json=payload
    ).content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
71 |
72 |
@pytest.mark.asyncio
async def test_voicevox_unified_mp3(random_text, mp3_checker, audio_transcriber):
    """Default-gateway /tts with x_audio_format=mp3 should return MP3 audio."""
    payload = {"text": random_text}
    content = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "mp3"},
        json=payload
    ).content
    assert mp3_checker(content)
    assert "音声合成" in audio_transcriber(content, "mp3")
85 |
--------------------------------------------------------------------------------
/tests/gateway/test_azure.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import httpx
3 |
4 |
@pytest.mark.asyncio
async def test_azure(random_text, wave_checker, audio_transcriber):
    """Native Azure passthrough: PCM output format should yield valid WAV audio."""
    # NOTE(review): ssml_text is plain text with no <speak>/<voice> markup,
    # yet Content-Type declares application/ssml+xml — presumably the SSML
    # wrapper was stripped from this snippet; confirm against the gateway.
    ssml_text = f"{random_text}"
    resp = httpx.post(
        url="http://127.0.0.1:8000/azure/cognitiveservices/v1",
        headers={
            "X-Microsoft-OutputFormat": "riff-16khz-16bit-mono-pcm",
            "Content-Type": "application/ssml+xml"
        },
        data=ssml_text.encode("utf-8")
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    # random_text presumably embeds the phrase "音声合成" — same pattern as the other tests
    assert "音声合成" in audio_transcriber(audio_data, "wav")
19 |
20 |
@pytest.mark.asyncio
async def test_azure_wav(random_text, wave_checker, audio_transcriber):
    """x_audio_format=wav must override the mp3 format requested in the header."""
    ssml_text = f"{random_text}"
    response = httpx.post(
        url="http://127.0.0.1:8000/azure/cognitiveservices/v1",
        headers={
            "X-Microsoft-OutputFormat": "audio-16khz-32kbitrate-mono-mp3",  # header asks for mp3
            "Content-Type": "application/ssml+xml"
        },
        params={"x_audio_format": "wav"},  # query param wins: wav
        data=ssml_text.encode("utf-8")
    )
    assert wave_checker(response.content)
    assert "音声合成" in audio_transcriber(response.content, "wav")
36 |
37 |
@pytest.mark.asyncio
async def test_azure_mp3(random_text, mp3_checker, audio_transcriber):
    """x_audio_format=mp3 must override the wav format requested in the header."""
    ssml_text = f"{random_text}"
    response = httpx.post(
        url="http://127.0.0.1:8000/azure/cognitiveservices/v1",
        headers={
            "X-Microsoft-OutputFormat": "riff-16khz-16bit-mono-pcm",  # header asks for wav
            "Content-Type": "application/ssml+xml"
        },
        params={"x_audio_format": "mp3"},  # query param wins: mp3
        data=ssml_text.encode("utf-8")
    )
    assert mp3_checker(response.content)
    assert "音声合成" in audio_transcriber(response.content, "mp3")
53 |
54 |
@pytest.mark.asyncio
async def test_azure_unified(random_text, wave_checker, audio_transcriber):
    """Unified /tts against the azure service should default to WAV audio."""
    payload = {
        "text": random_text,
        "speaker": "zh-CN-XiaoyuMultilingualNeural",
        "service_name": "azure"
    }
    content = httpx.post("http://127.0.0.1:8000/tts", json=payload).content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
66 |
67 |
@pytest.mark.asyncio
async def test_azure_unified_wav(random_text, wave_checker, audio_transcriber):
    """Unified /tts (azure) with x_audio_format=wav should return WAV audio."""
    payload = {
        "text": random_text,
        "speaker": "zh-CN-XiaoyuMultilingualNeural",
        "service_name": "azure"
    }
    content = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "wav"},
        json=payload
    ).content
    assert wave_checker(content)
    assert "音声合成" in audio_transcriber(content, "wav")
82 |
83 |
@pytest.mark.asyncio
async def test_azure_unified_mp3(random_text, mp3_checker, audio_transcriber):
    """Unified /tts (azure) with x_audio_format=mp3 should return MP3 audio."""
    payload = {
        "text": random_text,
        "speaker": "zh-CN-XiaoyuMultilingualNeural",
        "service_name": "azure"
    }
    content = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "mp3"},
        json=payload
    ).content
    assert mp3_checker(content)
    assert "音声合成" in audio_transcriber(content, "mp3")
97 | assert "音声合成" in audio_transcriber(audio_data, "mp3")
98 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | voicevox_cache/
163 | sbv2_cache/
164 | nijivoice_cache/
165 | openai_cache/
166 | example.py
167 | testrun.py
168 | client.py
169 | pytest.ini
170 | performance.db
171 |
--------------------------------------------------------------------------------
/tests/performance_recorder/test_sqlite.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import sqlite3
3 | import threading
4 | from time import sleep
5 | from speech_gateway.performance_recorder.sqlite import SQLitePerformanceRecorder
6 |
7 |
@pytest.fixture
def sqlite_recorder(tmp_path):
    """Yield a SQLitePerformanceRecorder backed by a fresh per-test database file.

    close() is called on teardown so every test releases its resources.
    """
    database_file = tmp_path / "test_performance.db"
    recorder = SQLitePerformanceRecorder(str(database_file))
    yield recorder
    # Teardown: release all resources held by the recorder
    recorder.close()
19 |
20 |
21 | def test_single_thread_record_and_close(sqlite_recorder):
22 | """
23 | Verify that the record -> close flow finishes without deadlocks
24 | in a single-thread scenario, and confirm the correct number of rows is inserted.
25 | Also, check that the 'id' field is auto-incrementing correctly.
26 | """
27 | # Insert 5 records
28 | for i in range(5):
29 | sqlite_recorder.record(
30 | process_id=f"process_{i}",
31 | source="test_source",
32 | text=f"test_text_{i}",
33 | audio_format="wav",
34 | cached=0,
35 | elapsed=0.01 * i,
36 | )
37 |
38 | # Although close() will be called by the fixture teardown,
39 | # here we explicitly call it for clarity.
40 | sqlite_recorder.close()
41 |
42 | # Directly open the database to check how many records were inserted
43 | conn = sqlite3.connect(sqlite_recorder.db_path)
44 | try:
45 | cursor = conn.cursor()
46 | cursor.execute("SELECT COUNT(*) FROM performance_records;")
47 | count = cursor.fetchone()[0]
48 | assert count == 5, f"Expected 5 records, got {count}"
49 |
50 | # Retrieve all IDs in ascending order
51 | cursor.execute("SELECT id FROM performance_records ORDER BY id;")
52 | ids = [row[0] for row in cursor.fetchall()]
53 |
54 | # Confirm we have 5 IDs
55 | assert len(ids) == 5, f"Expected 5 IDs, got {len(ids)}"
56 |
57 | # Check they are strictly increasing by 1
58 | for i in range(1, len(ids)):
59 | assert ids[i] == ids[i - 1] + 1, "IDs are not incrementing as expected"
60 | finally:
61 | conn.close()
62 |
63 |
def test_multi_thread_record_no_deadlock(sqlite_recorder):
    """
    Verify that concurrent calls to record() do not cause deadlocks
    and that data is correctly committed to the database.
    """
    NUM_THREADS = 5
    RECORDS_PER_THREAD = 100

    def worker(thread_id: int):
        # Each worker writes its own labelled records so 500 total rows are expected
        for i in range(RECORDS_PER_THREAD):
            sqlite_recorder.record(
                process_id=f"thread_{thread_id}_process_{i}",
                source="test_source",
                text=f"test_text_{i}",
                audio_format="wav",
                cached=1,
                elapsed=0.1 * i,
            )
            # Sleep a bit to make concurrency testing more likely to expose issues
            sleep(0.001)

    # Threads are started as they are created, so workers overlap from the start
    threads = []
    for t_id in range(NUM_THREADS):
        t = threading.Thread(target=worker, args=(t_id,))
        t.start()
        threads.append(t)

    # Wait for all threads to complete
    for t in threads:
        t.join()

    # Close the recorder to ensure the queue is fully processed
    sqlite_recorder.close()

    # Check that all records were indeed written to the database
    total_expected = NUM_THREADS * RECORDS_PER_THREAD
    conn = sqlite3.connect(sqlite_recorder.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM performance_records;")
        count = cursor.fetchone()[0]
        assert count == total_expected, f"Expected {total_expected} records, got {count}"
    finally:
        conn.close()
108 |
--------------------------------------------------------------------------------
/tests/performance_recorder/test_postgres.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import threading
4 | from time import sleep
5 | from speech_gateway.performance_recorder.postgres import PostgreSQLPerformanceRecorder
6 |
7 | POSTGRES_USER = os.getenv("POSTGRES_USER")
8 | POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
9 | POSTGRES_DBNAME = os.getenv("POSTGRES_DBNAME")
10 |
11 |
@pytest.fixture
def postgres_recorder():
    """Yield a PostgreSQLPerformanceRecorder; truncate its table on teardown.

    Connection settings come from the POSTGRES_* environment variables.
    (The previously requested `tmp_path` fixture was unused and has been
    removed, so pytest no longer creates a temp directory per test.)
    """
    recorder = PostgreSQLPerformanceRecorder(
        dbname=POSTGRES_DBNAME,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD
    )
    yield recorder
    # Teardown: clear test data first, then release recorder resources
    conn = recorder.connect_db()
    cursor = conn.cursor()
    cursor.execute("TRUNCATE TABLE performance_records;")
    conn.commit()
    conn.close()
    recorder.close()
26 |
def test_single_thread_record_and_close(postgres_recorder):
    """
    Verify that the record -> close flow finishes without deadlocks
    in a single-thread scenario, and confirm the correct number of rows is inserted.
    Also, check that the 'id' field is auto-incrementing correctly.

    Requires a reachable PostgreSQL instance (POSTGRES_* env vars).
    Mirrors tests/performance_recorder/test_sqlite.py for the SQLite backend.
    """
    # Insert 5 records
    for i in range(5):
        postgres_recorder.record(
            process_id=f"process_{i}",
            source="test_source",
            text=f"test_text_{i}",
            audio_format="wav",
            cached=0,
            elapsed=0.01 * i,
        )

    # Although close() will be called by the fixture teardown,
    # here we explicitly call it for clarity.
    postgres_recorder.close()

    # Directly open the database to check how many records were inserted
    conn = postgres_recorder.connect_db()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM performance_records;")
        count = cursor.fetchone()[0]
        assert count == 5, f"Expected 5 records, got {count}"

        # Retrieve all IDs in ascending order
        cursor.execute("SELECT id FROM performance_records ORDER BY id;")
        ids = [row[0] for row in cursor.fetchall()]

        # Confirm we have 5 IDs
        assert len(ids) == 5, f"Expected 5 IDs, got {len(ids)}"

        # Check they are strictly increasing by 1
        # NOTE(review): assumes the table's id sequence starts untouched for this
        # test (the fixture truncates but does not restart the sequence) — gaps
        # would only appear here if another test inserted concurrently.
        for i in range(1, len(ids)):
            assert ids[i] == ids[i - 1] + 1, "IDs are not incrementing as expected"
    finally:
        conn.close()
68 |
69 |
def test_multi_thread_record_no_deadlock(postgres_recorder):
    """
    Ensure concurrent record() calls neither deadlock nor lose data:
    every queued record must end up committed to the database.
    """
    NUM_THREADS = 5
    RECORDS_PER_THREAD = 100

    def worker(thread_id: int):
        for n in range(RECORDS_PER_THREAD):
            postgres_recorder.record(
                process_id=f"thread_{thread_id}_process_{n}",
                source="test_source",
                text=f"test_text_{n}",
                audio_format="wav",
                cached=1,
                elapsed=0.1 * n,
            )
            # A short pause makes interleaving (and thus races) more likely.
            sleep(0.001)

    workers = [threading.Thread(target=worker, args=(t_id,)) for t_id in range(NUM_THREADS)]
    for w in workers:
        w.start()

    # Wait until every producer thread has finished.
    for w in workers:
        w.join()

    # close() drains the queue; afterwards every record must be visible.
    postgres_recorder.close()

    total_expected = NUM_THREADS * RECORDS_PER_THREAD
    conn = postgres_recorder.connect_db()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM performance_records;")
        count = cursor.fetchone()[0]
        assert count == total_expected, f"Expected {total_expected} records, got {count}"
    finally:
        conn.close()
114 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/nijivoice.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | from fastapi import APIRouter, Request
3 | from fastapi.responses import StreamingResponse, JSONResponse
4 | from . import SpeechGateway, UnifiedTTSRequest
5 | from ..cache.file import FileCacheStorage
6 | from ..performance_recorder import PerformanceRecorder, SQLitePerformanceRecorder
7 | from ..source.nijivoice import NijiVoiceStreamSource
8 |
9 |
class NijiVoiceGateway(SpeechGateway):
    """Gateway that proxies the NijiVoice API, adding caching and URL rewriting."""

    def __init__(self, *, stream_source: NijiVoiceStreamSource = None, api_key: str = None, speeds: Dict[str, float] = None, base_url: str = None, prefix: str = None, cache_dir: str = None, performance_recorder: PerformanceRecorder = None, debug = False):
        """
        Args:
            stream_source: Pre-built stream source; when given, the other
                source-related arguments (api_key, base_url, cache_dir,
                performance_recorder) are ignored.
            api_key: NijiVoice API key (used only when stream_source is None).
            speeds: Default speaking speed per voice actor id, used by the
                unified TTS handler when the request omits `speed`.
            base_url: NijiVoice API base URL.
            prefix: Path prefix this gateway is mounted under; used to build
                audio URLs that point back at the gateway.
            cache_dir: Directory for the file cache.
            performance_recorder: Recorder for synthesis metrics.
            debug: Enable debug logging.
        """
        # Attribute declaration for type checkers; the real value is assigned
        # by super().__init__() below.
        self.stream_source: NijiVoiceStreamSource = None
        if stream_source:
            super().__init__(stream_source=stream_source, debug=debug)
        else:
            super().__init__(
                stream_source=NijiVoiceStreamSource(
                    api_key=api_key,
                    base_url=base_url or "https://api.nijivoice.com",
                    cache_storage=FileCacheStorage(cache_dir=cache_dir or "nijivoice_cache"),
                    format_converters={},
                    performance_recorder=performance_recorder or SQLitePerformanceRecorder(),
                    debug=debug
                ),
                debug=debug
            )
        self.speeds = speeds or {}
        self.prefix = prefix

    def register_endpoint(self, router: APIRouter):
        """Register NijiVoice-compatible generate-voice / get-voice endpoints."""

        @router.post("/api/platform/v1/voice-actors/{voice_actor_id}/generate-voice")
        async def generate_voice_handler(voice_actor_id: str, request: Request, x_audio_format: str = None):
            request_json = await request.json()

            if x_audio_format:
                if x_audio_format in ["mp3", "wav"]:
                    request_json["format"] = x_audio_format
                else:
                    # Set wave to convert to other format later
                    request_json["format"] = "wav"
            else:
                # No override requested: honor the format in the request body.
                x_audio_format = request_json.get("format", "mp3")

            # Externally visible base URL so the response's audio URLs point
            # back at this gateway rather than at NijiVoice itself.
            gateway_base_url = f"{request.base_url.scheme}://{request.base_url.netloc}{self.prefix}"
            resp_json = await self.stream_source.generate_voice(
                voice_actor_id,
                request_json,
                gateway_base_url,
                x_audio_format
            )

            return JSONResponse(resp_json)

        @router.get("/api/platform/v1/voice-actors/{voice_actor_id}/get-voice")
        async def get_voice_handler(voice_actor_id: str, x_audio_format: str, url: str = None, download: str = None, cache_key: str = None):
            # Stream the (possibly cached / converted) audio to the client.
            nijivoice_resp = await self.stream_source.fetch_stream(
                voice_actor_id=voice_actor_id,
                url=url,
                download=download,
                cache_key=cache_key,
                audio_format=x_audio_format
            )
            return StreamingResponse(nijivoice_resp, media_type=f"audio/{x_audio_format}")

    async def unified_tts_handler(self, request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
        """Serve the unified TTS interface: generate the voice, then stream it back."""
        gateway_base_url = f"{request.base_url.scheme}://{request.base_url.netloc}{self.prefix}"

        payload = {
            "script": tts_request.text,
            # Fall back to the per-speaker default speed configured at startup.
            "speed": str(tts_request.speed) if tts_request.speed else str(self.speeds.get(tts_request.speaker, "1.0")),
            # Only mp3 is passed through; any other format is requested as wav
            # (mirroring the conversion logic in register_endpoint above).
            "format": x_audio_format if x_audio_format == "mp3" else "wav"
        }

        resp_json = await self.stream_source.generate_voice(tts_request.speaker, payload, gateway_base_url, x_audio_format, overwrite_download_urls=False)

        nijivoice_resp = await self.stream_source.fetch_stream(
            voice_actor_id=tts_request.speaker,
            url=resp_json["generatedVoice"]["audioFileUrl"],
            download=False,
            cache_key=self.stream_source.get_cache_key(x_audio_format, tts_request.speaker, payload),
            audio_format=x_audio_format
        )

        return StreamingResponse(nijivoice_resp, media_type=f"audio/{x_audio_format}")
85 |
--------------------------------------------------------------------------------
/speech_gateway/performance_recorder/postgres.py:
--------------------------------------------------------------------------------
1 | from dataclasses import fields
2 | from datetime import datetime, timezone
3 | import logging
4 | import queue
5 | import threading
6 | import time
7 | import psycopg2
8 | from . import PerformanceRecorder, PerformanceRecord
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
class PostgreSQLPerformanceRecorder(PerformanceRecorder):
    """PerformanceRecorder backed by PostgreSQL.

    record() is non-blocking: records are pushed onto an in-memory queue and
    inserted into the `performance_records` table by a daemon worker thread.
    close() flushes the queue and stops the worker.
    """

    def __init__(
        self,
        *,
        host: str = "localhost",
        port: int = 5432,
        dbname: str = "speech_gateway",
        user: str = "postgres",
        password: str = None,
    ):
        # Kept as a dict so the worker can reconnect after connection loss.
        self.connection_params = {
            "host": host,
            "port": port,
            "dbname": dbname,
            "user": user,
            "password": password,
        }
        self.record_queue = queue.Queue()
        self.stop_event = threading.Event()

        self.init_db()

        self.worker_thread = threading.Thread(target=self.start_worker, daemon=True)
        self.worker_thread.start()

    def connect_db(self):
        """Open a new connection using the stored parameters."""
        return psycopg2.connect(**self.connection_params)

    def init_db(self):
        """Create the performance_records table if it does not exist."""
        conn = self.connect_db()
        try:
            with conn:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        CREATE TABLE IF NOT EXISTS performance_records (
                            id SERIAL PRIMARY KEY,
                            process_id TEXT NOT NULL,
                            created_at TIMESTAMPTZ NOT NULL,
                            source TEXT,
                            text TEXT,
                            audio_format TEXT,
                            cached INTEGER,
                            elapsed REAL
                        )
                        """
                    )
        finally:
            conn.close()

    def start_worker(self):
        """Drain the queue until close() is called and the queue is empty."""
        conn = self.connect_db()
        try:
            while not self.stop_event.is_set() or not self.record_queue.empty():
                try:
                    record = self.record_queue.get(timeout=0.5)
                except queue.Empty:
                    continue

                try:
                    try:
                        self.insert_record(conn, record)
                    except (psycopg2.InterfaceError, psycopg2.OperationalError):
                        # The connection went away: reopen it and retry once.
                        try:
                            conn.close()
                        except Exception:
                            pass

                        logger.warning("Connection is not available. Retrying insert_record with new connection...")
                        time.sleep(0.5)
                        conn = self.connect_db()
                        self.insert_record(conn, record)
                except Exception:
                    # Never let an insert failure kill the worker: a dead worker
                    # leaves unfinished queue items, which makes close() hang
                    # forever on record_queue.join(). Log and drop the record.
                    logger.exception("Failed to insert performance record")
                finally:
                    # task_done() must run exactly once per successful get(),
                    # even on failure, or record_queue.join() deadlocks.
                    self.record_queue.task_done()
        finally:
            try:
                conn.close()
            except Exception:
                pass

    def insert_record(self, conn, record: PerformanceRecord):
        """Insert one record, appending a UTC created_at timestamp."""
        columns = [field.name for field in fields(PerformanceRecord)] + ["created_at"]
        placeholders = ["%s"] * len(columns)
        values = [getattr(record, field.name) for field in fields(PerformanceRecord)] + [
            datetime.now(timezone.utc)
        ]

        with conn.cursor() as cur:
            cur.execute(
                f"INSERT INTO performance_records ({', '.join(columns)}) VALUES ({', '.join(placeholders)})",
                values,
            )
        conn.commit()

    def record(
        self,
        *,
        process_id: str,
        source: str = None,
        text: str = None,
        audio_format: str = None,
        cached: int = 0,
        elapsed: float = None,
    ):
        """Queue a performance record for asynchronous insertion (non-blocking)."""
        performance_record = PerformanceRecord(
            process_id=process_id,
            source=source,
            text=text,
            audio_format=audio_format,
            cached=cached,
            elapsed=elapsed,
        )
        self.record_queue.put(performance_record)

    def close(self):
        """Flush all queued records, then stop and join the worker thread."""
        self.stop_event.set()
        self.record_queue.join()
        self.worker_thread.join()
131 |
--------------------------------------------------------------------------------
/speech_gateway/source/nijivoice.py:
--------------------------------------------------------------------------------
import hashlib
from time import time
from typing import AsyncIterator, Dict
import urllib.parse
import httpx
from . import StreamSource, StreamSourceError
from ..cache import CacheStorage
from ..cache.file import FileCacheStorage
from ..converter import FormatConverter
from ..performance_recorder import PerformanceRecorder
10 |
11 |
class NijiVoiceStreamSource(StreamSource):
    """Stream source for the NijiVoice speech synthesis API."""

    def __init__(self,
        *,
        api_key: str = None,
        base_url: str = "https://api.nijivoice.com",
        cache_storage: CacheStorage = None,
        format_converters: Dict[str, FormatConverter] = None,
        max_connections: int = 100,
        max_keepalive_connections: int = 20,
        timeout: float = 10.0,
        performance_recorder: PerformanceRecorder = None,
        debug: bool = False
    ):
        super().__init__(
            base_url=base_url,
            cache_storage=cache_storage or FileCacheStorage(cache_dir="nijivoice_cache"),
            format_converters=format_converters,
            max_connections=max_connections,
            max_keepalive_connections=max_keepalive_connections,
            timeout=timeout,
            performance_recorder=performance_recorder,
            debug=debug
        )
        self.base_url = base_url
        self.api_key = api_key

    def get_cache_key(self, audio_format: str, voice_actor_id: str = None, payload: dict = None, cache_key: str = None, **kwargs) -> str:
        """Return a deterministic cache key for a synthesis request.

        Uses a SHA-256 digest of the payload rather than the builtin hash():
        hash() is randomized per process (PYTHONHASHSEED), so keys would
        differ between runs and the file cache would never hit across
        restarts.
        """
        if cache_key:
            return cache_key

        payload_digest = hashlib.sha256(str(payload).encode("utf-8")).hexdigest()
        return f"{voice_actor_id}_{payload_digest}.{audio_format or 'mp3'}"

    def parse_text(self, **kwargs) -> str:
        """No text extraction for NijiVoice; recording happens in generate_voice."""
        return None

    def make_stream_request(self, url: str, **kwargs):
        """Build the httpx request kwargs to download a generated voice URL."""
        return {
            "method": "GET",
            "url": url,
        }

    async def generate_voice(self, voice_actor_id: str, payload: dict, gateway_base_url: str, x_audio_format: str = "mp3", overwrite_download_urls: bool = True):
        """Call the NijiVoice generate-voice API, or short-circuit via cache.

        Returns the NijiVoice response JSON. When the audio is cached, or when
        overwrite_download_urls is True, the audio URLs are rewritten to point
        back at this gateway's get-voice endpoint.

        Raises:
            StreamSourceError: On non-200 responses, missing audio URL, or
                transport failures.
        """
        start_time = time()
        cache_key = self.get_cache_key(x_audio_format, voice_actor_id, payload)
        use_cache = self.cache_storage and await self.cache_storage.has_cache(cache_key)

        # Return cache info if cached
        if use_cache:
            gateway_voice_url = f"{gateway_base_url}/api/platform/v1/voice-actors/{voice_actor_id}/get-voice?cache_key={cache_key}&x_audio_format={x_audio_format}"
            data = {"generatedVoice": {
                "audioFileUrl": gateway_voice_url,
                "audioFileDownloadUrl": gateway_voice_url + "&download=true"
            }}

        else:
            try:
                # Generate voice
                url = f"{self.base_url}/api/platform/v1/voice-actors/{voice_actor_id}/generate-voice"
                headers = {
                    "x-api-key": self.api_key,
                    "content-type": "application/json"
                }
                url_resp = await self.http_client.post(url, headers=headers, json=payload)
                if url_resp.status_code != 200:
                    raise StreamSourceError(f"NijiVoice generate voice failed: {url_resp.status_code}")

                # Get voice URL
                data = url_resp.json()
                audio_file_url = data.get("generatedVoice", {}).get("audioFileUrl")
                if not audio_file_url:
                    # Guard: quote(None) below would raise an obscure TypeError.
                    raise StreamSourceError("NijiVoice response did not contain generatedVoice.audioFileUrl")
                encoded_audio_file_url = urllib.parse.quote(audio_file_url, safe='')

                # Overwrite URLs
                if overwrite_download_urls:
                    gateway_voice_url = (
                        f"{gateway_base_url}/api/platform/v1/voice-actors/{voice_actor_id}/get-voice"
                        f"?url={encoded_audio_file_url}&cache_key={cache_key}&x_audio_format={x_audio_format}"
                    )
                    data["generatedVoice"]["audioFileUrl"] = gateway_voice_url
                    data["generatedVoice"]["audioFileDownloadUrl"] = gateway_voice_url + "&download=true"

            except httpx.RequestError as ex:
                raise StreamSourceError(f"HTTP request failed: {ex}") from ex

        # Performance record (normalize cached to an int for the INTEGER column)
        if self.performance_recorder:
            self.performance_recorder.record(
                process_id=cache_key, source=self.__class__.__name__, text=payload.get("script"),
                audio_format=x_audio_format, cached=1 if use_cache else 0, elapsed=time() - start_time
            )

        return data
103 |
--------------------------------------------------------------------------------
/tests/source/test_nijivoice_source.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | import httpx
4 | from speech_gateway.source.nijivoice import NijiVoiceStreamSource
5 | from speech_gateway.source import StreamSourceError
6 |
# Test configuration: the real NijiVoice endpoint plus a locally running gateway.
BASE_URL = "https://api.nijivoice.com"
GATEWAY_BASE_URL = "http://127.0.0.1:8000/nijivoice"
NIJIVOICE_API_KEY = os.getenv("NIJIVOICE_API_KEY")
VOICE_ACTOR_ID = "a192db5f-bd8b-4fc7-bc08-af5ca5957c12"
# Synthesis request body shared by the tests below.
PAYLOAD = {
    "script": "こんにちは。これはテストです。",
    "speed": "1.0",
    "emotionalLevel": "0.1",
    "soundDuration": "0.1",
    "format": "mp3",
}
18 |
@pytest.fixture
def source():
    """Build a NijiVoiceStreamSource pointed at the real API."""
    return NijiVoiceStreamSource(api_key=NIJIVOICE_API_KEY, base_url=BASE_URL, debug=True)
23 |
@pytest.mark.asyncio
async def test_get_cache_key(source):
    # Keys must carry the requested extension and embed the actor id.
    for fmt in ("mp3", "wav"):
        key = source.get_cache_key(fmt, VOICE_ACTOR_ID, PAYLOAD)
        assert key.endswith(f".{fmt}")
        assert VOICE_ACTOR_ID in key
34 |
@pytest.mark.asyncio
async def test_parse_text(source):
    # The current implementation always returns None.
    assert source.parse_text(payload=PAYLOAD) is None
40 |
@pytest.mark.asyncio
async def test_make_stream_request(source):
    # The request should be a plain GET against the given URL.
    endpoint = f"{BASE_URL}/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice"
    req = source.make_stream_request(url=endpoint)
    assert req["method"] == "GET"
    assert req["url"] == endpoint
48 |
@pytest.mark.asyncio
async def test_generate_voice_cached(source):
    # Test generate_voice method with cache: pre-populate the cache entry,
    # then verify generate_voice short-circuits to a gateway-relative URL
    # instead of calling the NijiVoice API.
    cache_key = source.get_cache_key("mp3", VOICE_ACTOR_ID, PAYLOAD)

    # Create a dummy async generator for cached data
    async def dummy_cache_data():
        yield b"cached data"

    # Write a dummy cache (write_cache is itself a generator, so it must be
    # consumed for the bytes to actually be written)
    async for _ in source.cache_storage.write_cache(dummy_cache_data(), cache_key):
        pass  # Consume the generator to simulate writing cache

    # Call generate_voice and verify it uses cache
    response = await source.generate_voice(VOICE_ACTOR_ID, PAYLOAD, GATEWAY_BASE_URL)
    assert "generatedVoice" in response
    assert response["generatedVoice"]["audioFileUrl"].startswith(GATEWAY_BASE_URL)
66 |
@pytest.mark.asyncio
async def test_generate_voice_fresh(source):
    # Without a pre-existing cache entry this performs a real API call.
    try:
        resp = await source.generate_voice(VOICE_ACTOR_ID, PAYLOAD, GATEWAY_BASE_URL)
        assert "generatedVoice" in resp
        assert resp["generatedVoice"]["audioFileUrl"].startswith(GATEWAY_BASE_URL)
    except Exception as e:
        pytest.fail(f"generate_voice failed: {e}")
76 |
@pytest.mark.asyncio
async def test_generate_voice_error(source):
    # An empty script is rejected by the API and surfaces as StreamSourceError.
    bad_payload = {**PAYLOAD, "script": ""}

    with pytest.raises(StreamSourceError):
        await source.generate_voice(VOICE_ACTOR_ID, bad_payload, GATEWAY_BASE_URL)
85 |
@pytest.mark.asyncio
async def test_fetch_stream_raw(source):
    # Test fetch_stream_raw method (actual API call).
    # NOTE(review): requires the gateway to be running at GATEWAY_BASE_URL;
    # also this httpx.post call is synchronous inside an async test, which
    # blocks the event loop — acceptable here, but worth confirming.
    url_resp = httpx.post(
        f"{GATEWAY_BASE_URL}/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={"script": "こんにちは。これはテストです。", "speed": "1.0"}
    )

    assert url_resp.status_code == 200
    # The gateway rewrites the audio URL to point back at itself.
    url = url_resp.json()["generatedVoice"]["audioFileUrl"]
    assert GATEWAY_BASE_URL in url

    http_request = {
        "method": "GET",
        "url": url,
    }

    try:
        async for chunk in source.fetch_stream_raw(http_request):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream_raw failed: {e}")
108 |
@pytest.mark.asyncio
async def test_fetch_stream(source):
    # Test fetch_stream method with a full pipeline
    # (cache lookup -> fetch -> optional conversion -> cache write).
    try:
        async for chunk in await source.fetch_stream(
            audio_format="mp3",
            voice_actor_id=VOICE_ACTOR_ID,
            payload=PAYLOAD,
            gateway_base_url=GATEWAY_BASE_URL,
        ):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream failed: {e}")
122 |
--------------------------------------------------------------------------------
/speech_gateway/gateway/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | import logging
3 | from fastapi import Request, APIRouter, HTTPException
4 | from fastapi.responses import Response
5 | from pydantic import BaseModel, Field
6 | import httpx
7 | from ..source import StreamSource
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
class UnifiedTTSRequest(BaseModel):
    """Request body for the unified, service-agnostic TTS endpoint.

    Only `text` is required; every other field has a service-dependent
    default or fallback.
    """

    text: str = Field(..., description="The text to be synthesized into speech.", example="hello")
    speaker: str = Field(
        None,
        description="The unique identifier for the voice in each speech service. "
                    "For Style-Bert-VITS2, specify as `{model_id}-{speaker_id}`. "
                    "If omitted, the default speaker of the speech service will be used.",
        example="888753761"
    )
    style: str = Field(
        None,
        description="A predefined set of voice styles that includes `neutral`, `joy`, `angry`, `sorrow`, `fun`, and `surprised`. "
                    "These styles act as presets and must be mapped appropriately to the corresponding style identifiers in each speech service. "
                    "If omitted, no style will be applied.",
        example="neutral"
    )
    speed: float = Field(
        None,
        description="The speed of synthesized speech, where 1.0 is normal speed. "
                    "Values greater than 1.0 increase the speed (e.g., 1.5 is 50% faster), "
                    "and values less than 1.0 decrease the speed (e.g., 0.5 is 50% slower). "
                    "The acceptable range depends on each speech service.",
        example=1.0
    )
    service_name: str = Field(
        None,
        description="The name of the service as specified in `add_gateway`. "
                    "If omitted, the default gateway will be used.",
        example="aivisspeech",
    )
    language: str = Field(
        None,
        description="The language. The corresponding text-to-speech service will be used. "
                    "Specify the language code in ISO639-1 format combined with the country code using a hyphen."
                    "If omitted, the default gateway will be used.",
        example="en-US",
    )
50 |
class SpeechGateway(ABC):
    """Base class for speech service gateways.

    A gateway registers service-specific endpoints on a FastAPI router and
    transparently proxies every other path to the upstream service.
    """

    # Hop-by-hop headers that proxies must not forward to clients.
    HOP_BY_HOP_HEADERS = {
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailers",
        "transfer-encoding",
        "upgrade",
    }

    def __init__(
        self,
        *,
        stream_source: StreamSource = None,
        debug: bool = False
    ):
        # Source that performs the actual HTTP calls to the speech service.
        self.stream_source = stream_source
        self.debug = debug

    def filter_headers(self, headers: httpx.Headers) -> dict:
        """Return a copy of the upstream headers minus hop-by-hop ones."""
        filtered = {}
        for k, v in headers.items():
            if k.lower() not in self.HOP_BY_HOP_HEADERS:
                filtered[k] = v
        return filtered

    @abstractmethod
    def register_endpoint(self, router: APIRouter):
        """Register service-specific endpoints on the given router."""
        pass

    async def passthrough_handler(self, request: Request, path: str):
        """Forward any unmatched request to the upstream service verbatim."""
        url = f"{self.stream_source.base_url}/{path}"
        if request.query_params:
            url += f"?{request.query_params}"

        headers = dict(request.headers)
        # Drop the incoming Host header so httpx sets the upstream host itself.
        headers.pop("host", None)
        body = await request.body()

        r = await self.stream_source.http_client.request(
            request.method,
            url,
            headers=headers,
            content=body
        )

        resp_headers = self.filter_headers(r.headers)

        if self.debug:
            logger.info(f"Proxy: {request.method} /{path} -> {r.status_code}")

        return Response(content=r.content, status_code=r.status_code, headers=resp_headers)

    async def unified_tts_handler(self, request: Request, tts_request: UnifiedTTSRequest, x_audio_format: str = "wav"):
        """Handle the unified TTS interface; subclasses override to support it."""
        raise HTTPException(status_code=400, detail=f"This speech service doesn't support unified interface for now: {self.__class__.__name__}")

    def get_router(self) -> APIRouter:
        """Build the router: service endpoints first, then a catch-all proxy."""
        router = APIRouter()
        self.register_endpoint(router)
        # The catch-all is added last so specific routes take precedence.
        router.add_api_route(
            "/{path:path}",
            self.passthrough_handler,
            methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"],
            include_in_schema=False
        )

        return router

    async def shutdown(self):
        """Release the underlying HTTP client resources."""
        await self.stream_source.close()
123 |
--------------------------------------------------------------------------------
/tests/source/test_voicevox_source.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | from speech_gateway.source.voicevox import VoicevoxStreamSource
4 |
# Test configuration: locally running VOICEVOX server URL and speaker id.
VOICEVOX_URL = os.getenv("VOICEVOX_URL")
SPEAKER = "2"
7 |
@pytest.fixture
def source():
    """Build a VoicevoxStreamSource against the configured server URL."""
    return VoicevoxStreamSource(base_url=VOICEVOX_URL)
12 |
@pytest.fixture
def audio_query():
    # Provide the audio_query data: a pre-built VOICEVOX audio query for
    # "こんにちは。これはテストです。" (the kana reading is in the "kana" field),
    # so the tests don't need to call the /audio_query endpoint first.
    return {
        "accent_phrases": [
            {
                "moras": [
                    {"text": "コ", "consonant": "k", "consonant_length": 0, "vowel": "o", "vowel_length": 0, "pitch": 0},
                    {"text": "ン", "consonant": None, "consonant_length": None, "vowel": "N", "vowel_length": 0, "pitch": 0},
                    {"text": "ニ", "consonant": "n", "consonant_length": 0, "vowel": "i", "vowel_length": 0, "pitch": 0},
                    {"text": "チ", "consonant": "ch", "consonant_length": 0, "vowel": "i", "vowel_length": 0, "pitch": 0},
                    {"text": "ワ", "consonant": "w", "consonant_length": 0, "vowel": "a", "vowel_length": 0, "pitch": 0},
                    {"text": ".", "consonant": None, "consonant_length": None, "vowel": "pau", "vowel_length": 0, "pitch": 0}
                ],
                "accent": 5,
                "pause_mora": None,
                "is_interrogative": False
            },
            {
                "moras": [
                    {"text": "コ", "consonant": "k", "consonant_length": 0, "vowel": "o", "vowel_length": 0, "pitch": 0},
                    {"text": "レ", "consonant": "r", "consonant_length": 0, "vowel": "e", "vowel_length": 0, "pitch": 0},
                    {"text": "ワ", "consonant": "w", "consonant_length": 0, "vowel": "a", "vowel_length": 0, "pitch": 0},
                    {"text": "テ", "consonant": "t", "consonant_length": 0, "vowel": "e", "vowel_length": 0, "pitch": 0},
                    {"text": "ス", "consonant": "s", "consonant_length": 0, "vowel": "u", "vowel_length": 0, "pitch": 0},
                    {"text": "ト", "consonant": "t", "consonant_length": 0, "vowel": "o", "vowel_length": 0, "pitch": 0},
                    {"text": "デ", "consonant": "d", "consonant_length": 0, "vowel": "e", "vowel_length": 0, "pitch": 0},
                    {"text": "ス", "consonant": "s", "consonant_length": 0, "vowel": "u", "vowel_length": 0, "pitch": 0},
                    {"text": ".", "consonant": None, "consonant_length": None, "vowel": "pau", "vowel_length": 0, "pitch": 0}
                ],
                "accent": 4,
                "pause_mora": None,
                "is_interrogative": False
            }
        ],
        "speedScale": 1,
        "intonationScale": 1,
        "tempoDynamicsScale": 1,
        "pitchScale": 0,
        "volumeScale": 1,
        "prePhonemeLength": 0.1,
        "postPhonemeLength": 0.1,
        "pauseLength": None,
        "pauseLengthScale": 1,
        "outputSamplingRate": 44100,
        "outputStereo": False,
        "kana": "こんにちは。これはテストです。"
    }
61 |
@pytest.mark.asyncio
async def test_get_cache_key(source, audio_query):
    # Keys must carry the requested extension and embed the speaker id.
    for fmt in ("mp3", "wav"):
        key = source.get_cache_key(fmt, SPEAKER, audio_query)
        assert key.endswith(f".{fmt}")
        assert SPEAKER in key
72 |
@pytest.mark.asyncio
async def test_parse_text(source, audio_query):
    # parse_text should surface the kana reading from the audio query.
    assert source.parse_text(audio_query) == "こんにちは。これはテストです。"
78 |
@pytest.mark.asyncio
async def test_make_stream_request(source, audio_query):
    # The synthesis call is a POST with the speaker as a query param and
    # the audio query as the JSON body.
    req = source.make_stream_request(SPEAKER, audio_query)
    assert (req["method"], req["url"]) == ("POST", f"{VOICEVOX_URL}/synthesis")
    assert req["params"] == {"speaker": SPEAKER}
    assert req["json"] == audio_query
87 |
@pytest.mark.asyncio
async def test_fetch_stream_raw(source, audio_query):
    # Test fetch_stream_raw with a real request (ensure server is running locally
    # at VOICEVOX_URL before running this test)
    http_request = source.make_stream_request(SPEAKER, audio_query)

    try:
        async for chunk in source.fetch_stream_raw(http_request):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream_raw failed: {e}")
98 |
@pytest.mark.asyncio
async def test_fetch_stream(source, audio_query):
    # Test fetch_stream method with conversion and caching
    # (full pipeline: cache lookup -> fetch -> convert to mp3 -> cache write).
    audio_format = "mp3"

    try:
        async for chunk in await source.fetch_stream(audio_format, speaker=SPEAKER, audio_query=audio_query):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream failed: {e}")
109 |
--------------------------------------------------------------------------------
/speech_gateway/source/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | import logging
3 | from time import time
4 | from typing import AsyncIterator, Any, Dict
5 | import httpx
6 | from ..cache import CacheStorage
7 | from ..converter import FormatConverter
8 | from ..performance_recorder import PerformanceRecorder
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
class StreamSourceError(Exception):
    """Raised when generating or streaming audio from a speech service fails.

    The boilerplate ``__init__`` that only forwarded ``message`` to
    ``Exception.__init__`` was removed; the base class already does this.
    """
16 |
17 |
class StreamSource(ABC):
    """Base class for audio stream sources.

    Provides the shared pipeline around a concrete speech service:
    cache lookup -> fetch -> optional format conversion -> cache write ->
    performance recording.
    """

    def __init__(self,
        *,
        base_url: str,
        cache_storage: CacheStorage = None,
        format_converters: Dict[str, FormatConverter] = None,
        max_connections: int = 100,
        max_keepalive_connections: int = 20,
        timeout: float = 10.0,
        performance_recorder: PerformanceRecorder = None,
        debug: bool = False
    ):
        """
        Args:
            base_url: Base URL of the upstream speech service.
            cache_storage: Optional storage for synthesized audio.
            format_converters: Mapping of audio format name -> converter.
            max_connections: HTTP connection pool size.
            max_keepalive_connections: Keep-alive connection pool size.
            timeout: HTTP timeout in seconds.
            performance_recorder: Optional recorder for synthesis metrics.
            debug: Enable debug logging.
        """
        self.base_url = base_url
        self.cache_storage = cache_storage
        self.format_converters = format_converters
        self.http_client = httpx.AsyncClient(
            follow_redirects=False,
            timeout=httpx.Timeout(timeout),
            limits=httpx.Limits(
                max_connections=max_connections,
                max_keepalive_connections=max_keepalive_connections
            )
        )
        self.performance_recorder = performance_recorder
        self.debug = debug

    @abstractmethod
    def get_cache_key(self, audio_format: str, **kwargs) -> str:
        """Return a cache key identifying this request and audio format."""
        pass

    @abstractmethod
    def parse_text(self, **kwargs) -> str:
        """Extract the spoken text from the request for performance records."""
        pass

    def get_converter(self, audio_format: str) -> FormatConverter:
        """Return the converter registered for audio_format, or None."""
        if self.format_converters:
            return self.format_converters.get(audio_format)

    @abstractmethod
    def make_stream_request(self, **kwargs) -> dict:
        """Build the httpx request kwargs for the upstream synthesis call."""
        pass

    async def fetch_stream_raw(self, http_request: Dict[str, Any]) -> AsyncIterator[bytes]:
        """Stream raw audio bytes from the upstream service.

        Raises:
            StreamSourceError: On non-200 responses or transport failures.
        """
        try:
            async with self.http_client.stream(**http_request) as audio_resp:
                if audio_resp.status_code != 200:
                    resp_body = ""
                    try:
                        resp_body = await audio_resp.aread()
                    except Exception:
                        # Fix: the former bare `except:` also swallowed
                        # BaseException (e.g. task cancellation). Reading the
                        # body is best-effort only.
                        pass
                    raise StreamSourceError(f"Stream from voice service failed: {audio_resp.status_code}: {resp_body}")

                async for chunk in audio_resp.aiter_bytes(1024):
                    yield chunk

        except httpx.RequestError as ex:
            raise StreamSourceError(f"HTTP request failed: {ex}") from ex

    async def fetch_stream(self, audio_format: str, **kwargs) -> AsyncIterator[bytes]:
        """Return an async byte stream for the request, using cache when possible.

        The returned iterator lazily runs the full pipeline: cache replay or
        upstream fetch, optional conversion, cache write-through, and
        performance recording.
        """
        start_time = time()
        cache_key = self.get_cache_key(audio_format, **kwargs)
        use_cache = self.cache_storage and await self.cache_storage.has_cache(cache_key)

        if use_cache:
            if self.debug:
                logger.info(f"[cache]: {cache_key}")
            # Get cache stream
            stream = self.cache_storage.fetch_cache_stream(cache_key)

        else:
            # Get stream from TTS service
            if self.debug:
                logger.info(f"[generate]: {cache_key}")
            http_request = self.make_stream_request(**kwargs)

            if self.debug:
                logger.info(f"Request to speech service: {http_request}")

            stream = self.fetch_stream_raw(http_request)

            # Convert format
            converter = self.get_converter(audio_format)
            if converter:
                stream = converter.convert(stream)

            # Write cache (write-through while streaming to the client)
            if self.cache_storage:
                stream = self.cache_storage.write_cache(stream, cache_key)

        # Response time is recorded once the stream has been fully consumed
        if self.performance_recorder:
            stream = self.record_time(
                stream,
                cache_key=cache_key,
                text=self.parse_text(**kwargs),
                audio_format=audio_format,
                cached=use_cache,
                start_time=start_time
            )

        return stream

    async def record_time(
        self,
        input_stream: AsyncIterator[bytes],
        *,
        cache_key: str,
        text: str,
        audio_format: str,
        cached: bool,
        start_time: float
    ) -> AsyncIterator[bytes]:
        """Pass the stream through, then record total elapsed time at the end."""
        async for chunk in input_stream:
            yield chunk

        self.performance_recorder.record(
            process_id=cache_key, source=self.__class__.__name__, text=text,
            audio_format=audio_format, cached=1 if cached else 0, elapsed=time() - start_time
        )

    async def close(self):
        """Release the underlying HTTP client resources."""
        await self.http_client.aclose()
141 |
--------------------------------------------------------------------------------
/docker/run.py:
--------------------------------------------------------------------------------
1 | from contextlib import asynccontextmanager
2 | import logging
3 | import os
4 | from fastapi import FastAPI
5 | from dotenv import load_dotenv
6 | from speech_gateway.performance_recorder.postgres import PostgreSQLPerformanceRecorder
7 | from speech_gateway.gateway.azure import AzureGateway
8 | from speech_gateway.gateway.openai_speech import OpenAIGateway
9 | from speech_gateway.gateway.voicevox import VoicevoxGateway
10 | from speech_gateway.gateway.sbv2 import StyleBertVits2Gateway
11 | from speech_gateway.gateway.nijivoice_encoded import NijiVoiceEncodedGateway
12 | from speech_gateway.gateway.unified import UnifiedGateway
13 |
# Configure root logger for the speech_gateway package
logger = logging.getLogger("speech_gateway")
logger.setLevel(logging.INFO)
log_format = logging.Formatter("[%(levelname)s] %(asctime)s : %(message)s")
streamHandler = logging.StreamHandler()
streamHandler.setFormatter(log_format)
logger.addHandler(streamHandler)

# Load .env, then read per-service feature flags and credentials.
load_dotenv()
DEBUG = os.getenv("DEBUG", "false").lower() in ("true", "1", "yes")

# Azure
AZURE_ENABLED = os.getenv("AZURE_ENABLED", "false").lower() in ("true", "1", "yes")
AZURE_API_KEY = os.getenv("AZURE_API_KEY")
AZURE_REGION = os.getenv("AZURE_REGION")
AZURE_LANGUAGES = os.getenv("AZURE_LANGUAGES")
# OpenAI
OPENAI_ENABLED = os.getenv("OPENAI_ENABLED", "false").lower() in ("true", "1", "yes")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_LANGUAGES = os.getenv("OPENAI_LANGUAGES")
# VOICEVOX
VOICEVOX_ENABLED = os.getenv("VOICEVOX_ENABLED", "false").lower() in ("true", "1", "yes")
VOICEVOX_URL = os.getenv("VOICEVOX_URL")
VOICEVOX_LANGUAGES = os.getenv("VOICEVOX_LANGUAGES")
# Style-Bert-VITS2
SBV2_ENABLED = os.getenv("SBV2_ENABLED", "false").lower() in ("true", "1", "yes")
SBV2_URL = os.getenv("SBV2_URL")
SBV2_LANGUAGES = os.getenv("SBV2_LANGUAGES")
# NIJIVOICE
NIJIVOICE_ENABLED = os.getenv("NIJIVOICE_ENABLED", "false").lower() in ("true", "1", "yes")
NIJIVOICE_API_KEY = os.getenv("NIJIVOICE_API_KEY")
NIJIVOICE_LANGUAGES = os.getenv("NIJIVOICE_LANGUAGES")
# Database
# NOTE(review): DB_PORT comes from the environment as a string (or None when
# PORT_DB is unset) — confirm PORT_DB is defined in docker/.env and that the
# recorder's driver accepts a string port.
DB_PORT = os.getenv("PORT_DB")
DB_USER = os.getenv("SPGW_DB_USER")
DB_PASSWORD = os.getenv("SPGW_DB_PASSWORD")

# Performance recorder (host "spgw-db" is the database service inside the
# docker-compose network)
performance_recorder = PostgreSQLPerformanceRecorder(host="spgw-db", port=DB_PORT, user=DB_USER, password=DB_PASSWORD)
53 |
# On app down
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: on shutdown, close every gateway that was created."""
    yield
    # A gateway global exists only when its ENABLED flag was set at startup,
    # so guard on both the flag and the module namespace before awaiting.
    gateway_registry = (
        (AZURE_ENABLED, "azure_gateway"),
        (OPENAI_ENABLED, "openai_gateway"),
        (VOICEVOX_ENABLED, "voicevox_gateway"),
        (SBV2_ENABLED, "sbv2_gateway"),
        (NIJIVOICE_ENABLED, "nijivoice_gateway"),
    )
    for enabled, global_name in gateway_registry:
        if enabled and global_name in globals():
            await globals()[global_name].shutdown()
69 |
# Create API app
app = FastAPI(lifespan=lifespan)

# Unified gateway: exposes the common /tts endpoint that dispatches to the
# service gateways registered below.
# Fix: was hard-coded debug=True; follow the DEBUG env flag like every other gateway.
unified_gateway = UnifiedGateway(debug=DEBUG)
app.include_router(unified_gateway.get_router())

# Create service gateways. Each enabled service is mounted both on its own
# prefix (native API passthrough) and registered with the unified gateway.
if AZURE_ENABLED:
    azure_gateway = AzureGateway(api_key=AZURE_API_KEY, cache_dir="cache/azure", performance_recorder=performance_recorder, region=AZURE_REGION, debug=DEBUG)
    unified_gateway.add_gateway(
        service_name="azure",
        gateway=azure_gateway,
        languages=AZURE_LANGUAGES.split(",") if AZURE_LANGUAGES else None,
    )
    app.include_router(azure_gateway.get_router(), prefix="/azure")
    logger.info("[Gateway] Azure on /azure")

if OPENAI_ENABLED:
    openai_gateway = OpenAIGateway(api_key=OPENAI_API_KEY, cache_dir="cache/openai", performance_recorder=performance_recorder, debug=DEBUG)
    unified_gateway.add_gateway(
        service_name="openai",
        gateway=openai_gateway,
        languages=OPENAI_LANGUAGES.split(",") if OPENAI_LANGUAGES else None,
    )
    app.include_router(openai_gateway.get_router(), prefix="/openai")
    logger.info("[Gateway] OpenAI on /openai")

if VOICEVOX_ENABLED:
    voicevox_gateway = VoicevoxGateway(base_url=VOICEVOX_URL, cache_dir="cache/voicevox", performance_recorder=performance_recorder, debug=DEBUG)
    unified_gateway.add_gateway(
        service_name="voicevox",
        gateway=voicevox_gateway,
        languages=VOICEVOX_LANGUAGES.split(",") if VOICEVOX_LANGUAGES else None,
    )
    app.include_router(voicevox_gateway.get_router(), prefix="/voicevox")
    logger.info("[Gateway] VOICEVOX on /voicevox")

if SBV2_ENABLED:
    sbv2_gateway = StyleBertVits2Gateway(base_url=SBV2_URL, cache_dir="cache/sbv2", performance_recorder=performance_recorder, debug=DEBUG)
    unified_gateway.add_gateway(
        service_name="sbv2",
        gateway=sbv2_gateway,
        languages=SBV2_LANGUAGES.split(",") if SBV2_LANGUAGES else None,
    )
    app.include_router(sbv2_gateway.get_router(), prefix="/sbv2")
    logger.info("[Gateway] Style-Bert-VITS2 on /sbv2")

if NIJIVOICE_ENABLED:
    nijivoice_gateway = NijiVoiceEncodedGateway(api_key=NIJIVOICE_API_KEY, cache_dir="cache/nijivoice", performance_recorder=performance_recorder, debug=DEBUG)
    unified_gateway.add_gateway(
        service_name="nijivoice",
        gateway=nijivoice_gateway,
        languages=NIJIVOICE_LANGUAGES.split(",") if NIJIVOICE_LANGUAGES else None,
    )
    app.include_router(nijivoice_gateway.get_router(), prefix="/nijivoice")
    logger.info("[Gateway] Nijivoice on /nijivoice")
127 |
--------------------------------------------------------------------------------
/tests/gateway/test_openai_speech.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import httpx
4 |
# Integration tests for the OpenAI speech gateway. They POST to a live
# gateway on 127.0.0.1:8000, so a running server (see run.py) is required.
# NOTE(review): each test asserts the transcription contains 「音声合成」 —
# presumably the conftest `random_text` fixture embeds that phrase; verify.

# Voice used for the unified /tts endpoint tests below.
SPEAKER = "alloy"


@pytest.mark.asyncio
async def test_openai_speech(random_text, mp3_checker, audio_transcriber):
    # No response_format given: gateway default output is MP3.
    resp = httpx.post(
        "http://127.0.0.1:8000/openai/audio/speech",
        json={
            "model": "tts-1",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_wav(random_text, wave_checker, audio_transcriber):
    # response_format=wav in the body: expect WAV back.
    resp = httpx.post(
        "http://127.0.0.1:8000/openai/audio/speech",
        json={
            "model": "tts-1",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "wav"
        }
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_mp3(random_text, mp3_checker, audio_transcriber):
    # response_format=mp3 in the body: expect MP3 back.
    resp = httpx.post(
        "http://127.0.0.1:8000/openai/audio/speech",
        json={
            "model": "tts-1",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "mp3"
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_wav_mp3(random_text, mp3_checker, audio_transcriber):
    # x_audio_format query param overrides the body's response_format (wav -> mp3).
    resp = httpx.post(
        "http://127.0.0.1:8000/openai/audio/speech",
        json={
            "model": "tts-1",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "wav" # <- wav
        },
        params={
            "x_audio_format": "mp3" # <- mp3
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_mp3_wav(random_text, wave_checker, audio_transcriber):
    # x_audio_format query param overrides the body's response_format (mp3 -> wav).
    resp = httpx.post(
        "http://127.0.0.1:8000/openai/audio/speech",
        json={
            "model": "tts-1",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "mp3" # <- mp3
        },
        params={
            "x_audio_format": "wav" # <- wav
        }
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_x_wav(random_text, wave_checker, audio_transcriber):
    # Only the x_audio_format query param is set: expect WAV.
    resp = httpx.post(
        "http://127.0.0.1:8000/openai/audio/speech",
        json={
            "model": "tts-1",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
        },
        params={
            "x_audio_format": "wav" # <- wav
        }
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_x_mp3(random_text, mp3_checker, audio_transcriber):
    # Only the x_audio_format query param is set: expect MP3.
    resp = httpx.post(
        "http://127.0.0.1:8000/openai/audio/speech",
        json={
            "model": "tts-1",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
        },
        params={
            "x_audio_format": "mp3" # <- mp3
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_unified(random_text, wave_checker, audio_transcriber):
    # Unified /tts endpoint, no format: default output is WAV.
    req = {
        "text": random_text,
        "speaker": SPEAKER,
        "service_name": "openai"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", json=req)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_unified_wav(random_text, wave_checker, audio_transcriber):
    # Unified /tts endpoint with explicit x_audio_format=wav.
    req = {
        "text": random_text,
        "speaker": SPEAKER,
        "service_name": "openai"
    }
    query_params = {
        "x_audio_format": "wav"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", params=query_params, json=req)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_unified_mp3(random_text, mp3_checker, audio_transcriber):
    # Unified /tts endpoint with explicit x_audio_format=mp3.
    req = {
        "text": random_text,
        "speaker": SPEAKER,
        "service_name": "openai"
    }
    query_params = {
        "x_audio_format": "mp3"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", params=query_params, json=req)
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")
179 |
--------------------------------------------------------------------------------
/tests/gateway/test_azure_openai_speech.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import httpx
4 |
# Integration tests for the Azure OpenAI speech gateway (mounted under
# /azure_openai). They require a live gateway on 127.0.0.1:8000.
# NOTE(review): assertions expect the transcription to contain 「音声合成」 —
# presumably the conftest `random_text` fixture embeds that phrase; verify.

# Voice used for the unified /tts endpoint tests below.
SPEAKER = "alloy"


@pytest.mark.asyncio
async def test_openai_speech(random_text, mp3_checker, audio_transcriber):
    # No response_format given: gateway default output is MP3.
    resp = httpx.post(
        "http://127.0.0.1:8000/azure_openai/audio/speech",
        json={
            "model": "gpt-4o-mini-tts",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_wav(random_text, wave_checker, audio_transcriber):
    # response_format=wav in the body: expect WAV back.
    resp = httpx.post(
        "http://127.0.0.1:8000/azure_openai/audio/speech",
        json={
            "model": "gpt-4o-mini-tts",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "wav"
        }
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_mp3(random_text, mp3_checker, audio_transcriber):
    # response_format=mp3 in the body: expect MP3 back.
    resp = httpx.post(
        "http://127.0.0.1:8000/azure_openai/audio/speech",
        json={
            "model": "gpt-4o-mini-tts",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "mp3"
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_wav_mp3(random_text, mp3_checker, audio_transcriber):
    # x_audio_format query param overrides the body's response_format (wav -> mp3).
    resp = httpx.post(
        "http://127.0.0.1:8000/azure_openai/audio/speech",
        json={
            "model": "gpt-4o-mini-tts",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "wav" # <- wav
        },
        params={
            "x_audio_format": "mp3" # <- mp3
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_mp3_wav(random_text, wave_checker, audio_transcriber):
    # x_audio_format query param overrides the body's response_format (mp3 -> wav).
    resp = httpx.post(
        "http://127.0.0.1:8000/azure_openai/audio/speech",
        json={
            "model": "gpt-4o-mini-tts",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
            "response_format": "mp3" # <- mp3
        },
        params={
            "x_audio_format": "wav" # <- wav
        }
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_x_wav(random_text, wave_checker, audio_transcriber):
    # Only the x_audio_format query param is set: expect WAV.
    resp = httpx.post(
        "http://127.0.0.1:8000/azure_openai/audio/speech",
        json={
            "model": "gpt-4o-mini-tts",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
        },
        params={
            "x_audio_format": "wav" # <- wav
        }
    )
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_x_mp3(random_text, mp3_checker, audio_transcriber):
    # Only the x_audio_format query param is set: expect MP3.
    resp = httpx.post(
        "http://127.0.0.1:8000/azure_openai/audio/speech",
        json={
            "model": "gpt-4o-mini-tts",
            "voice": "alloy",
            "input": random_text,
            "speed": 1.0,
        },
        params={
            "x_audio_format": "mp3" # <- mp3
        }
    )
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_openai_speech_unified(random_text, wave_checker, audio_transcriber):
    # Unified /tts endpoint, no format: default output is WAV.
    req = {
        "text": random_text,
        "speaker": SPEAKER,
        "service_name": "azure_openai"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", json=req)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_unified_wav(random_text, wave_checker, audio_transcriber):
    # Unified /tts endpoint with explicit x_audio_format=wav.
    req = {
        "text": random_text,
        "speaker": SPEAKER,
        "service_name": "azure_openai"
    }
    query_params = {
        "x_audio_format": "wav"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", params=query_params, json=req)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_openai_speech_unified_mp3(random_text, mp3_checker, audio_transcriber):
    # Unified /tts endpoint with explicit x_audio_format=mp3.
    req = {
        "text": random_text,
        "speaker": SPEAKER,
        "service_name": "azure_openai"
    }
    query_params = {
        "x_audio_format": "mp3"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", params=query_params, json=req)
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")
179 |
--------------------------------------------------------------------------------
/tests/source/test_openai_speech_source.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 | from speech_gateway.source.openai_speech import OpenAIStreamSource
4 |
# Credentials/endpoints come from the environment; any of these may be None
# when unset (the tests then fail against the real APIs).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_BASE_URL = os.getenv("AZURE_OPENAI_BASE_URL")
8 |
@pytest.fixture
def source():
    # Fresh OpenAIStreamSource per test (function-scoped), so tests that
    # mutate it (the Azure variants below) don't leak state.
    return OpenAIStreamSource(api_key=OPENAI_API_KEY)

@pytest.mark.asyncio
async def test_get_cache_key(source):
    # Cache key must reflect the requested output format, not the upstream
    # response_format in the payload.
    request_json = {
        "model": "tts-1",
        "voice": "alloy",
        "input": "こんにちは。これはテストです。",
        "speed": 1.0,
        "response_format": "wav"
    }
    cache_key = source.get_cache_key("mp3", request_json)
    assert cache_key.endswith(".mp3")

    cache_key = source.get_cache_key("wav", request_json)
    assert cache_key.endswith(".wav")

@pytest.mark.asyncio
async def test_parse_text(source):
    # parse_text should extract the "input" field of the OpenAI payload.
    request_json = {
        "model": "tts-1",
        "voice": "alloy",
        "input": "こんにちは。これはテストです。",
        "speed": 1.0,
        "response_format": "wav"
    }
    text = source.parse_text(request_json)
    assert text == "こんにちは。これはテストです。"

@pytest.mark.asyncio
async def test_make_stream_request(source):
    # make_stream_request should build a POST to the OpenAI speech endpoint
    # with the payload passed through unchanged.
    request_json = {
        "model": "tts-1",
        "voice": "alloy",
        "input": "こんにちは。これはテストです。",
        "speed": 1.0,
        "response_format": "wav"
    }
    request = source.make_stream_request(request_json)
    assert request["method"] == "POST"
    assert request["url"] == "https://api.openai.com/v1/audio/speech"
    assert request["json"] == request_json
57 |
@pytest.mark.asyncio
async def test_fetch_stream_raw(source):
    # fetch_stream_raw should stream raw audio bytes from the real OpenAI API.
    # (Network test: requires a valid OPENAI_API_KEY.)
    # Fix: this test and test_fetch_stream were each defined twice in this
    # file; the later identical definitions shadowed the earlier ones, so the
    # duplicates were dead code and have been removed.
    request_json = {
        "model": "tts-1",
        "voice": "alloy",
        "input": "こんにちは。これはテストです。",
        "speed": 1.0,
        "response_format": "wav"
    }
    http_request = source.make_stream_request(request_json)

    try:
        async for chunk in source.fetch_stream_raw(http_request):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream_raw failed: {e}")

@pytest.mark.asyncio
async def test_fetch_stream(source):
    # fetch_stream wraps fetch_stream_raw with conversion/caching and must
    # still yield bytes. (Network test: requires a valid OPENAI_API_KEY.)
    request_json = {
        "model": "tts-1",
        "voice": "alloy",
        "input": "こんにちは。これはテストです。",
        "speed": 1.0,
        "response_format": "wav"
    }

    audio_format = "wav"

    try:
        async for chunk in await source.fetch_stream(audio_format, request_json=request_json):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream failed: {e}")
131 |
132 |
@pytest.mark.asyncio
async def test_fetch_stream_raw_azure(source):
    # Repoint the (function-scoped) source at Azure OpenAI; safe because the
    # fixture builds a fresh instance per test.
    source.api_key = AZURE_OPENAI_API_KEY
    source.base_url = AZURE_OPENAI_BASE_URL

    # Network test: requires AZURE_OPENAI_API_KEY / AZURE_OPENAI_BASE_URL.
    request_json = {
        "model": "gpt-4o-mini-tts",
        "voice": "alloy",
        "input": "こんにちは。これはテストです。",
        "speed": 1.0,
        "response_format": "wav"
    }
    http_request = source.make_stream_request(request_json)

    try:
        async for chunk in source.fetch_stream_raw(http_request):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream_raw_azure failed: {e}")

@pytest.mark.asyncio
async def test_fetch_stream_azure(source):
    # Repoint the (function-scoped) source at Azure OpenAI.
    source.api_key = AZURE_OPENAI_API_KEY
    source.base_url = AZURE_OPENAI_BASE_URL

    # fetch_stream should apply conversion/caching and still yield bytes.
    request_json = {
        "model": "gpt-4o-mini-tts",
        "voice": "alloy",
        "input": "こんにちは。これはテストです。",
        "speed": 1.0,
        "response_format": "wav"
    }

    audio_format = "wav"

    try:
        async for chunk in await source.fetch_stream(audio_format, request_json=request_json):
            assert isinstance(chunk, bytes)
    except Exception as e:
        pytest.fail(f"fetch_stream_azure failed: {e}")
177 |
--------------------------------------------------------------------------------
/tests/gateway/test_nijivoice.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import httpx
4 |
# Integration tests for the NijiVoice gateway (audio-URL response variant).
# The generate-voice endpoint returns JSON with an audioFileUrl that is then
# fetched separately. Requires a live gateway on 127.0.0.1:8000.
# NOTE(review): assertions expect the transcription to contain 「音声合成」 —
# presumably the conftest `random_text` fixture embeds that phrase; verify.

# NijiVoice voice actor used by every test in this file.
VOICE_ACTOR_ID = "dba2fa0e-f750-43ad-b9f6-d5aeaea7dc16"


@pytest.mark.asyncio
async def test_nijivoice(random_text, mp3_checker, audio_transcriber):
    # No format given: default output is MP3.
    resp_json = httpx.post(
        f"http://127.0.0.1:8000/nijivoice/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={
            "script": random_text,
            "speed": "1.0"
        }
    ).json()

    resp = httpx.get(resp_json["generatedVoice"]["audioFileUrl"])
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_nijivoice_wav(random_text, wave_checker, audio_transcriber):
    # format=wav in the body: expect WAV at the returned URL.
    resp_json = httpx.post(
        f"http://127.0.0.1:8000/nijivoice/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={
            "script": random_text,
            "speed": "1.0",
            "format": "wav"
        }
    ).json()

    resp = httpx.get(resp_json["generatedVoice"]["audioFileUrl"])
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_nijivoice_mp3(random_text, mp3_checker, audio_transcriber):
    # format=mp3 in the body: expect MP3 at the returned URL.
    resp_json = httpx.post(
        f"http://127.0.0.1:8000/nijivoice/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={
            "script": random_text,
            "speed": "1.0",
            "format": "mp3"
        }
    ).json()

    resp = httpx.get(resp_json["generatedVoice"]["audioFileUrl"])
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_nijivoice_wav_mp3(random_text, mp3_checker, audio_transcriber):
    # x_audio_format query param overrides the body's format (wav -> mp3).
    resp_json = httpx.post(
        f"http://127.0.0.1:8000/nijivoice/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={
            "script": random_text,
            "speed": "1.0",
            "format": "wav" # <- wav
        },
        params={
            "x_audio_format": "mp3" # <- mp3
        }
    ).json()

    resp = httpx.get(resp_json["generatedVoice"]["audioFileUrl"])
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_nijivoice_mp3_wav(random_text, wave_checker, audio_transcriber):
    # x_audio_format query param overrides the body's format (mp3 -> wav).
    resp_json = httpx.post(
        f"http://127.0.0.1:8000/nijivoice/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={
            "script": random_text,
            "speed": "1.0",
            "format": "mp3" # <- mp3
        },
        params = {
            "x_audio_format": "wav" # <- wav
        }
    ).json()

    resp = httpx.get(resp_json["generatedVoice"]["audioFileUrl"])
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_nijivoice_x_wav(random_text, wave_checker, audio_transcriber):
    # Only the x_audio_format query param is set: expect WAV.
    resp_json = httpx.post(
        f"http://127.0.0.1:8000/nijivoice/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={
            "script": random_text,
            "speed": "1.0"
        },
        params = {
            "x_audio_format": "wav" # <- wav
        }
    ).json()

    resp = httpx.get(resp_json["generatedVoice"]["audioFileUrl"])
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_nijivoice_x_mp3(random_text, mp3_checker, audio_transcriber):
    # Only the x_audio_format query param is set: expect MP3.
    resp_json = httpx.post(
        f"http://127.0.0.1:8000/nijivoice/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-voice",
        json={
            "script": random_text,
            "speed": "1.0"
        },
        params = {
            "x_audio_format": "mp3" # <- mp3
        }
    ).json()

    resp = httpx.get(resp_json["generatedVoice"]["audioFileUrl"])
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")


@pytest.mark.asyncio
async def test_nijivoice_unified(random_text, wave_checker, audio_transcriber):
    # Unified /tts endpoint, no format: default output is WAV.
    req = {
        "text": random_text,
        "speaker": VOICE_ACTOR_ID,
        "service_name": "nijivoice"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", json=req)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_nijivoice_unified_wav(random_text, wave_checker, audio_transcriber):
    # Unified /tts endpoint with explicit x_audio_format=wav.
    req = {
        "text": random_text,
        "speaker": VOICE_ACTOR_ID,
        "service_name": "nijivoice"
    }
    query_params = {
        "x_audio_format": "wav"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", params=query_params, json=req)
    audio_data = resp.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")


@pytest.mark.asyncio
async def test_nijivoice_unified_mp3(random_text, mp3_checker, audio_transcriber):
    # Unified /tts endpoint with explicit x_audio_format=mp3.
    req = {
        "text": random_text,
        "speaker": VOICE_ACTOR_ID,
        "service_name": "nijivoice"
    }
    query_params = {
        "x_audio_format": "mp3"
    }
    resp = httpx.post("http://127.0.0.1:8000/tts", params=query_params, json=req)
    audio_data = resp.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")
179 |
--------------------------------------------------------------------------------
/tests/gateway/test_nijivoice_encoded.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import pytest
3 | import httpx
4 |
5 | VOICE_ACTOR_ID = "dba2fa0e-f750-43ad-b9f6-d5aeaea7dc16"
6 |
7 |
8 | @pytest.mark.asyncio
9 | async def test_nijivoice(random_text, mp3_checker, audio_transcriber):
10 | resp_json = httpx.post(
11 | f"http://127.0.0.1:8000/nijivoice_encoded/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice",
12 | json={
13 | "script": random_text,
14 | "speed": "1.0"
15 | }
16 | ).json()
17 | base64_audio = resp_json["generatedVoice"]["base64Audio"]
18 | audio_data = base64.b64decode(base64_audio)
19 |
20 | assert mp3_checker(audio_data)
21 | assert "音声合成" in audio_transcriber(audio_data, "mp3")
22 |
23 |
24 | @pytest.mark.asyncio
25 | async def test_nijivoice_wav(random_text, wave_checker, audio_transcriber):
26 | resp_json = httpx.post(
27 | f"http://127.0.0.1:8000/nijivoice_encoded/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice",
28 | json={
29 | "script": random_text,
30 | "speed": "1.0",
31 | "format": "wav"
32 | }
33 | ).json()
34 | base64_audio = resp_json["generatedVoice"]["base64Audio"]
35 | audio_data = base64.b64decode(base64_audio)
36 |
37 | assert wave_checker(audio_data)
38 | assert "音声合成" in audio_transcriber(audio_data, "wav")
39 |
40 |
41 | @pytest.mark.asyncio
42 | async def test_nijivoice_mp3(random_text, mp3_checker, audio_transcriber):
43 | resp_json = httpx.post(
44 | f"http://127.0.0.1:8000/nijivoice_encoded/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice",
45 | json={
46 | "script": random_text,
47 | "speed": "1.0",
48 | "format": "mp3"
49 | }
50 | ).json()
51 | base64_audio = resp_json["generatedVoice"]["base64Audio"]
52 | audio_data = base64.b64decode(base64_audio)
53 |
54 | assert mp3_checker(audio_data)
55 | assert "音声合成" in audio_transcriber(audio_data, "mp3")
56 |
57 |
58 | @pytest.mark.asyncio
59 | async def test_nijivoice_wav_mp3(random_text, mp3_checker, audio_transcriber):
60 | resp_json = httpx.post(
61 | f"http://127.0.0.1:8000/nijivoice_encoded/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice",
62 | json={
63 | "script": random_text,
64 | "speed": "1.0",
65 | "format": "wav" # <- wav
66 | },
67 | params={
68 | "x_audio_format": "mp3" # <- mp3
69 | }
70 | ).json()
71 | base64_audio = resp_json["generatedVoice"]["base64Audio"]
72 | audio_data = base64.b64decode(base64_audio)
73 |
74 | assert mp3_checker(audio_data)
75 | assert "音声合成" in audio_transcriber(audio_data, "mp3")
76 |
77 |
78 | @pytest.mark.asyncio
79 | async def test_nijivoice_mp3_wav(random_text, wave_checker, audio_transcriber):
80 | resp_json = httpx.post(
81 | f"http://127.0.0.1:8000/nijivoice_encoded/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice",
82 | json={
83 | "script": random_text,
84 | "speed": "1.0",
85 | "format": "mp3" # <- mp3
86 | },
87 | params = {
88 | "x_audio_format": "wav" # <- wav
89 | }
90 | ).json()
91 | base64_audio = resp_json["generatedVoice"]["base64Audio"]
92 | audio_data = base64.b64decode(base64_audio)
93 |
94 | assert wave_checker(audio_data)
95 | assert "音声合成" in audio_transcriber(audio_data, "wav")
96 |
97 |
98 | @pytest.mark.asyncio
99 | async def test_nijivoice_x_wav(random_text, wave_checker, audio_transcriber):
100 | resp_json = httpx.post(
101 | f"http://127.0.0.1:8000/nijivoice_encoded/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice",
102 | json={
103 | "script": random_text,
104 | "speed": "1.0"
105 | },
106 | params = {
107 | "x_audio_format": "wav" # <- wav
108 | }
109 | ).json()
110 | base64_audio = resp_json["generatedVoice"]["base64Audio"]
111 | audio_data = base64.b64decode(base64_audio)
112 |
113 | assert wave_checker(audio_data)
114 | assert "音声合成" in audio_transcriber(audio_data, "wav")
115 |
116 |
117 | @pytest.mark.asyncio
118 | async def test_nijivoice_x_mp3(random_text, mp3_checker, audio_transcriber):
119 | resp_json = httpx.post(
120 | f"http://127.0.0.1:8000/nijivoice_encoded/api/platform/v1/voice-actors/{VOICE_ACTOR_ID}/generate-encoded-voice",
121 | json={
122 | "script": random_text,
123 | "speed": "1.0"
124 | },
125 | params = {
126 | "x_audio_format": "mp3" # <- mp3
127 | }
128 | ).json()
129 | base64_audio = resp_json["generatedVoice"]["base64Audio"]
130 | audio_data = base64.b64decode(base64_audio)
131 |
132 | assert mp3_checker(audio_data)
133 | assert "音声合成" in audio_transcriber(audio_data, "mp3")
134 |
135 |
136 | @pytest.mark.asyncio
137 | async def test_nijivoice_unified(random_text, wave_checker, audio_transcriber):
138 | req = {
139 | "text": random_text,
140 | "speaker": VOICE_ACTOR_ID,
141 | "service_name": "nijivoice"
142 | }
143 | resp = httpx.post("http://127.0.0.1:8000/tts", json=req)
144 | audio_data = resp.content
145 | assert wave_checker(audio_data)
146 | assert "音声合成" in audio_transcriber(audio_data, "wav")
147 |
148 |
@pytest.mark.asyncio
async def test_nijivoice_unified_wav(random_text, wave_checker, audio_transcriber):
    """Unified /tts endpoint honors x_audio_format=wav in the query string."""
    payload = {
        "text": random_text,
        "speaker": VOICE_ACTOR_ID,
        "service_name": "nijivoice",
    }
    response = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "wav"},
        json=payload,
    )
    audio_data = response.content
    assert wave_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "wav")
163 |
164 |
@pytest.mark.asyncio
async def test_nijivoice_unified_mp3(random_text, mp3_checker, audio_transcriber):
    """Unified /tts endpoint honors x_audio_format=mp3 in the query string."""
    payload = {
        "text": random_text,
        "speaker": VOICE_ACTOR_ID,
        "service_name": "nijivoice",
    }
    response = httpx.post(
        "http://127.0.0.1:8000/tts",
        params={"x_audio_format": "mp3"},
        json=payload,
    )
    audio_data = response.content
    assert mp3_checker(audio_data)
    assert "音声合成" in audio_transcriber(audio_data, "mp3")
179 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SpeechGateway
2 |
3 | A reverse proxy server that enhances speech synthesis with essential, extensible features. 🦉💬
4 |
5 |
6 | ## 💎 Features
7 |
8 | - 🥰 **Supports Popular Speech Services**: Works seamlessly with AivisSpeech, VOICEVOX, Style-Bert-VITS2, NijiVoice, OpenAI and Azure — and lets you integrate additional services to suit your needs.
9 | - 🗂️ **Caching**: Boost response speed and save API calls with built-in audio caching.
10 | - 🔄 **Format Conversion**: Effortlessly convert WAV to MP3 for bandwidth-friendly responses.
11 | - 📊 **Performance Metrics**: Track synthesis time and cache hits for in-depth insights.
12 | - ⚡️ **Low Latency**: Streamlined pipeline for minimal delay, delivering fast results!
13 | - 🌟 **Unified Interface**: Use various text-to-speech services through a unified interface — now with multi-language support!🌏
14 |
15 |
16 | ## 🎁 Installation
17 |
18 | ```sh
19 | pip install speech-gateway
20 | ```
21 |
22 | To use MP3 format conversion, you also need to install ffmpeg on your computer.
23 |
24 |
25 | ## 🚀 Start server
26 |
27 | Create a script like the following example:
28 |
29 | ```python
30 | from contextlib import asynccontextmanager
31 | from fastapi import FastAPI
32 | from speech_gateway.gateway.voicevox import VoicevoxGateway
33 | from speech_gateway.gateway.sbv2 import StyleBertVits2Gateway
34 | from speech_gateway.gateway.nijivoice import NijiVoiceGateway
35 |
36 | # Create gateways
37 | voicevox_gateway = VoicevoxGateway(base_url="http://127.0.0.1:10101", debug=True)
38 | sbv2_gateway = StyleBertVits2Gateway(base_url="http://127.0.0.1:5000", debug=True)
39 | nijivoice_gateway = NijiVoiceGateway(api_key=NIJIVOICE_API_KEY, prefix="/nijivoice", debug=True)
40 |
41 | # Create app
42 | app = FastAPI()
43 |
44 | # Add gateways to app
45 | app.include_router(voicevox_gateway.get_router(), prefix="/aivisspeech")
46 | app.include_router(sbv2_gateway.get_router(), prefix="/sbv2")
47 | app.include_router(nijivoice_gateway.get_router(), prefix="/nijivoice")
48 |
49 | # On app down
50 | @asynccontextmanager
51 | async def lifespan(app: FastAPI):
52 | yield
53 | await voicevox_gateway.shutdown()
54 | await sbv2_gateway.shutdown()
55 | await nijivoice_gateway.shutdown()
56 | ```
57 |
58 | Then, run it with uvicorn:
59 |
60 | ```
61 | uvicorn run:app --port 8000
62 | ```
63 |
64 | In this example, you can access AivisSpeech at http://127.0.0.1:8000/aivisspeech, Style-Bert-VITS2 at http://127.0.0.1:8000/sbv2, and NijiVoice at http://127.0.0.1:8000/nijivoice.
65 |
66 | **NOTE**: If you want to perform MP3 conversion, make sure to include `x_audio_format=mp3` as a query parameter in your request.
67 |
68 |
69 | ## 🌟 Unified Interface
70 |
71 | You can use various text-to-speech services through a unified interface specification.
72 | Below is an example of providing a unified interface for AivisSpeech, Style-Bert-VITS2, and NijiVoice.
73 |
74 | ```python
75 | from speech_gateway.gateway.unified import UnifiedGateway
76 |
77 | # Create UnifiedGateway and add gateways with its service name
78 | unified_gateway = UnifiedGateway(debug=True)
79 | unified_gateway.add_gateway("aivisspeech", aivisspeech_gateway, True) # Set as default gateway
80 | unified_gateway.add_gateway("sbv2", sbv2_gateway)
81 | unified_gateway.add_gateway("nijivoice", nijivoice_gateway)
82 |
83 | # Add unified interface router
84 | app.include_router(unified_gateway.get_router())
85 | ```
86 |
87 | ### Parameters
88 |
89 | POST a JSON object with the following fields:
90 |
91 | | Parameter | Type | Required | Description |
92 | |---------------|--------|----------|---------------------------------------------------------------------------------------------|
93 | | `text` | string | Required | The text to be synthesized into speech. |
94 | | `speaker` | string | Optional | The unique identifier for the voice in each speech service.
For Style-Bert-VITS2, specify as `{model_id}-{speaker_id}`.
If omitted, the default speaker of the speech service will be used. |
95 | | `style`| string | Optional | A predefined set of voice styles that includes `neutral`, `joy`, `angry`, `sorrow`, `fun`, and `surprised`. |
96 | | `service_name`| string | Optional | The name of the service as specified in `add_gateway`.
If omitted, the default gateway will be used. |
97 | | `language`| string | Optional | The language. The corresponding text-to-speech service will be used. If omitted, the default gateway will be used. |
98 |
99 |
100 | ### Client code
101 |
102 | You can access the services in a unified manner as shown in the client code below:
103 |
104 | ```python
105 | import httpx
106 |
107 | req = {"text": "こんにちは。これはデフォルトサービスだよ。", "speaker": "888753761"}
108 | # req = {"text": "こんにちは。これはAivisSpeechだよ。", "speaker": "888753761", "service_name": "aivisspeech"}
109 | # req = {"text": "こんにちは。これはStyle-Bert-VITS2だよ。", "speaker": "0-0", "service_name": "sbv2"}
110 | # req = {"text": "こんにちは。これはにじボイスだよ。", "speaker": "a192db5f-bd8b-4fc7-bc08-af5ca5957c12", "service_name": "nijivoice"}
111 |
112 | resp = httpx.post("http://127.0.0.1:8000/tts", json=req, timeout=60)
113 |
114 | with open("tts.wav", "wb") as f:
115 | f.write(resp.content)
116 | ```
117 |
118 | **NOTE**: Due to the unified specification, it is not possible to use features specific to each text-to-speech service (e.g., intonation adjustment or pitch variation control). If you need high-quality speech synthesis utilizing such features, please use the individual service interfaces.
119 |
120 |
121 | ### Applying Style
122 |
123 | Define styles on server side.
124 |
125 | ```python
126 | aivisspeech_gateway = VoicevoxGateway(base_url="http://127.0.0.1:10101", debug=True)
127 | # Define speakers for each style
128 | aivisspeech_gateway.style_mapper["888753761"] = {
129 | "joy": "888753764",
130 | "angry": "888753765",
131 | "sorrow": "888753765",
132 | "fun": "888753762",
133 | "surprised": "888753762"
134 | }
135 |
136 | sbv2_gateway = StyleBertVits2Gateway(base_url="http://127.0.0.1:5000", debug=True)
137 | # Define style name for each style
138 | sbv2_gateway.style_mapper["0-0"] = {
139 | "joy": "上機嫌",
140 | "angry": "怒り・悲しみ",
141 | "sorrow": "怒り・悲しみ",
142 | "fun": "テンション高め",
143 | "surprised": "テンション高め"
144 | }
145 | ```
146 |
147 | Call with style from client.
148 |
149 | ```python
150 | req = {"service_name": "aivisspeech", "text": "こんにちは。これはデフォルトサービスだよ。", "speaker": "888753761", "style": "angry"}
151 | # req = {"service_name": "sbv2", "text": "こんにちは。これはStyle-Bert-VITS2だよ。", "speaker": "0-0", "style": "angry"}
152 |
153 | resp = httpx.post("http://127.0.0.1:8000/tts", json=req, timeout=60)
154 |
155 | with open("tts.wav", "wb") as f:
156 | f.write(resp.content)
157 | ```
158 |
159 |
160 | ### Multi-language Support
161 |
162 | You can configure the system to use the appropriate speech service based on the language, without explicitly specifying the service name.
163 | By passing `languages` to `add_gateway`, you can register a speech service that corresponds to the `language` specified in the request. Additionally, by registering a `default_speaker`, you can eliminate the need to specify a `speaker` in each request.
164 |
165 | ```python
166 | # Gateway for default language (ja-JP) - Voice: 888753761
167 | unified_gateway.add_gateway("aivisspeech", aivisspeech_gateway, default_speaker="888753761", default=True)
168 |
169 | # Gateway for en-US and zh-CN - Voice: Alloy
170 | unified_gateway.add_gateway("openai", openai_gateway, languages=["en-US", "zh-CN"], default_speaker="alloy")
171 | ```
172 |
173 | Here is an example of client code to call this API. Switching the `language` enables easy support for multiple languages.
174 |
175 | ```python
176 | import httpx
177 |
178 | # Simply set the text and language - easily switch between multiple languages
179 | req = {"text": "こんにちは。これはデフォルトサービスだよ。"}
180 | # req = {"text": "Hello. This is the speech service for English.", "language": "en-US"}
181 | # req = {"text": "你好,这是英语的语音服务。", "language": "zh-CN"}
182 |
183 | resp = httpx.post("http://127.0.0.1:8000/tts", json=req, timeout=60)
184 |
185 | with open("tts.wav", "wb") as f:
186 | f.write(resp.content)
187 | ```
188 |
189 |
190 | ## 🛠️ Customization
191 |
192 | You can add new speech synthesis services to relay.
193 | Additionally, you can extend the cache store, audio format converter, and performance recorder. For example, the default cache store uses the file system, but you can replace it with a cloud storage service or another alternative.
194 |
195 | We’ll provide documentation for these customizations as the need arises, so if you have specific requests, please open an issue! 🙏
196 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------