├── .gitignore ├── .pre-commit-config.yaml ├── FAQ.md ├── LICENSE ├── README.md ├── README_en.md ├── assets ├── audio_understanding_leaderboard.png ├── dataset_distribute.png ├── default.wav ├── img_1.png ├── leaderboard.md ├── logo.png ├── performance.png ├── s2s_leaderboard.png ├── s2s_semantic_leaderboard.png └── utmos.png ├── audio_evals ├── __init__.py ├── agg │ ├── __init__.py │ ├── air_chat.py │ ├── base.py │ └── sp.py ├── base.py ├── constants.py ├── dataset │ ├── __init__.py │ ├── dataset.py │ ├── giga.py │ ├── huggingface.py │ └── resume.py ├── eval_task.py ├── evaluator │ ├── __init__.py │ ├── air_chat.py │ ├── alpaca_eval.py │ ├── alpaca_eval.txt │ ├── base.py │ ├── bbh.py │ ├── bleu.py │ ├── coco.py │ ├── dict_match.py │ ├── dnsmos.py │ ├── ensemble.py │ ├── harm.py │ ├── ifeval.py │ ├── long_tts_eval_asr_wer.py │ ├── mcq.py │ ├── qa_eval.py │ ├── qa_exact_match.py │ ├── ref_qa_geval.py │ ├── ref_qa_geval.txt │ ├── seed_tts_eval_asr_wer.py │ ├── simo.py │ ├── string_match.py │ ├── utmos.py │ ├── voice_bench.py │ └── wer.py ├── isolate.py ├── lib │ ├── CosyVoice │ │ ├── .gitignore │ │ ├── CODE_OF_CONDUCT.md │ │ ├── FAQ.md │ │ ├── LICENSE │ │ ├── README.md │ │ ├── asset │ │ │ ├── cross_lingual_prompt.wav │ │ │ ├── dingding.png │ │ │ └── zero_shot_prompt.wav │ │ ├── cosyvoice │ │ │ ├── __init__.py │ │ │ ├── bin │ │ │ │ ├── average_model.py │ │ │ │ ├── export_jit.py │ │ │ │ ├── export_onnx.py │ │ │ │ ├── export_trt.sh │ │ │ │ ├── inference.py │ │ │ │ └── train.py │ │ │ ├── cli │ │ │ │ ├── __init__.py │ │ │ │ ├── cosyvoice.py │ │ │ │ ├── frontend.py │ │ │ │ └── model.py │ │ │ ├── dataset │ │ │ │ ├── __init__.py │ │ │ │ ├── dataset.py │ │ │ │ └── processor.py │ │ │ ├── flow │ │ │ │ ├── decoder.py │ │ │ │ ├── flow.py │ │ │ │ ├── flow_matching.py │ │ │ │ └── length_regulator.py │ │ │ ├── hifigan │ │ │ │ ├── discriminator.py │ │ │ │ ├── f0_predictor.py │ │ │ │ ├── generator.py │ │ │ │ └── hifigan.py │ │ │ ├── llm │ │ │ │ └── llm.py │ │ │ ├── tokenizer │ │ │ │ ├── assets │ │ │ │ │ └── multilingual_zh_ja_yue_char_del.tiktoken │ │ │ │ └── tokenizer.py │ │ │ ├── transformer │ │ │ │ ├── __init__.py │ │ │ │ ├── activation.py │ │ │ │ ├── attention.py │ │ │ │ ├── convolution.py │ │ │ │ ├── decoder.py │ │ │ │ ├── decoder_layer.py │ │ │ │ ├── embedding.py │ │ │ │ ├── encoder.py │ │ │ │ ├── encoder_layer.py │ │ │ │ ├── label_smoothing_loss.py │ │ │ │ ├── positionwise_feed_forward.py │ │ │ │ ├── subsampling.py │ │ │ │ └── upsample_encoder.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── class_utils.py │ │ │ │ ├── common.py │ │ │ │ ├── executor.py │ │ │ │ ├── file_utils.py │ │ │ │ ├── frontend_utils.py │ │ │ │ ├── losses.py │ │ │ │ ├── mask.py │ │ │ │ ├── scheduler.py │ │ │ │ └── train_utils.py │ │ ├── docker │ │ │ └── Dockerfile │ │ ├── examples │ │ │ ├── libritts │ │ │ │ ├── cosyvoice │ │ │ │ │ ├── conf │ │ │ │ │ │ ├── cosyvoice.fromscratch.yaml │ │ │ │ │ │ ├── cosyvoice.yaml │ │ │ │ │ │ └── ds_stage2.json │ │ │ │ │ ├── cosyvoice │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_and_untar.sh │ │ │ │ │ │ └── prepare_data.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── tools │ │ │ │ │ └── tts_text.json │ │ │ │ └── cosyvoice2 │ │ │ │ │ ├── cosyvoice │ │ │ │ │ └── tools │ │ │ └── magicdata-read │ │ │ │ └── cosyvoice │ │ │ │ ├── conf │ │ │ │ ├── cosyvoice.fromscratch.yaml │ │ │ │ ├── cosyvoice.yaml │ │ │ │ └── ds_stage2.json │ │ │ │ ├── cosyvoice │ │ │ │ ├── local │ │ │ │ ├── download_and_untar.sh │ │ │ │ └── prepare_data.py │ │ │ │ ├── path.sh │ │ │ │ ├── run.sh │ │ │ │ ├── tools │ │ │ │ └── tts_text.json │ │ ├── main.py │ │ ├── requirements.txt │ │ ├── runtime │ │ │ └── python │ │ │ │ ├── Dockerfile │ │ │ │ ├── fastapi │ │ │ │ ├── client.py │ │ │ │ └── server.py │ │ │ │ └── grpc │ │ │ │ ├── client.py │ │ │ │ ├── cosyvoice.proto │ │ │ │ └── server.py │ │ ├── tools │ │ │ ├── extract_embedding.py │ │ │ ├── extract_speech_token.py │ │ │ └── make_parquet_list.py │ │ └── webui.py │ ├── DNSMOS │ │ ├── README.md │ │ ├── dnsmos_single.py │ │ ├── main.py │ │ └── requirements.txt │ ├── HiggsAudio │ │ ├── main.py │ │ ├── requirements.txt │ │ └── vc.py │ ├── Kimi-Audio │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── assets │ │ │ ├── kimia_framework.png │ │ │ ├── kimia_logo.png │ │ │ ├── kimia_radar_chart.png │ │ │ └── kimia_report.pdf │ │ ├── infer.py │ │ ├── kimia_infer │ │ │ ├── __init__.py │ │ │ ├── api │ │ │ │ ├── __init__.py │ │ │ │ ├── kimia.py │ │ │ │ └── prompt_manager.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── detokenizer │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bigvgan_wrapper.py │ │ │ │ │ ├── flow_matching │ │ │ │ │ │ ├── dit_block.py │ │ │ │ │ │ ├── model.py │ │ │ │ │ │ ├── ode_wrapper.py │ │ │ │ │ │ └── scheduler.py │ │ │ │ │ ├── semantic_fm_prefix_streaming.py │ │ │ │ │ └── vocoder │ │ │ │ │ │ ├── activations.py │ │ │ │ │ │ ├── alias_free_activation │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── cuda │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── activation1d.py │ │ │ │ │ │ │ ├── anti_alias_activation.cpp │ │ │ │ │ │ │ ├── anti_alias_activation_cuda.cu │ │ │ │ │ │ │ ├── compat.h │ │ │ │ │ │ │ ├── load.py │ │ │ │ │ │ │ └── type_shim.h │ │ │ │ │ │ └── torch │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ │ └── resample.py │ │ │ │ │ │ ├── bigvgan.py │ │ │ │ │ │ └── utils.py │ │ │ │ └── tokenizer │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── glm4 │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.md │ │ │ │ │ ├── README_en.md │ │ │ │ │ ├── audio_process.py │ │ │ │ │ ├── cosyvoice │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── bin │ │ │ │ │ │ │ ├── inference.py │ │ │ │ │ │ │ └── train.py │ │ │ │ │ │ ├── cli │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── cosyvoice.py │ │ │ │ │ │ │ ├── frontend.py │ │ │ │ │ │ │ └── model.py │ │ │ │ │ │ ├── dataset │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── dataset.py │ │ │ │ │ │ │ └── processor.py │ │ │ │ │ │ ├── flow │ │ │ │ │ │ │ ├── decoder.py │ │ │ │ │ │ │ ├── flow.py │ │ │ │ │ │ │ ├── flow_gradtts.py │ │ │ │ │ │ │ ├── flow_matching.py │ │ │ │ │ │ │ ├── flow_matching_dit.py │ │ │ │ │ │ │ ├── length_regulator.py │ │ │ │ │ │ │ └── stable │ │ │ │ │ │ │ │ ├── adp.py │ │ │ │ │ │ │ │ ├── blocks.py │ │ │ │ │ │ │ │ ├── dit.py │ │ │ │ │ │ │ │ ├── dit_v2.py │ │ │ │ │ │ │ │ ├── sampling.py │ │ │ │ │ │ │ │ ├── stable_diffusion.py │ │ │ │ │ │ │ │ ├── stable_diffusion_test.py │ │ │ │ │ │ │ │ ├── transformer.py │ │ │ │ │ │ │ │ └── transformer_use_mask.py │ │ │ │ │ │ ├── hifigan │ │ │ │ │ │ │ ├── f0_predictor.py │ │ │ │ │ │ │ └── generator.py │ │ │ │ │ │ ├── llm │ │ │ │ │ │ │ └── llm.py │ │ │ │ │ │ ├── transformer │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── activation.py │ │ │ │ │ │ │ ├── attention.py │ │ │ │ │ │ │ ├── convolution.py │ │ │ │ │ │ │ ├── decoder.py │ │ │ │ │ │ │ ├── decoder_layer.py │ │ │ │ │ │ │ ├── embedding.py │ │ │ │ │ │ │ ├── encoder.py │ │ │ │ │ │ │ ├── encoder_layer.py │ │ │ │ │ │ │ ├── label_smoothing_loss.py │ │ │ │ │ │ │ ├── positionwise_feed_forward.py │ │ │ │ │ │ │ └── subsampling.py │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── block_mask_util.py │ │ │ │ │ │ │ ├── class_utils.py │ │ │ │ │ │ │ ├── common.py │ │ │ │ │ │ │ ├── executor.py │ │ │ │ │ │ │ ├── file_utils.py │ │ │ │ │ │ │ ├── frontend_utils.py │ │ │ │ │ │ │ ├── mask.py │ │ │ │ │ │ │ ├── scheduler.py │ │ │ │ │ │ │ └── train_utils.py │ │ │ │ │ ├── flow_inference.py │ │ │ │ │ ├── model_server.py │ │ │ │ │ ├── requirements.txt │ │ │ │ │ ├── resources │ │ │ │ │ │ ├── architecture.jpeg │ │ │ │ │ │ └── web_demo.png │ │ │ │ │ ├── speech_tokenizer │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── configuration_whisper.py │ │ │ │ │ │ ├── generation_whisper.py │ │ │ │ │ │ ├── modeling_whisper.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── third_party │ │ │ │ │ │ └── Matcha-TTS │ │ │ │ │ │ │ ├── .env.example │ │ │ │ │ │ │ ├── .github │ │ │ │ │ │ │ ├── PULL_REQUEST_TEMPLATE.md │ │ │ │ │ │ │ ├── codecov.yml │ │ │ │ │ │ │ ├── dependabot.yml │ │ │ │ │ │ │ └── release-drafter.yml │ │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ │ ├── .pre-commit-config.yaml │ │ │ │ │ │ │ ├── .project-root │ │ │ │ │ │ │ ├── .pylintrc │ │ │ │ │ │ │ ├── LICENSE │ │ │ │ │ │ │ ├── MANIFEST.in │ │ │ │ │ │ │ ├── Makefile │ │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ │ ├── configs │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── callbacks │ │ │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ │ │ ├── model_checkpoint.yaml │ │ │ │ │ │ │ │ ├── model_summary.yaml │ │ │ │ │ │ │ │ ├── none.yaml │ │ │ │ │ │ │ │ └── rich_progress_bar.yaml │ │ │ │ │ │ │ ├── data │ │ │ │ │ │ │ │ ├── hi-fi_en-US_female.yaml │ │ │ │ │ │ │ │ ├── ljspeech.yaml │ │ │ │ │ │ │ │ └── vctk.yaml │ │ │ │ │ │ │ ├── debug │ │ │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ │ │ ├── fdr.yaml │ │ │ │ │ │ │ │ ├── limit.yaml │ │ │ │ │ │ │ │ ├── overfit.yaml │ │ │ │ │ │ │ │ └── profiler.yaml │ │ │ │ │ │ │ ├── eval.yaml │ │ │ │ │ │ │ ├── experiment │ │ │ │ │ │ │ │ ├── hifi_dataset_piper_phonemizer.yaml │ │ │ │ │ │ │ │ ├── ljspeech.yaml │ │ │ │ │ │ │ │ ├── ljspeech_min_memory.yaml │ │ │ │ │ │ │ │ └── multispeaker.yaml │ │ │ │ │ │ │ ├── extras │ │ │ │ │ │ │ │ └── default.yaml │ │ │ │ │ │ │ ├── hparams_search │ │ │ │ │ │ │ │ └── mnist_optuna.yaml │ │ │ │ │ │ │ ├── hydra │ │ │ │ │ │ │ │ └── default.yaml │ │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ │ └── .gitkeep │ │ │ │ │ │ │ ├── logger │ │ │ │ │ │ │ │ ├── aim.yaml │ │ │ │ │ │ │ │ ├── comet.yaml │ │ │ │ │ │ │ │ ├── csv.yaml │ │ │ │ │ │ │ │ ├── many_loggers.yaml │ │ │ │ │ │ │ │ ├── mlflow.yaml │ │ │ │ │ │ │ │ ├── neptune.yaml │ │ │ │ │ │ │ │ ├── tensorboard.yaml │ │ │ │ │ │ │ │ └── wandb.yaml │ │ │ │ │ │ │ ├── model │ │ │ │ │ │ │ │ ├── cfm │ │ │ │ │ │ │ │ │ └── default.yaml │ │ │ │ │ │ │ │ ├── decoder │ │ │ │ │ │ │ │ │ └── default.yaml │ │ │ │ │ │ │ │ ├── encoder │ │ │ │ │ │ │ │ │ └── default.yaml │ │ │ │ │ │ │ │ ├── matcha.yaml │ │ │ │ │ │ │ │ └── optimizer │ │ │ │ │ │ │ │ │ └── adam.yaml │ │ │ │ │ │ │ ├── paths │ │ │ │ │ │ │ │ └── default.yaml │ │ │ │ │ │ │ ├── train.yaml │ │ │ │ │ │ │ └── trainer │ │ │ │ │ │ │ │ ├── cpu.yaml │ │ │ │ │ │ │ │ ├── ddp.yaml │ │ │ │ │ │ │ │ ├── ddp_sim.yaml │ │ │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ │ │ ├── gpu.yaml │ │ │ │ │ │ │ │ └── mps.yaml │ │ │ │ │ │ │ ├── data │ │ │ │ │ │ │ ├── matcha │ │ │ │ │ │ │ ├── VERSION │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── app.py │ │ │ │ │ │ │ ├── cli.py │ │ │ │ │ │ │ ├── data │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── components │ │ │ │ │ │ │ │ │ └── __init__.py │ │ │ │ │ │ │ │ └── text_mel_datamodule.py │ │ │ │ │ │ │ ├── hifigan │ │ │ │ │ │ │ │ ├── LICENSE │ │ │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ │ │ ├── denoiser.py │ │ │ │ │ │ │ │ ├── env.py │ │ │ │ │ │ │ │ ├── meldataset.py │ │ │ │ │ │ │ │ ├── models.py │ │ │ │ │ │ │ │ └── xutils.py │ │ │ │ │ │ │ ├── models │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── baselightningmodule.py │ │ │ │ │ │ │ │ ├── components │ │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ │ ├── decoder.py │ │ │ │ │ │ │ │ │ ├── flow_matching.py │ │ │ │ │ │ │ │ │ ├── text_encoder.py │ │ │ │ │ │ │ │ │ └── transformer.py │ │ │ │ │ │ │ │ └── matcha_tts.py │ │ │ │ │ │ │ ├── onnx │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── export.py │ │ │ │ │ │ │ │ └── infer.py │ │ │ │ │ │ │ ├── text │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── cleaners.py │ │ │ │ │ │ │ │ ├── numbers.py │ │ │ │ │ │ │ │ └── symbols.py │ │ │ │ │ │ │ ├── train.py │ │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── audio.py │ │ │ │ │ │ │ │ ├── generate_data_statistics.py │ │ │ │ │ │ │ │ ├── instantiators.py │ │ │ │ │ │ │ │ ├── logging_utils.py │ │ │ │ │ │ │ │ ├── model.py │ │ │ │ │ │ │ │ ├── monotonic_align │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── core.pyx │ │ │ │ │ │ │ │ └── setup.py │ │ │ │ │ │ │ │ ├── pylogger.py │ │ │ │ │ │ │ │ ├── rich_utils.py │ │ │ │ │ │ │ │ └── utils.py │ │ │ │ │ │ │ ├── notebooks │ │ │ │ │ │ │ └── .gitkeep │ │ │ │ │ │ │ ├── pyproject.toml │ │ │ │ │ │ │ ├── requirements.txt │ │ │ │ │ │ │ ├── scripts │ │ │ │ │ │ │ └── schedule.sh │ │ │ │ │ │ │ ├── setup.py │ │ │ │ │ │ │ └── synthesis.ipynb │ │ │ │ │ └── web_demo.py │ │ │ │ │ ├── glm4_tokenizer.py │ │ │ │ │ └── whisper_Lv3 │ │ │ │ │ ├── mel_filters.npz │ │ │ │ │ ├── modeling_whisper.py │ │ │ │ │ └── whisper.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── data.py │ │ │ │ ├── sampler.py │ │ │ │ └── special_tokens.py │ │ ├── main.py │ │ └── requirements.txt │ ├── MGM_Omni │ │ ├── README.md │ │ ├── main.py │ │ └── requirements.txt │ ├── SenseVoice │ │ ├── main.py │ │ └── requirements.txt │ ├── Spark-TTS │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── cli │ │ │ ├── SparkTTS.py │ │ │ └── inference.py │ │ ├── encodec.py │ │ ├── example │ │ │ └── infer.sh │ │ ├── main.py │ │ ├── requirements.txt │ │ ├── sparktts │ │ │ ├── models │ │ │ │ ├── audio_tokenizer.py │ │ │ │ └── bicodec.py │ │ │ ├── modules │ │ │ │ ├── blocks │ │ │ │ │ ├── layers.py │ │ │ │ │ ├── samper.py │ │ │ │ │ └── vocos.py │ │ │ │ ├── encoder_decoder │ │ │ │ │ ├── feat_decoder.py │ │ │ │ │ ├── feat_encoder.py │ │ │ │ │ └── wave_generator.py │ │ │ │ ├── fsq │ │ │ │ │ ├── finite_scalar_quantization.py │ │ │ │ │ └── residual_fsq.py │ │ │ │ ├── speaker │ │ │ │ │ ├── ecapa_tdnn.py │ │ │ │ │ ├── perceiver_encoder.py │ │ │ │ │ ├── pooling_layers.py │ │ │ │ │ └── speaker_encoder.py │ │ │ │ └── vq │ │ │ │ │ └── factorized_vector_quantize.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── audio.py │ │ │ │ ├── file.py │ │ │ │ ├── parse_options.sh │ │ │ │ └── token_parser.py │ │ ├── src │ │ │ ├── figures │ │ │ │ ├── gradio_TTS.png │ │ │ │ ├── gradio_control.png │ │ │ │ ├── infer_control.png │ │ │ │ └── infer_voice_cloning.png │ │ │ └── logo │ │ │ │ ├── HKUST.jpg │ │ │ │ ├── NPU.jpg │ │ │ │ ├── NTU.jpg │ │ │ │ ├── SJU.jpg │ │ │ │ ├── SparkAudio.jpg │ │ │ │ ├── SparkAudio2.jpg │ │ │ │ ├── SparkTTS.jpg │ │ │ │ ├── SparkTTS.png │ │ │ │ ├── mobvoi.jpg │ │ │ │ └── mobvoi.png │ │ └── webui.py │ ├── VoxCPM │ │ ├── main.py │ │ └── requirements.txt │ ├── VoxCPM2 │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── app.py │ │ ├── assets │ │ │ ├── logo_v2.jpeg │ │ │ └── thuhcsi.png │ │ ├── main.py │ │ ├── pyproject.toml │ │ ├── requirements.txt │ │ └── src │ │ │ └── voxcpm │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── core.py │ │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── voxcpm.py │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── audiovae │ │ │ │ ├── __init__.py │ │ │ │ └── audio_vae.py │ │ │ ├── layers │ │ │ │ ├── __init__.py │ │ │ │ └── scalar_quantization_layer.py │ │ │ ├── locdit │ │ │ │ ├── __init__.py │ │ │ │ ├── local_dit.py │ │ │ │ └── unified_cfm.py │ │ │ ├── locenc │ │ │ │ ├── __init__.py │ │ │ │ └── local_encoder.py │ │ │ └── minicpm4 │ │ │ │ ├── __init__.py │ │ │ │ ├── cache.py │ │ │ │ ├── config.py │ │ │ │ └── model.py │ │ │ └── utils │ │ │ └── text_normalize.py │ ├── WavTokenizer │ │ ├── LICENSE │ │ ├── README.md │ │ ├── configs │ │ │ ├── wavtokenizer_smalldata_frame40_3s_nq1_code4096_dim512_kmeans200_attn.yaml │ │ │ └── wavtokenizer_smalldata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml │ │ ├── data │ │ │ └── demo.txt │ │ ├── decoder │ │ │ ├── __init__.py │ │ │ ├── dataset.py │ │ │ ├── discriminator_dac.py │ │ │ ├── discriminators.py │ │ │ ├── experiment.py │ │ │ ├── feature_extractors.py │ │ │ ├── heads.py │ │ │ ├── helpers.py │ │ │ ├── loss.py │ │ │ ├── models.py │ │ │ ├── modules.py │ │ │ ├── pretrained.py │ │ │ ├── pretrained_model.py │ │ │ └── spectral_ops.py │ │ ├── encoder │ │ │ ├── __init__.py │ │ │ ├── distrib.py │ │ │ ├── model.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── conv.py │ │ │ │ ├── lstm.py │ │ │ │ ├── norm.py │ │ │ │ ├── seanet.py │ │ │ │ └── transformer.py │ │ │ ├── msstftd.py │ │ │ ├── quantization │ │ │ │ ├── __init__.py │ │ │ │ ├── ac.py │ │ │ │ ├── core_vq.py │ │ │ │ └── vq.py │ │ │ └── utils.py │ │ ├── infer.py │ │ ├── metrics │ │ │ ├── UTMOS.py │ │ │ ├── infer.py │ │ │ └── periodicity.py │ │ ├── requirements.txt │ │ ├── result.png │ │ └── train.py │ ├── __init__.py │ ├── chattts.py │ ├── coco.py │ ├── cpm_tts │ │ ├── __init__.py │ │ ├── chattts.py │ │ ├── config.py │ │ ├── dvae.py │ │ ├── gpt.py │ │ ├── minicpmv26_resampler.py │ │ └── processor.py │ ├── cv3_speaker_sim │ │ ├── 3D-Speaker │ │ │ ├── egs │ │ │ │ ├── 3dspeaker │ │ │ │ │ ├── README.md │ │ │ │ │ ├── language-identification │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ ├── cam++.yaml │ │ │ │ │ │ │ ├── eres2net.yaml │ │ │ │ │ │ │ └── eres2net_para.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── compute_acc.py │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── predict.py │ │ │ │ │ │ │ ├── predict_para.py │ │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ │ ├── prepare_data_csv.py │ │ │ │ │ │ │ └── prepare_pretrained_model.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── requirements.txt │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── run_paraformer.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── speaker-diarization │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ ├── diar.yaml │ │ │ │ │ │ │ └── diar_video.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── DER.py │ │ │ │ │ │ │ ├── cluster_and_postprocess.py │ │ │ │ │ │ │ ├── compute_der.py │ │ │ │ │ │ │ ├── extract_diar_embeddings.py │ │ │ │ │ │ │ ├── extract_visual_embeddings.py │ │ │ │ │ │ │ ├── md-eval.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ ├── prepare_subseg_json.py │ │ │ │ │ │ │ ├── vision_processer.py │ │ │ │ │ │ │ ├── vision_tools │ │ │ │ │ │ │ │ ├── active_speaker_detection.py │ │ │ │ │ │ │ │ ├── face_detection.py │ │ │ │ │ │ │ │ ├── face_quality_assessment.py │ │ │ │ │ │ │ │ └── face_recognition.py │ │ │ │ │ │ │ └── voice_activity_detection.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── requirements.txt │ │ │ │ │ │ ├── run_audio.sh │ │ │ │ │ │ ├── run_video.sh │ │ │ │ │ │ └── speakerlab │ │ │ │ │ ├── sv-cam++ │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── cam++.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-ecapa │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── ecapa_tdnn.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-eres2net │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ ├── eres2net.yaml │ │ │ │ │ │ │ └── eres2net_lm.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-eres2netv2 │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ ├── eres2netv2.yaml │ │ │ │ │ │ │ └── eres2netv2_lm.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-rdino │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── rdino.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── prepare_data_rdino.sh │ │ │ │ │ │ │ └── process_musan.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-res2net │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── res2net.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ └── sv-resnet │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ └── resnet.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ ├── ava-asd │ │ │ │ │ └── talknet │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ └── config.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── extract_audio_clips.py │ │ │ │ │ │ └── extract_video_clips.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── requirements.txt │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ └── parse_options.sh │ │ │ │ ├── cnceleb │ │ │ │ │ ├── README.md │ │ │ │ │ ├── sv-cam++ │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── cam++.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── flac2wav.py │ │ │ │ │ │ │ ├── prepare_data_cncb.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ ├── spk2utt_to_utt2spk.pl │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-ecapa │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── ecapa_tdnn.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── flac2wav.py │ │ │ │ │ │ │ ├── prepare_data_cncb.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ ├── spk2utt_to_utt2spk.pl │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-eres2net │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ ├── eres2net.yaml │ │ │ │ │ │ │ └── eres2net_lm.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── flac2wav.py │ │ │ │ │ │ │ ├── prepare_data_cncb.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ ├── spk2utt_to_utt2spk.pl │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-eres2netv2 │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ ├── eres2netv2.yaml │ │ │ │ │ │ │ └── eres2netv2_lm.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── flac2wav.py │ │ │ │ │ │ │ ├── prepare_data_cncb.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ ├── spk2utt_to_utt2spk.pl │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-rdino │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── rdino.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── flac2wav.py │ │ │ │ │ │ │ ├── prepare_data_rdino.sh │ │ │ │ │ │ │ └── process_musan.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-res2net │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ │ └── res2net.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ │ ├── flac2wav.py │ │ │ │ │ │ │ ├── prepare_data_cncb.sh │ │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ └── sv-resnet │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── conf │ │ │ │ │ │ └── resnet.yaml │ │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── flac2wav.py │ │ │ │ │ │ ├── prepare_data_cncb.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── run.sh │ │ │ │ │ │ ├── speakerlab │ │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ ├── semantic_speaker │ │ │ │ │ └── bert │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── bin │ │ │ │ │ │ ├── run_dialogue_detection.py │ │ │ │ │ │ └── run_speaker_turn_detection.py │ │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_aishell_4_data.sh │ │ │ │ │ │ ├── download_alimeeting_data.sh │ │ │ │ │ │ ├── merge_json_files_for_semantic_speaker.py │ │ │ │ │ │ ├── prepare_files_for_aishell_4.py │ │ │ │ │ │ ├── prepare_files_for_alimeeting.py │ │ │ │ │ │ └── prepare_json_files_for_semantic_speaker.py │ │ │ │ │ │ ├── path.sh │ │ │ │ │ │ ├── requirements.txt │ │ │ │ │ │ ├── run_dialogue_detection.sh │ │ │ │ │ │ └── run_speaker_turn_detection.sh │ │ │ │ └── voxceleb │ │ │ │ │ ├── README.md │ │ │ │ │ ├── sv-cam++ │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ └── cam++.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-ecapa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ └── ecapa_tdnn.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-eres2net │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ ├── eres2net.yaml │ │ │ │ │ │ └── eres2net_lm.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-eres2netv2 │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ ├── eres2netv2.yaml │ │ │ │ │ │ └── eres2netv2_lm.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-rdino │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ └── rdino.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data_rdino.sh │ │ │ │ │ │ └── process_musan.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-res2net │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ └── res2net.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-resnet │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ └── resnet.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ ├── sv-sdpn │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ │ └── sdpn.yaml │ │ │ │ │ ├── local │ │ │ │ │ │ ├── download_data.sh │ │ │ │ │ │ ├── prepare_data_rdino.sh │ │ │ │ │ │ └── process_musan.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ │ │ └── sv-xvector │ │ │ │ │ ├── README.md │ │ │ │ │ ├── conf │ │ │ │ │ └── tdnn.yaml │ │ │ │ │ ├── local │ │ │ │ │ ├── download_data.sh │ │ │ │ │ ├── prepare_data.sh │ │ │ │ │ └── prepare_data_csv.py │ │ │ │ │ ├── path.sh │ │ │ │ │ ├── run.sh │ │ │ │ │ ├── speakerlab │ │ │ │ │ └── utils │ │ │ │ │ ├── m4a2wav.pl │ │ │ │ │ ├── parse_options.sh │ │ │ │ │ └── utt2spk_to_spk2utt.pl │ │ │ ├── pretrained │ │ │ │ └── speech_eres2net_sv_en_voxceleb_16k │ │ │ │ │ └── pretrained_eres2net.ckpt │ │ │ ├── runtime │ │ │ │ └── onnxruntime │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── README.md │ │ │ │ │ ├── assets │ │ │ │ │ └── fbank_config.json │ │ │ │ │ ├── bin │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── extract_speaker_embedding.cpp │ │ │ │ │ ├── make_fbank_feature.cpp │ │ │ │ │ └── read_and_describe_wav.cpp │ │ │ │ │ ├── cmake │ │ │ │ │ ├── build_json.cmake │ │ │ │ │ └── build_onnx.cmake │ │ │ │ │ ├── feature │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── feature_basic.cpp │ │ │ │ │ ├── feature_basic.h │ │ │ │ │ ├── feature_common.cpp │ │ │ │ │ ├── feature_common.h │ │ │ │ │ ├── feature_fbank.cpp │ │ │ │ │ ├── feature_fbank.h │ │ │ │ │ ├── feature_functions.cpp │ │ │ │ │ └── feature_functions.h │ │ │ │ │ ├── model │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── speaker_embedding_model.cpp │ │ │ │ │ └── speaker_embedding_model.h │ │ │ │ │ └── utils │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── wav_reader.cpp │ │ │ │ │ └── wav_reader.h │ │ │ └── speakerlab │ │ │ │ ├── bin │ │ │ │ ├── compute_score_metrics.py │ │ │ │ ├── export_speaker_embedding_onnx.py │ │ │ │ ├── extract.py │ │ │ │ ├── extract_ssl.py │ │ │ │ ├── infer_sv.py │ │ │ │ ├── infer_sv_batch.py │ │ │ │ ├── infer_sv_ssl.py │ │ │ │ ├── train.py │ │ │ │ ├── train_asd.py │ │ │ │ ├── train_para.py │ │ │ │ ├── train_rdino.py │ │ │ │ └── train_sdpn.py │ │ │ │ ├── dataset │ │ │ │ ├── dataset.py │ │ │ │ ├── dataset_asd.py │ │ │ │ ├── dataset_rdino.py │ │ │ │ └── dataset_sdpn.py │ │ │ │ ├── loss │ │ │ │ ├── dino_loss.py │ │ │ │ ├── keleo_loss.py │ │ │ │ ├── margin_loss.py │ │ │ │ └── sdpn_loss.py │ │ │ │ ├── models │ │ │ │ ├── campplus │ │ │ │ │ ├── DTDNN.py │ │ │ │ │ ├── classifier.py │ │ │ │ │ └── layers.py │ │ │ │ ├── ecapa_tdnn │ │ │ │ │ └── ECAPA_TDNN.py │ │ │ │ ├── eres2net │ │ │ │ │ ├── ERes2Net.py │ │ │ │ │ ├── ERes2NetV2.py │ │ │ │ │ ├── ERes2Net_huge.py │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── fusion.py │ │ │ │ │ └── pooling_layers.py │ │ │ │ ├── rdino │ │ │ │ │ ├── ECAPA_TDNN.py │ │ │ │ │ ├── RDINO_Head.py │ │ │ │ │ └── combiner.py │ │ │ │ ├── res2net │ │ │ │ │ └── Res2Net.py │ │ │ │ ├── resnet │ │ │ │ │ └── ResNet.py │ │ │ │ ├── sdpn │ │ │ │ │ ├── ECAPA_TDNN.py │ │ │ │ │ ├── SDPN_Head.py │ │ │ │ │ └── combiner.py │ │ │ │ ├── talknet │ │ │ │ │ ├── attentionLayer.py │ │ │ │ │ ├── audioEncoder.py │ │ │ │ │ ├── talknet.py │ │ │ │ │ └── visualEncoder.py │ │ │ │ └── xvector │ │ │ │ │ └── TDNN.py │ │ │ │ ├── process │ │ │ │ ├── augmentation.py │ │ │ │ ├── cluster.py │ │ │ │ ├── processor.py │ │ │ │ ├── processor_para.py │ │ │ │ └── scheduler.py │ │ │ │ └── utils │ │ │ │ ├── builder.py │ │ │ │ ├── checkpoint.py │ │ │ │ ├── config.py │ │ │ │ ├── epoch.py │ │ │ │ ├── fileio.py │ │ │ │ ├── score_metrics.py │ │ │ │ ├── utils.py │ │ │ │ └── utils_rdino.py │ │ ├── README.md │ │ ├── example_usage.py │ │ ├── requirements.txt │ │ ├── speaker_sim.py │ │ └── test_integration.py │ ├── doubao │ │ ├── simplex_websocket_demo.py │ │ └── stream_asr.py │ ├── encodec │ │ ├── main.py │ │ └── requirements.txt │ ├── evaluate_tokenizer.py │ ├── index-tts │ │ ├── .gitignore │ │ ├── DISCLAIMER │ │ ├── INDEX_MODEL_LICENSE │ │ ├── LICENSE │ │ ├── README.md │ │ ├── assets │ │ │ ├── IndexTTS.png │ │ │ ├── img.png │ │ │ └── index_icon.png │ │ ├── checkpoints │ │ │ └── config.yaml │ │ ├── indextts │ │ │ ├── BigVGAN │ │ │ │ ├── ECAPA_TDNN.py │ │ │ │ ├── __init__.py │ │ │ │ ├── activations.py │ │ │ │ ├── alias_free_activation │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── activation1d.py │ │ │ │ │ │ ├── anti_alias_activation.cpp │ │ │ │ │ │ ├── anti_alias_activation_cuda.cu │ │ │ │ │ │ ├── compat.h │ │ │ │ │ │ ├── load.py │ │ │ │ │ │ └── type_shim.h │ │ │ │ │ └── torch │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ └── resample.py │ │ │ │ ├── alias_free_torch │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── act.py │ │ │ │ │ ├── filter.py │ │ │ │ │ └── resample.py │ │ │ │ ├── bigvgan.py │ │ │ │ ├── models.py │ │ │ │ ├── nnet │ │ │ │ │ ├── CNN.py │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── linear.py │ │ │ │ │ └── normalization.py │ │ │ │ └── utils.py │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── gpt │ │ │ │ ├── __init__.py │ │ │ │ ├── conformer │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── attention.py │ │ │ │ │ ├── embedding.py │ │ │ │ │ └── subsampling.py │ │ │ │ ├── conformer_encoder.py │ │ │ │ ├── model.py │ │ │ │ └── perceiver.py │ │ │ ├── infer.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── arch_util.py │ │ │ │ ├── checkpoint.py │ │ │ │ ├── common.py │ │ │ │ ├── feature_extractors.py │ │ │ │ ├── front.py │ │ │ │ ├── typical_sampling.py │ │ │ │ ├── webui_utils.py │ │ │ │ └── xtransformers.py │ │ │ └── vqvae │ │ │ │ ├── __init__.py │ │ │ │ └── xtts_dvae.py │ │ ├── main.py │ │ ├── requirements.txt │ │ ├── requirements2.txt │ │ ├── setup.py │ │ ├── tests │ │ │ ├── regression_test.py │ │ │ └── sample_prompt.wav │ │ ├── tools │ │ │ └── i18n │ │ │ │ ├── i18n.py │ │ │ │ ├── locale │ │ │ │ └── en_US.json │ │ │ │ └── scan_i18n.py │ │ └── webui.py │ ├── index-tts2 │ │ ├── .gitattributes │ │ ├── .gitignore │ │ ├── .python-version │ │ ├── DISCLAIMER │ │ ├── LICENSE │ │ ├── LICENSE_ZH.txt │ │ ├── MANIFEST.in │ │ ├── README.md │ │ ├── archive │ │ │ └── README_INDEXTTS_1_5.md │ │ ├── assets │ │ │ ├── IndexTTS.png │ │ │ ├── IndexTTS2-video-pic.png │ │ │ ├── IndexTTS2.mp4 │ │ │ ├── IndexTTS2.png │ │ │ ├── IndexTTS2_banner.png │ │ │ ├── img.png │ │ │ └── index_icon.png │ │ ├── checkpoints │ │ │ └── config.yaml │ │ ├── examples │ │ │ ├── cases.jsonl │ │ │ ├── emo_hate.wav │ │ │ ├── emo_sad.wav │ │ │ ├── voice_01.wav │ │ │ ├── voice_02.wav │ │ │ ├── voice_03.wav │ │ │ ├── voice_04.wav │ │ │ ├── voice_05.wav │ │ │ ├── voice_06.wav │ │ │ ├── voice_07.wav │ │ │ ├── voice_08.wav │ │ │ ├── voice_09.wav │ │ │ ├── voice_10.wav │ │ │ ├── voice_11.wav │ │ │ └── voice_12.wav │ │ ├── indextts │ │ │ ├── BigVGAN │ │ │ │ ├── ECAPA_TDNN.py │ │ │ │ ├── __init__.py │ │ │ │ ├── activations.py │ │ │ │ ├── alias_free_activation │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── activation1d.py │ │ │ │ │ │ ├── anti_alias_activation.cpp │ │ │ │ │ │ ├── anti_alias_activation_cuda.cu │ │ │ │ │ │ ├── compat.h │ │ │ │ │ │ ├── load.py │ │ │ │ │ │ └── type_shim.h │ │ │ │ │ └── torch │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ └── resample.py │ │ │ │ ├── alias_free_torch │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── act.py │ │ │ │ │ ├── filter.py │ │ │ │ │ └── resample.py │ │ │ │ ├── bigvgan.py │ │ │ │ ├── models.py │ │ │ │ ├── nnet │ │ │ │ │ ├── CNN.py │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── linear.py │ │ │ │ │ └── normalization.py │ │ │ │ └── utils.py │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── gpt │ │ │ │ ├── __init__.py │ │ │ │ ├── conformer │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── attention.py │ │ │ │ │ ├── embedding.py │ │ │ │ │ └── subsampling.py │ │ │ │ ├── conformer_encoder.py │ │ │ │ ├── model.py │ │ │ │ ├── model_v2.py │ │ │ │ ├── perceiver.py │ │ │ │ ├── transformers_beam_search.py │ │ │ │ ├── transformers_generation_utils.py │ │ │ │ ├── transformers_gpt2.py │ │ │ │ └── transformers_modeling_utils.py │ │ │ ├── infer.py │ │ │ ├── infer_v2.py │ │ │ ├── s2mel │ │ │ │ ├── dac │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── model │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── base.py │ │ │ │ │ │ ├── dac.py │ │ │ │ │ │ ├── discriminator.py │ │ │ │ │ │ └── encodec.py │ │ │ │ │ ├── nn │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── layers.py │ │ │ │ │ │ ├── loss.py │ │ │ │ │ │ └── quantize.py │ │ │ │ │ └── utils │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── decode.py │ │ │ │ │ │ └── encode.py │ │ │ │ ├── hf_utils.py │ │ │ │ ├── modules │ │ │ │ │ ├── .ipynb_checkpoints │ │ │ │ │ │ ├── audio-checkpoint.py │ │ │ │ │ │ ├── commons-checkpoint.py │ │ │ │ │ │ ├── diffusion_transformer-checkpoint.py │ │ │ │ │ │ ├── flow_matching-checkpoint.py │ │ │ │ │ │ └── length_regulator-checkpoint.py │ │ │ │ │ ├── alias_free_torch │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ └── resample.py │ │ │ │ │ ├── audio.py │ │ │ │ │ ├── bigvgan │ │ │ │ │ │ ├── activations.py │ │ │ │ │ │ ├── alias_free_activation │ │ │ │ │ │ │ ├── cuda │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── activation1d.py │ │ │ │ │ │ │ │ ├── anti_alias_activation.cpp │ │ │ │ │ │ │ │ ├── anti_alias_activation_cuda.cu │ │ │ │ │ │ │ │ ├── compat.h │ │ │ │ │ │ │ │ ├── load.py │ │ │ │ │ │ │ │ └── type_shim.h │ │ │ │ │ │ │ └── torch │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ │ │ └── resample.py │ │ │ │ │ │ ├── bigvgan.py │ │ │ │ │ │ ├── config.json │ │ │ │ │ │ ├── env.py │ │ │ │ │ │ ├── meldataset.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── campplus │ │ │ │ │ │ ├── DTDNN.py │ │ │ │ │ │ ├── classifier.py │ │ │ │ │ │ └── layers.py │ │ │ │ │ ├── commons.py │ │ │ │ │ ├── diffusion_transformer.py │ │ │ │ │ ├── encodec.py │ │ │ │ │ ├── flow_matching.py │ │ │ │ │ ├── gpt_fast │ │ │ │ │ │ ├── .ipynb_checkpoints │ │ │ │ │ │ │ └── model-checkpoint.py │ │ │ │ │ │ ├── generate.py │ │ │ │ │ │ ├── model.py │ │ │ │ │ │ └── quantize.py │ │ │ │ │ ├── hifigan │ │ │ │ │ │ ├── f0_predictor.py │ │ │ │ │ │ └── generator.py │ │ │ │ │ ├── layers.py │ │ │ │ │ ├── length_regulator.py │ │ │ │ │ ├── openvoice │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── api.py │ │ │ │ │ │ ├── attentions.py │ │ │ │ │ │ ├── checkpoints_v2 │ │ │ │ │ │ │ └── converter │ │ │ │ │ │ │ │ └── config.json │ │ │ │ │ │ ├── commons.py │ │ │ │ │ │ ├── mel_processing.py │ │ │ │ │ │ ├── models.py │ │ │ │ │ │ ├── modules.py │ │ │ │ │ │ ├── openvoice_app.py │ │ │ │ │ │ ├── se_extractor.py │ │ │ │ │ │ ├── transforms.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── quantize.py │ │ │ │ │ ├── rmvpe.py │ │ │ │ │ ├── vocos │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── heads.py │ │ │ │ │ │ ├── helpers.py │ │ │ │ │ │ ├── loss.py │ │ │ │ │ │ ├── models.py │ │ │ │ │ │ ├── modules.py │ │ │ │ │ │ ├── pretrained.py │ │ │ │ │ │ └── spectral_ops.py │ │ │ │ │ └── wavenet.py │ │ │ │ ├── optimizers.py │ │ │ │ └── wav2vecbert_extract.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── arch_util.py │ │ │ │ ├── checkpoint.py │ │ │ │ ├── common.py │ │ │ │ ├── feature_extractors.py │ │ │ │ ├── front.py │ │ │ │ ├── maskgct │ │ │ │ │ └── models │ │ │ │ │ │ ├── codec │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── amphion_codec │ │ │ │ │ │ │ ├── codec.py │ │ │ │ │ │ │ ├── quantize │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── factorized_vector_quantize.py │ │ │ │ │ │ │ │ ├── lookup_free_quantize.py │ │ │ │ │ │ │ │ ├── residual_vq.py │ │ │ │ │ │ │ │ └── vector_quantize.py │ │ │ │ │ │ │ └── vocos.py │ │ │ │ │ │ ├── codec_dataset.py │ │ │ │ │ │ ├── codec_inference.py │ │ │ │ │ │ ├── codec_sampler.py │ │ │ │ │ │ ├── codec_trainer.py │ │ │ │ │ │ ├── facodec │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── alias_free_torch │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ │ │ └── resample.py │ │ │ │ │ │ │ ├── facodec_dataset.py │ │ │ │ │ │ │ ├── facodec_inference.py │ │ │ │ │ │ │ ├── facodec_trainer.py │ │ │ │ │ │ │ ├── modules │ │ │ │ │ │ │ │ ├── JDC │ │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ │ ├── bst.t7 │ │ │ │ │ │ │ │ │ └── model.py │ │ │ │ │ │ │ │ ├── attentions.py │ │ │ │ │ │ │ │ ├── commons.py │ │ │ │ │ │ │ │ ├── gradient_reversal.py │ │ │ │ │ │ │ │ ├── layers.py │ │ │ │ │ │ │ │ ├── quantize.py │ │ │ │ │ │ │ │ ├── style_encoder.py │ │ │ │ │ │ │ │ └── wavenet.py │ │ │ │ │ │ │ └── optimizer.py │ │ │ │ │ │ ├── kmeans │ │ │ │ │ │ │ ├── repcodec_model.py │ │ │ │ │ │ │ └── vocos.py │ │ │ │ │ │ ├── melvqgan │ │ │ │ │ │ │ └── melspec.py │ │ │ │ │ │ ├── ns3_codec │ │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── alias_free_torch │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ │ │ └── resample.py │ │ │ │ │ │ │ ├── facodec.py │ │ │ │ │ │ │ ├── gradient_reversal.py │ │ │ │ │ │ │ ├── melspec.py │ │ │ │ │ │ │ ├── quantize │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── fvq.py │ │ │ │ │ │ │ │ └── rvq.py │ │ │ │ │ │ │ └── transformer.py │ │ │ │ │ │ ├── speechtokenizer │ │ │ │ │ │ │ ├── model.py │ │ │ │ │ │ │ └── modules │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── conv.py │ │ │ │ │ │ │ │ ├── lstm.py │ │ │ │ │ │ │ │ ├── norm.py │ │ │ │ │ │ │ │ ├── quantization │ │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ │ ├── ac.py │ │ │ │ │ │ │ │ ├── core_vq.py │ │ │ │ │ │ │ │ ├── distrib.py │ │ │ │ │ │ │ │ └── vq.py │ │ │ │ │ │ │ │ └── seanet.py │ │ │ │ │ │ └── vevo │ │ │ │ │ │ │ └── vevo_repcodec.py │ │ │ │ │ │ └── tts │ │ │ │ │ │ └── maskgct │ │ │ │ │ │ ├── ckpt │ │ │ │ │ │ └── wav2vec2bert_stats.pt │ │ │ │ │ │ ├── llama_nar.py │ │ │ │ │ │ └── maskgct_s2a.py │ │ │ │ ├── maskgct_utils.py │ │ │ │ ├── text_utils.py │ │ │ │ ├── typical_sampling.py │ │ │ │ ├── utils.py │ │ │ │ ├── webui_utils.py │ │ │ │ └── xtransformers.py │ │ │ └── vqvae │ │ │ │ ├── __init__.py │ │ │ │ └── xtts_dvae.py │ │ ├── main.py │ │ ├── pyproject.toml │ │ ├── requirements.txt │ │ ├── tests │ │ │ ├── cases.jsonl │ │ │ ├── padding_test.py │ │ │ ├── regression_test.py │ │ │ └── sample_prompt.wav │ │ ├── tools │ │ │ ├── gpu_check.py │ │ │ └── i18n │ │ │ │ ├── i18n.py │ │ │ │ ├── locale │ │ │ │ ├── en_US.json │ │ │ │ └── zh_CN.json │ │ │ │ └── scan_i18n.py │ │ ├── tts2_infer.py │ │ ├── uv.lock │ │ └── webui.py │ ├── instruction_following_eval │ │ ├── README.md │ │ ├── __init__.py │ │ ├── instructions.py │ │ ├── instructions_registry.py │ │ └── instructions_util.py │ ├── mimi │ │ ├── main.py │ │ ├── requirements.txt │ │ └── stream.py │ ├── minicpm │ │ ├── main.py │ │ └── requirements.txt │ ├── minicpm_0_5B │ │ ├── main.py │ │ └── requirements.txt │ ├── paraformer │ │ ├── main.py │ │ └── requirements.txt │ ├── paraformer_ms │ │ ├── main.py │ │ └── requirements.txt │ ├── qwen2-5omni │ │ ├── main.py │ │ └── requirements.txt │ ├── sensevoicelib.py │ ├── simo │ │ ├── models_ecapa_tdnn.py │ │ ├── requirements.txt │ │ └── simo.py │ ├── ssnact │ │ └── ssnact.py │ ├── streaming_asr_demo.py │ ├── text_normalization │ │ ├── __init__.py │ │ ├── basic.py │ │ ├── cn_tn.py │ │ ├── en.py │ │ └── english.json │ ├── utmos │ │ ├── lightning_module.py │ │ ├── main.py │ │ ├── model.py │ │ └── requirements.txt │ ├── wer.py │ └── whisper │ │ ├── cv3.py │ │ ├── main.py │ │ ├── requirements.txt │ │ └── seed_tts_eval.py ├── main.py ├── models │ ├── AudioEncoder │ │ ├── __init__.py │ │ ├── chattts.py │ │ ├── cosyvoice.py │ │ ├── cosyvoice_adv.py │ │ ├── encodec.py │ │ ├── mimi.py │ │ ├── spark.py │ │ ├── vocos_encode.py │ │ └── wav_tokenizer.py │ ├── TTS │ │ ├── __init__.py │ │ ├── amphion.py │ │ ├── higgs_audio.py │ │ ├── indextts.py │ │ ├── indextts2.py │ │ ├── megatts.py │ │ ├── melotts.py │ │ ├── mgm_omni.py │ │ ├── spark.py │ │ ├── stabletts.py │ │ ├── voxcpm.py │ │ └── voxcpm2.py │ ├── UltraVOX.py │ ├── __init__.py │ ├── ali.py │ ├── asr │ │ ├── __init__.py │ │ ├── ali.py │ │ ├── baidu.py │ │ ├── fireredasr.py │ │ ├── huawei.py │ │ ├── huoshan.py │ │ ├── paraformer.py │ │ ├── paraformer_ms.py │ │ ├── sensevoice.py │ │ ├── sherpa.py │ │ ├── tencent.py │ │ └── xfyun.py │ ├── bytedance │ │ ├── __init__.py │ │ └── doubao.py │ ├── cv3_speaker_sim.py │ ├── dnsmos.py │ ├── glm4audio.py │ ├── glm4voice.py │ ├── google.py │ ├── llama_omni.py │ ├── llmcenter.py │ ├── mini_cpm.py │ ├── mini_omni.py │ ├── model.py │ ├── moonshot.py │ ├── offline_model.py │ ├── ola.py │ ├── openai.py │ ├── openai_realtime.py │ ├── qwen.py │ ├── qwen2_5.py │ ├── sp_gemini.py │ ├── step_audio.py │ ├── utmos.py │ ├── wavlm.py │ └── whisper.py ├── process │ ├── __init__.py │ ├── base.py │ ├── eliminate.py │ ├── firstoption.py │ ├── normalization.py │ ├── qwen.py │ └── speech.py ├── prompt │ ├── __init__.py │ └── base.py ├── recorder.py ├── registry.py └── utils.py ├── cli └── list_availabel.py ├── docs ├── Procedures for Restarting an Incomplete Evaluation.md ├── how add a dataset.md ├── how eval your model.md ├── how launch a custom eval task.md ├── how use UTMOS, DNSMOS eval speech quality.md └── seed-tts-eval4voice_clone.md ├── pyproject.toml ├── registry ├── agg │ ├── air-bench.yaml │ ├── naive.yaml │ └── wer.yaml ├── dataset │ ├── AudioCaps.yaml │ ├── COVID-recognizer.yaml │ ├── CatDog.yaml │ ├── ClothoAQA.yaml │ ├── CommonVoice.yaml │ ├── DESEDpublic_eval.yaml │ ├── GTZAN.yaml │ ├── GigaSpeech.yaml │ ├── KeSpeech.yaml │ ├── MELD.yaml │ ├── MMAU.yaml │ ├── Nsynth.yaml │ ├── RAVDESS.yaml │ ├── RespiratorySound.yaml │ ├── TESS.yaml │ ├── VSC.yaml │ ├── VoxCeleb.yaml │ ├── WavCaps.yaml │ ├── WenetSpeech.yaml │ ├── air.yaml │ ├── aishell.yaml │ ├── alpaca_eval.yaml │ ├── audio-MNIST.yaml │ ├── chord_recoganition.yaml │ ├── covost2.yaml │ ├── cv3.yaml │ ├── fleurs.yaml │ ├── heart_beat.yaml │ ├── librispeech.yaml │ ├── llama_questions.yaml │ ├── long-tts-eval.yaml │ ├── multilingual_librispeech.yaml │ ├── peoples_speech.yaml │ ├── sample.yaml │ ├── seed-tts-eval.yaml │ ├── tedlium.yaml │ ├── triviaqa.yaml │ ├── voicebench.yaml │ ├── voxpopuli.yaml │ └── webQ.yaml ├── eval_task │ ├── acoustics.yaml │ ├── air.yaml │ ├── alpaca.yaml │ ├── aqa.yaml │ ├── asr.yaml │ ├── caption.yaml │ ├── cv3.yaml │ ├── digit.yaml │ ├── emo.yaml │ ├── gender.yaml │ ├── inference.yaml │ ├── medicine.yaml │ ├── music.yaml │ ├── seed_tts_eval.yaml │ ├── sound_identify.yaml │ ├── stt.yaml │ ├── tts.yaml │ ├── voicebench.yaml │ └── vsc.yaml ├── evaluator │ ├── air-bench.yaml │ ├── alpaca.yaml │ ├── choice-with-ans.yaml │ ├── common.yaml │ ├── cv3.yaml │ ├── dnsmos.yaml │ ├── llama-speech.yaml │ ├── long_tts_eval.yaml │ ├── naive_wer.yaml │ ├── qa.yaml │ ├── seed_tts_eval.yaml │ ├── simo.yaml │ ├── speech_qulity.yaml │ ├── utmos.yaml │ └── voicebench.yaml ├── model │ ├── ali.yaml │ ├── cv3_speaker_sim.yaml │ ├── dnsmos.yaml │ ├── gemini.yaml │ ├── gpt.yaml │ ├── indextts.yaml │ ├── kyutai.yaml │ ├── mgm_omni.yaml │ ├── minicpmo.yaml │ ├── moonshot.yaml │ ├── offline.yaml │ ├── ola.yaml │ ├── paraformer.yaml │ ├── paraformer_ms.yaml │ ├── qwen2.5.yaml │ ├── spark.yaml │ ├── speechLLM.yaml │ ├── step.yaml │ ├── tencent.yaml │ ├── ultravox.yaml │ ├── utmos.yaml │ ├── voxcpm.yaml │ ├── wavlm.yaml │ └── whisper.yaml ├── process │ ├── base.yaml │ ├── choice.yaml │ └── speech_model_output.yaml ├── prompt │ ├── 3o.yaml │ ├── aqa.yaml │ ├── asr.yaml │ ├── caption.yaml │ ├── chatbot.yaml │ ├── choice.yaml │ ├── digit.yaml │ ├── emotion_anlysis.yaml │ ├── gender_anlysis.yaml │ ├── geval.yaml │ ├── kimi-audio.yaml │ ├── medicine.yaml │ ├── mini-cpm-omni.yaml │ ├── music.yaml │ ├── ola.yaml │ ├── qa.yaml │ ├── qwen-audio-pretrain.yaml │ ├── qwen-omni.yaml │ ├── qwen2-audio-pretrain.yaml │ ├── sound_identify.yaml │ ├── stt.yaml │ ├── tts.yaml │ └── whisper-pretrain.yaml └── recorder │ └── local.yaml ├── replication ├── Long-TTS-Eval.md └── MGM-Omni.md ├── requirements.txt ├── requirments └── minicpm_o2_6.txt └── tests ├── test_audio_evals_registry.py └── test_dataset.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/FAQ.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/README.md -------------------------------------------------------------------------------- /README_en.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/README_en.md -------------------------------------------------------------------------------- /assets/audio_understanding_leaderboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/audio_understanding_leaderboard.png -------------------------------------------------------------------------------- /assets/dataset_distribute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/dataset_distribute.png -------------------------------------------------------------------------------- /assets/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/default.wav -------------------------------------------------------------------------------- /assets/img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/img_1.png -------------------------------------------------------------------------------- /assets/leaderboard.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/leaderboard.md -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/performance.png -------------------------------------------------------------------------------- /assets/s2s_leaderboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/s2s_leaderboard.png -------------------------------------------------------------------------------- /assets/s2s_semantic_leaderboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/s2s_semantic_leaderboard.png -------------------------------------------------------------------------------- /assets/utmos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/assets/utmos.png -------------------------------------------------------------------------------- /audio_evals/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/agg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/agg/air_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/agg/air_chat.py -------------------------------------------------------------------------------- /audio_evals/agg/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/agg/base.py -------------------------------------------------------------------------------- /audio_evals/agg/sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/agg/sp.py -------------------------------------------------------------------------------- /audio_evals/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/base.py -------------------------------------------------------------------------------- /audio_evals/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_MODEL_PATH = "init_model/" 2 | -------------------------------------------------------------------------------- /audio_evals/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/dataset/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/dataset/dataset.py -------------------------------------------------------------------------------- /audio_evals/dataset/giga.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/dataset/giga.py -------------------------------------------------------------------------------- /audio_evals/dataset/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/dataset/huggingface.py -------------------------------------------------------------------------------- /audio_evals/dataset/resume.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/dataset/resume.py -------------------------------------------------------------------------------- /audio_evals/eval_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/eval_task.py -------------------------------------------------------------------------------- /audio_evals/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/evaluator/air_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/air_chat.py -------------------------------------------------------------------------------- /audio_evals/evaluator/alpaca_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/alpaca_eval.py -------------------------------------------------------------------------------- /audio_evals/evaluator/alpaca_eval.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/alpaca_eval.txt -------------------------------------------------------------------------------- /audio_evals/evaluator/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/base.py -------------------------------------------------------------------------------- /audio_evals/evaluator/bbh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/bbh.py -------------------------------------------------------------------------------- /audio_evals/evaluator/bleu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/bleu.py -------------------------------------------------------------------------------- /audio_evals/evaluator/coco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/coco.py -------------------------------------------------------------------------------- /audio_evals/evaluator/dict_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/dict_match.py -------------------------------------------------------------------------------- /audio_evals/evaluator/dnsmos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/dnsmos.py -------------------------------------------------------------------------------- /audio_evals/evaluator/ensemble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/ensemble.py -------------------------------------------------------------------------------- /audio_evals/evaluator/harm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/harm.py -------------------------------------------------------------------------------- /audio_evals/evaluator/ifeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/ifeval.py -------------------------------------------------------------------------------- /audio_evals/evaluator/long_tts_eval_asr_wer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/long_tts_eval_asr_wer.py -------------------------------------------------------------------------------- /audio_evals/evaluator/mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/mcq.py -------------------------------------------------------------------------------- /audio_evals/evaluator/qa_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/qa_eval.py -------------------------------------------------------------------------------- /audio_evals/evaluator/qa_exact_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/qa_exact_match.py -------------------------------------------------------------------------------- /audio_evals/evaluator/ref_qa_geval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/ref_qa_geval.py -------------------------------------------------------------------------------- /audio_evals/evaluator/ref_qa_geval.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/ref_qa_geval.txt -------------------------------------------------------------------------------- /audio_evals/evaluator/seed_tts_eval_asr_wer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/seed_tts_eval_asr_wer.py -------------------------------------------------------------------------------- /audio_evals/evaluator/simo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/simo.py -------------------------------------------------------------------------------- /audio_evals/evaluator/string_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/string_match.py -------------------------------------------------------------------------------- /audio_evals/evaluator/utmos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/utmos.py -------------------------------------------------------------------------------- /audio_evals/evaluator/voice_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/voice_bench.py -------------------------------------------------------------------------------- /audio_evals/evaluator/wer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/evaluator/wer.py -------------------------------------------------------------------------------- /audio_evals/isolate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/isolate.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/.gitignore -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/FAQ.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/FAQ.md -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/README.md -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/asset/cross_lingual_prompt.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/asset/cross_lingual_prompt.wav -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/asset/dingding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/asset/dingding.png -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/asset/zero_shot_prompt.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/asset/zero_shot_prompt.wav -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/bin/average_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/bin/average_model.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/bin/export_jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/bin/export_jit.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/bin/export_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/bin/export_onnx.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/bin/export_trt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/bin/export_trt.sh -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/bin/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/bin/inference.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/bin/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/bin/train.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/cli/cosyvoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/cli/cosyvoice.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/cli/frontend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/cli/frontend.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/cli/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/cli/model.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/dataset/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/dataset/dataset.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/dataset/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/dataset/processor.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/flow/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/flow/decoder.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/flow/flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/flow/flow.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/flow/flow_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/flow/flow_matching.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/flow/length_regulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/flow/length_regulator.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/hifigan/discriminator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/hifigan/discriminator.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/hifigan/f0_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/hifigan/f0_predictor.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/hifigan/generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/hifigan/generator.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/hifigan/hifigan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/hifigan/hifigan.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/llm/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/llm/llm.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/activation.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/attention.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/convolution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/convolution.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/decoder.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/decoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/decoder_layer.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/embedding.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/encoder.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/encoder_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/encoder_layer.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/label_smoothing_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/label_smoothing_loss.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/subsampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/subsampling.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/transformer/upsample_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/transformer/upsample_encoder.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/class_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/class_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/common.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/executor.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/file_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/frontend_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/frontend_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/losses.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/mask.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/scheduler.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/cosyvoice/utils/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/cosyvoice/utils/train_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/docker/Dockerfile -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/cosyvoice: -------------------------------------------------------------------------------- 1 | ../../../cosyvoice -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/path.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/path.sh -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/run.sh -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/tts_text.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/examples/libritts/cosyvoice/tts_text.json -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/libritts/cosyvoice2/cosyvoice: -------------------------------------------------------------------------------- 1 | ../../../cosyvoice -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/libritts/cosyvoice2/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/magicdata-read/cosyvoice/cosyvoice: -------------------------------------------------------------------------------- 1 | ../../../cosyvoice -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/magicdata-read/cosyvoice/path.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/examples/magicdata-read/cosyvoice/path.sh -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/magicdata-read/cosyvoice/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/examples/magicdata-read/cosyvoice/run.sh -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/examples/magicdata-read/cosyvoice/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/main.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/runtime/python/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/runtime/python/Dockerfile -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/runtime/python/fastapi/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/runtime/python/fastapi/client.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/runtime/python/fastapi/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/runtime/python/fastapi/server.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/runtime/python/grpc/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/runtime/python/grpc/client.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/runtime/python/grpc/cosyvoice.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/runtime/python/grpc/cosyvoice.proto -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/runtime/python/grpc/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/runtime/python/grpc/server.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/tools/extract_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/tools/extract_embedding.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/tools/extract_speech_token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/tools/extract_speech_token.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/tools/make_parquet_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/tools/make_parquet_list.py -------------------------------------------------------------------------------- /audio_evals/lib/CosyVoice/webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/CosyVoice/webui.py -------------------------------------------------------------------------------- /audio_evals/lib/DNSMOS/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/DNSMOS/README.md -------------------------------------------------------------------------------- /audio_evals/lib/DNSMOS/dnsmos_single.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/DNSMOS/dnsmos_single.py -------------------------------------------------------------------------------- /audio_evals/lib/DNSMOS/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/DNSMOS/main.py -------------------------------------------------------------------------------- /audio_evals/lib/DNSMOS/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/DNSMOS/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/HiggsAudio/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/HiggsAudio/main.py -------------------------------------------------------------------------------- /audio_evals/lib/HiggsAudio/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/HiggsAudio/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/HiggsAudio/vc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/HiggsAudio/vc.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/Dockerfile -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/README.md -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/assets/kimia_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/assets/kimia_framework.png -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/assets/kimia_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/assets/kimia_logo.png -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/assets/kimia_radar_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/assets/kimia_radar_chart.png -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/assets/kimia_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/assets/kimia_report.pdf -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/infer.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/api/kimia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/api/kimia.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/api/prompt_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/api/prompt_manager.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/detokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/models/detokenizer/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/detokenizer/vocoder/alias_free_activation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/detokenizer/vocoder/alias_free_activation/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/README.md -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/cosyvoice/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/cosyvoice/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/cosyvoice/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/cosyvoice/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/cosyvoice/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/speech_tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/configs/callbacks/none.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/configs/local/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/configs/model/cfm/default.yaml: -------------------------------------------------------------------------------- 1 | name: CFM 2 | solver: euler 3 | sigma_min: 1e-4 4 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/configs/model/optimizer/adam.yaml: -------------------------------------------------------------------------------- 1 | _target_: torch.optim.Adam 2 | _partial_: true 3 | lr: 1e-4 4 | weight_decay: 0.0 5 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/data: -------------------------------------------------------------------------------- 1 | /home/smehta/Projects/Speech-Backbones/Grad-TTS/data -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/VERSION: -------------------------------------------------------------------------------- 1 | 0.0.5.1 2 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/data/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/hifigan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/models/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/matcha/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/third_party/Matcha-TTS/notebooks/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/web_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/models/tokenizer/glm4/web_demo.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/utils/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/utils/data.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/utils/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/utils/sampler.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/kimia_infer/utils/special_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/kimia_infer/utils/special_tokens.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/main.py -------------------------------------------------------------------------------- /audio_evals/lib/Kimi-Audio/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Kimi-Audio/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/MGM_Omni/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/MGM_Omni/README.md -------------------------------------------------------------------------------- /audio_evals/lib/MGM_Omni/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/MGM_Omni/main.py -------------------------------------------------------------------------------- /audio_evals/lib/MGM_Omni/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/MGM_Omni/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/SenseVoice/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/SenseVoice/main.py -------------------------------------------------------------------------------- /audio_evals/lib/SenseVoice/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/SenseVoice/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/.gitignore -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/README.md -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/cli/SparkTTS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/cli/SparkTTS.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/cli/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/cli/inference.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/encodec.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/example/infer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/example/infer.sh -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/main.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/models/audio_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/models/audio_tokenizer.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/models/bicodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/models/bicodec.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/blocks/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/blocks/layers.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/blocks/samper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/blocks/samper.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/blocks/vocos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/blocks/vocos.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/fsq/residual_fsq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/fsq/residual_fsq.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/speaker/ecapa_tdnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/speaker/ecapa_tdnn.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/speaker/perceiver_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/speaker/perceiver_encoder.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/speaker/pooling_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/speaker/pooling_layers.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/modules/speaker/speaker_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/modules/speaker/speaker_encoder.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/utils/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/utils/audio.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/utils/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/utils/file.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/utils/parse_options.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/utils/parse_options.sh -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/sparktts/utils/token_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/sparktts/utils/token_parser.py -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/figures/gradio_TTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/figures/gradio_TTS.png -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/figures/gradio_control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/figures/gradio_control.png -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/figures/infer_control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/figures/infer_control.png -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/figures/infer_voice_cloning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/figures/infer_voice_cloning.png -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/HKUST.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/HKUST.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/NPU.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/NPU.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/NTU.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/NTU.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/SJU.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/SJU.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/SparkAudio.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/SparkAudio.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/SparkAudio2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/SparkAudio2.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/SparkTTS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/SparkTTS.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/SparkTTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/SparkTTS.png -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/mobvoi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/mobvoi.jpg -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/src/logo/mobvoi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/src/logo/mobvoi.png -------------------------------------------------------------------------------- /audio_evals/lib/Spark-TTS/webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/Spark-TTS/webui.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM/main.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM/requirements.txt: -------------------------------------------------------------------------------- 1 | voxcpm 2 | torchcodec -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/.gitignore -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/README.md -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/app.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/assets/logo_v2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/assets/logo_v2.jpeg -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/assets/thuhcsi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/assets/thuhcsi.png -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/main.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/pyproject.toml -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/cli.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/core.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/model/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/model/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/model/voxcpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/model/voxcpm.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/audiovae/__init__.py: -------------------------------------------------------------------------------- 1 | from .audio_vae import AudioVAE 2 | -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/audiovae/audio_vae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/audiovae/audio_vae.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/layers/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/locdit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/locdit/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/locdit/local_dit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/locdit/local_dit.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/locdit/unified_cfm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/locdit/unified_cfm.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/locenc/__init__.py: -------------------------------------------------------------------------------- 1 | from .local_encoder import VoxCPMLocEnc 2 | -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/locenc/local_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/locenc/local_encoder.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/cache.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/config.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/modules/minicpm4/model.py -------------------------------------------------------------------------------- /audio_evals/lib/VoxCPM2/src/voxcpm/utils/text_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/VoxCPM2/src/voxcpm/utils/text_normalize.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/README.md -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/data/demo.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/data/demo.txt -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/__init__.py: -------------------------------------------------------------------------------- 1 | from decoder.pretrained import WavTokenizer 2 | 3 | 4 | __version__ = "0.0.3" 5 | -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/dataset.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/discriminator_dac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/discriminator_dac.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/discriminators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/discriminators.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/experiment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/experiment.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/feature_extractors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/feature_extractors.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/heads.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/helpers.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/loss.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/models.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/modules.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/pretrained.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/pretrained_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/pretrained_model.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/decoder/spectral_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/decoder/spectral_ops.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/distrib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/distrib.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/model.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/modules/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/modules/conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/modules/conv.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/modules/lstm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/modules/lstm.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/modules/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/modules/norm.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/modules/seanet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/modules/seanet.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/modules/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/modules/transformer.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/msstftd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/msstftd.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/quantization/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/quantization/ac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/quantization/ac.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/quantization/core_vq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/quantization/core_vq.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/quantization/vq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/quantization/vq.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/encoder/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/encoder/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/infer.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/metrics/UTMOS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/metrics/UTMOS.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/metrics/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/metrics/infer.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/metrics/periodicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/metrics/periodicity.py -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/result.png -------------------------------------------------------------------------------- /audio_evals/lib/WavTokenizer/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/WavTokenizer/train.py -------------------------------------------------------------------------------- /audio_evals/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/chattts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/chattts.py -------------------------------------------------------------------------------- /audio_evals/lib/coco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/coco.py -------------------------------------------------------------------------------- /audio_evals/lib/cpm_tts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cpm_tts/chattts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cpm_tts/chattts.py -------------------------------------------------------------------------------- /audio_evals/lib/cpm_tts/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cpm_tts/config.py -------------------------------------------------------------------------------- /audio_evals/lib/cpm_tts/dvae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cpm_tts/dvae.py -------------------------------------------------------------------------------- /audio_evals/lib/cpm_tts/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cpm_tts/gpt.py -------------------------------------------------------------------------------- /audio_evals/lib/cpm_tts/minicpmv26_resampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cpm_tts/minicpmv26_resampler.py -------------------------------------------------------------------------------- /audio_evals/lib/cpm_tts/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cpm_tts/processor.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/README.md -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/language-identification/requirements.txt: -------------------------------------------------------------------------------- 1 | funasr 2 | pickle 3 | random 4 | wget 5 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/language-identification/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/speaker-diarization/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-cam++/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-cam++/run.sh -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-cam++/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-ecapa/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-ecapa/run.sh -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-ecapa/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-eres2net/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-eres2netv2/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-rdino/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-rdino/run.sh -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-rdino/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-res2net/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/3dspeaker/sv-resnet/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/ava-asd/talknet/requirements.txt: -------------------------------------------------------------------------------- 1 | gdown 2 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/ava-asd/talknet/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/ava-asd/talknet/run.sh -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/ava-asd/talknet/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/README.md -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/sv-cam++/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/sv-ecapa/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/sv-eres2net/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/sv-eres2netv2/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/sv-rdino/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/sv-res2net/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/cnceleb/sv-resnet/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/semantic_speaker/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=2.15.0 2 | scikit-learn>=1.3.1 3 | transformers>=4.34.0 4 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/README.md -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-cam++/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-ecapa/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-eres2net/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-eres2netv2/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-rdino/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-res2net/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-resnet/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-sdpn/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/egs/voxceleb/sv-xvector/speakerlab: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/bin/extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/bin/extract.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/bin/infer_sv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/bin/infer_sv.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/bin/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/bin/train.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/models/eres2net/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/config.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/epoch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/epoch.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/fileio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/fileio.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/3D-Speaker/speakerlab/utils/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/README.md -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/example_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/example_usage.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/speaker_sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/speaker_sim.py -------------------------------------------------------------------------------- /audio_evals/lib/cv3_speaker_sim/test_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/cv3_speaker_sim/test_integration.py -------------------------------------------------------------------------------- /audio_evals/lib/doubao/simplex_websocket_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/doubao/simplex_websocket_demo.py -------------------------------------------------------------------------------- /audio_evals/lib/doubao/stream_asr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/doubao/stream_asr.py -------------------------------------------------------------------------------- /audio_evals/lib/encodec/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/encodec/main.py -------------------------------------------------------------------------------- /audio_evals/lib/encodec/requirements.txt: -------------------------------------------------------------------------------- 1 | librosa 2 | numpy 3 | soundfile 4 | torch 5 | transformers 6 | -------------------------------------------------------------------------------- /audio_evals/lib/evaluate_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/evaluate_tokenizer.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/.gitignore -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/DISCLAIMER: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/DISCLAIMER -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/INDEX_MODEL_LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/INDEX_MODEL_LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/README.md -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/assets/IndexTTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/assets/IndexTTS.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/assets/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/assets/img.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/assets/index_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/assets/index_icon.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/checkpoints/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/checkpoints/config.yaml -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/ECAPA_TDNN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/ECAPA_TDNN.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/activations.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/alias_free_activation/cuda/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/alias_free_activation/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/alias_free_torch/act.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/alias_free_torch/act.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/alias_free_torch/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/alias_free_torch/filter.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/bigvgan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/bigvgan.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/models.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/nnet/CNN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/nnet/CNN.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/nnet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/nnet/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/nnet/linear.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/nnet/normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/nnet/normalization.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/BigVGAN/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/BigVGAN/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/cli.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/conformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/conformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/gpt/conformer/attention.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/conformer/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/gpt/conformer/embedding.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/conformer/subsampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/gpt/conformer/subsampling.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/conformer_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/gpt/conformer_encoder.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/gpt/model.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/gpt/perceiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/gpt/perceiver.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/infer.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/arch_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/arch_util.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/checkpoint.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/common.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/feature_extractors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/feature_extractors.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/front.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/front.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/typical_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/typical_sampling.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/webui_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/webui_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/utils/xtransformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/utils/xtransformers.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/vqvae/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/indextts/vqvae/xtts_dvae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/indextts/vqvae/xtts_dvae.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/main.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/requirements2.txt: -------------------------------------------------------------------------------- 1 | -e audio_evals/lib/index-tts 2 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/setup.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/tests/regression_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/tests/regression_test.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/tests/sample_prompt.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/tests/sample_prompt.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/tools/i18n/i18n.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/tools/i18n/i18n.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/tools/i18n/locale/en_US.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/tools/i18n/locale/en_US.json -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/tools/i18n/scan_i18n.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/tools/i18n/scan_i18n.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts/webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts/webui.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/.gitattributes -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/.gitignore -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/DISCLAIMER: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/DISCLAIMER -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/LICENSE -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/LICENSE_ZH.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/LICENSE_ZH.txt -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/MANIFEST.in -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/README.md -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/archive/README_INDEXTTS_1_5.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/archive/README_INDEXTTS_1_5.md -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/assets/IndexTTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/assets/IndexTTS.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/assets/IndexTTS2-video-pic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/assets/IndexTTS2-video-pic.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/assets/IndexTTS2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/assets/IndexTTS2.mp4 -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/assets/IndexTTS2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/assets/IndexTTS2.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/assets/IndexTTS2_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/assets/IndexTTS2_banner.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/assets/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/assets/img.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/assets/index_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/assets/index_icon.png -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/checkpoints/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/checkpoints/config.yaml -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/cases.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/cases.jsonl -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/emo_hate.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/emo_hate.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/emo_sad.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/emo_sad.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_01.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_01.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_02.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_02.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_03.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_03.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_04.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_04.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_05.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_05.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_06.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_06.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_07.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_07.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_08.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_08.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_09.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_09.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_10.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_11.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_11.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/examples/voice_12.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/examples/voice_12.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/ECAPA_TDNN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/ECAPA_TDNN.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/activations.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/alias_free_activation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/alias_free_activation/cuda/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/alias_free_activation/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/alias_free_torch/act.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/alias_free_torch/act.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/bigvgan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/bigvgan.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/models.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/nnet/CNN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/nnet/CNN.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/nnet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/nnet/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/nnet/linear.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/nnet/normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/nnet/normalization.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/BigVGAN/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/BigVGAN/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/cli.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/conformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/conformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/conformer/attention.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/conformer/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/conformer/embedding.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/conformer/subsampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/conformer/subsampling.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/conformer_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/conformer_encoder.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/model.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/model_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/model_v2.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/perceiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/perceiver.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/transformers_beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/transformers_beam_search.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/gpt/transformers_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/gpt/transformers_gpt2.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/infer.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/infer_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/infer_v2.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/__main__.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/model/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/model/base.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/model/dac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/model/dac.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/model/discriminator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/model/discriminator.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/model/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/model/encodec.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/layers.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/loss.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/nn/quantize.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/utils/__init__.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/utils/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/utils/decode.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/dac/utils/encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/dac/utils/encode.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/hf_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/hf_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/audio.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/bigvgan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/bigvgan.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/config.json -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/env.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/bigvgan/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/campplus/DTDNN.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/campplus/DTDNN.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/campplus/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/campplus/layers.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/commons.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/encodec.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/flow_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/flow_matching.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/gpt_fast/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/gpt_fast/model.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/layers.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/length_regulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/length_regulator.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/openvoice/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/openvoice/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/openvoice/api.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/openvoice/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/openvoice/models.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/openvoice/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/openvoice/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/quantize.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/rmvpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/rmvpe.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/__init__.py: -------------------------------------------------------------------------------- 1 | from .pretrained import Vocos 2 | 3 | 4 | __version__ = "0.1.0" 5 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/heads.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/helpers.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/loss.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/models.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/modules.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/vocos/pretrained.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/modules/wavenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/modules/wavenet.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/optimizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/optimizers.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/s2mel/wav2vecbert_extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/s2mel/wav2vecbert_extract.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/arch_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/arch_util.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/checkpoint.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/common.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/feature_extractors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/feature_extractors.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/front.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/front.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/maskgct/models/codec/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/maskgct/models/codec/facodec/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/maskgct_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/maskgct_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/text_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/text_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/typical_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/typical_sampling.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/webui_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/webui_utils.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/utils/xtransformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/utils/xtransformers.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/vqvae/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/indextts/vqvae/xtts_dvae.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/indextts/vqvae/xtts_dvae.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/main.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/pyproject.toml -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/requirements.txt: -------------------------------------------------------------------------------- 1 | -e audio_evals/lib/index-tts2 2 | -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tests/cases.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tests/cases.jsonl -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tests/padding_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tests/padding_test.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tests/regression_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tests/regression_test.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tests/sample_prompt.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tests/sample_prompt.wav -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tools/gpu_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tools/gpu_check.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tools/i18n/i18n.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tools/i18n/i18n.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tools/i18n/locale/en_US.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tools/i18n/locale/en_US.json -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tools/i18n/locale/zh_CN.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tools/i18n/locale/zh_CN.json -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tools/i18n/scan_i18n.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tools/i18n/scan_i18n.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/tts2_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/tts2_infer.py -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/uv.lock -------------------------------------------------------------------------------- /audio_evals/lib/index-tts2/webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/index-tts2/webui.py -------------------------------------------------------------------------------- /audio_evals/lib/instruction_following_eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/instruction_following_eval/README.md -------------------------------------------------------------------------------- /audio_evals/lib/instruction_following_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/instruction_following_eval/instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/instruction_following_eval/instructions.py -------------------------------------------------------------------------------- /audio_evals/lib/instruction_following_eval/instructions_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/instruction_following_eval/instructions_registry.py -------------------------------------------------------------------------------- /audio_evals/lib/instruction_following_eval/instructions_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/instruction_following_eval/instructions_util.py -------------------------------------------------------------------------------- /audio_evals/lib/mimi/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/mimi/main.py -------------------------------------------------------------------------------- /audio_evals/lib/mimi/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/mimi/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/mimi/stream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/mimi/stream.py -------------------------------------------------------------------------------- /audio_evals/lib/minicpm/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/minicpm/main.py -------------------------------------------------------------------------------- /audio_evals/lib/minicpm/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/minicpm/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/minicpm_0_5B/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/minicpm_0_5B/main.py -------------------------------------------------------------------------------- /audio_evals/lib/minicpm_0_5B/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/minicpm_0_5B/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/paraformer/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/paraformer/main.py -------------------------------------------------------------------------------- /audio_evals/lib/paraformer/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/paraformer/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/paraformer_ms/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/paraformer_ms/main.py -------------------------------------------------------------------------------- /audio_evals/lib/paraformer_ms/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/paraformer_ms/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/qwen2-5omni/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/qwen2-5omni/main.py -------------------------------------------------------------------------------- /audio_evals/lib/qwen2-5omni/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/qwen2-5omni/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/sensevoicelib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/sensevoicelib.py -------------------------------------------------------------------------------- /audio_evals/lib/simo/models_ecapa_tdnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/simo/models_ecapa_tdnn.py -------------------------------------------------------------------------------- /audio_evals/lib/simo/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/simo/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/simo/simo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/simo/simo.py -------------------------------------------------------------------------------- /audio_evals/lib/ssnact/ssnact.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/ssnact/ssnact.py -------------------------------------------------------------------------------- /audio_evals/lib/streaming_asr_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/streaming_asr_demo.py -------------------------------------------------------------------------------- /audio_evals/lib/text_normalization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/lib/text_normalization/basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/text_normalization/basic.py -------------------------------------------------------------------------------- /audio_evals/lib/text_normalization/cn_tn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/text_normalization/cn_tn.py -------------------------------------------------------------------------------- /audio_evals/lib/text_normalization/en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/text_normalization/en.py -------------------------------------------------------------------------------- /audio_evals/lib/text_normalization/english.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/text_normalization/english.json -------------------------------------------------------------------------------- /audio_evals/lib/utmos/lightning_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/utmos/lightning_module.py -------------------------------------------------------------------------------- /audio_evals/lib/utmos/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/utmos/main.py -------------------------------------------------------------------------------- /audio_evals/lib/utmos/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/utmos/model.py -------------------------------------------------------------------------------- /audio_evals/lib/utmos/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/utmos/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/wer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/wer.py -------------------------------------------------------------------------------- /audio_evals/lib/whisper/cv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/whisper/cv3.py -------------------------------------------------------------------------------- /audio_evals/lib/whisper/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/whisper/main.py -------------------------------------------------------------------------------- /audio_evals/lib/whisper/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/whisper/requirements.txt -------------------------------------------------------------------------------- /audio_evals/lib/whisper/seed_tts_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/lib/whisper/seed_tts_eval.py -------------------------------------------------------------------------------- /audio_evals/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/main.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/chattts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/chattts.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/cosyvoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/cosyvoice.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/cosyvoice_adv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/cosyvoice_adv.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/encodec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/encodec.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/mimi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/mimi.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/spark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/spark.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/vocos_encode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/vocos_encode.py -------------------------------------------------------------------------------- /audio_evals/models/AudioEncoder/wav_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/AudioEncoder/wav_tokenizer.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/models/TTS/amphion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/amphion.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/higgs_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/higgs_audio.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/indextts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/indextts.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/indextts2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/indextts2.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/megatts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/megatts.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/melotts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/melotts.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/mgm_omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/mgm_omni.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/spark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/spark.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/stabletts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/stabletts.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/voxcpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/voxcpm.py -------------------------------------------------------------------------------- /audio_evals/models/TTS/voxcpm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/TTS/voxcpm2.py -------------------------------------------------------------------------------- /audio_evals/models/UltraVOX.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/UltraVOX.py -------------------------------------------------------------------------------- /audio_evals/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/models/ali.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/ali.py -------------------------------------------------------------------------------- /audio_evals/models/asr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/__init__.py -------------------------------------------------------------------------------- /audio_evals/models/asr/ali.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/ali.py -------------------------------------------------------------------------------- /audio_evals/models/asr/baidu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/baidu.py -------------------------------------------------------------------------------- /audio_evals/models/asr/fireredasr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/fireredasr.py -------------------------------------------------------------------------------- /audio_evals/models/asr/huawei.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/huawei.py -------------------------------------------------------------------------------- /audio_evals/models/asr/huoshan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/huoshan.py -------------------------------------------------------------------------------- /audio_evals/models/asr/paraformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/paraformer.py -------------------------------------------------------------------------------- /audio_evals/models/asr/paraformer_ms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/paraformer_ms.py -------------------------------------------------------------------------------- /audio_evals/models/asr/sensevoice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/sensevoice.py -------------------------------------------------------------------------------- /audio_evals/models/asr/sherpa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/sherpa.py -------------------------------------------------------------------------------- /audio_evals/models/asr/tencent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/tencent.py -------------------------------------------------------------------------------- /audio_evals/models/asr/xfyun.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/asr/xfyun.py -------------------------------------------------------------------------------- /audio_evals/models/bytedance/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/models/bytedance/doubao.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/bytedance/doubao.py -------------------------------------------------------------------------------- /audio_evals/models/cv3_speaker_sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/cv3_speaker_sim.py -------------------------------------------------------------------------------- /audio_evals/models/dnsmos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/dnsmos.py -------------------------------------------------------------------------------- /audio_evals/models/glm4audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/glm4audio.py -------------------------------------------------------------------------------- /audio_evals/models/glm4voice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/glm4voice.py -------------------------------------------------------------------------------- /audio_evals/models/google.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/google.py -------------------------------------------------------------------------------- /audio_evals/models/llama_omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/llama_omni.py -------------------------------------------------------------------------------- /audio_evals/models/llmcenter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/llmcenter.py -------------------------------------------------------------------------------- /audio_evals/models/mini_cpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/mini_cpm.py -------------------------------------------------------------------------------- /audio_evals/models/mini_omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/mini_omni.py -------------------------------------------------------------------------------- /audio_evals/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/model.py -------------------------------------------------------------------------------- /audio_evals/models/moonshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/moonshot.py -------------------------------------------------------------------------------- /audio_evals/models/offline_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/offline_model.py -------------------------------------------------------------------------------- /audio_evals/models/ola.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/ola.py -------------------------------------------------------------------------------- /audio_evals/models/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/openai.py -------------------------------------------------------------------------------- /audio_evals/models/openai_realtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/openai_realtime.py -------------------------------------------------------------------------------- /audio_evals/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/qwen.py -------------------------------------------------------------------------------- /audio_evals/models/qwen2_5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/qwen2_5.py -------------------------------------------------------------------------------- /audio_evals/models/sp_gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/sp_gemini.py -------------------------------------------------------------------------------- /audio_evals/models/step_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/step_audio.py -------------------------------------------------------------------------------- /audio_evals/models/utmos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/utmos.py -------------------------------------------------------------------------------- /audio_evals/models/wavlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/wavlm.py -------------------------------------------------------------------------------- /audio_evals/models/whisper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/models/whisper.py -------------------------------------------------------------------------------- /audio_evals/process/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/process/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/process/base.py -------------------------------------------------------------------------------- /audio_evals/process/eliminate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/process/eliminate.py -------------------------------------------------------------------------------- /audio_evals/process/firstoption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/process/firstoption.py -------------------------------------------------------------------------------- /audio_evals/process/normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/process/normalization.py -------------------------------------------------------------------------------- /audio_evals/process/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/process/qwen.py -------------------------------------------------------------------------------- /audio_evals/process/speech.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/process/speech.py -------------------------------------------------------------------------------- /audio_evals/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /audio_evals/prompt/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/prompt/base.py -------------------------------------------------------------------------------- /audio_evals/recorder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/recorder.py -------------------------------------------------------------------------------- /audio_evals/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/registry.py -------------------------------------------------------------------------------- /audio_evals/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/audio_evals/utils.py -------------------------------------------------------------------------------- /cli/list_availabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/cli/list_availabel.py -------------------------------------------------------------------------------- /docs/Procedures for Restarting an Incomplete Evaluation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/docs/Procedures for Restarting an Incomplete Evaluation.md -------------------------------------------------------------------------------- /docs/how add a dataset.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/docs/how add a dataset.md -------------------------------------------------------------------------------- /docs/how eval your model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/docs/how eval your model.md -------------------------------------------------------------------------------- /docs/how launch a custom eval task.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/docs/how launch a custom eval task.md -------------------------------------------------------------------------------- /docs/how use UTMOS, DNSMOS eval speech quality.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/docs/how use UTMOS, DNSMOS eval speech quality.md -------------------------------------------------------------------------------- /docs/seed-tts-eval4voice_clone.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/docs/seed-tts-eval4voice_clone.md -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/pyproject.toml -------------------------------------------------------------------------------- /registry/agg/air-bench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/agg/air-bench.yaml -------------------------------------------------------------------------------- /registry/agg/naive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/agg/naive.yaml -------------------------------------------------------------------------------- /registry/agg/wer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/agg/wer.yaml -------------------------------------------------------------------------------- /registry/dataset/AudioCaps.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/AudioCaps.yaml -------------------------------------------------------------------------------- /registry/dataset/COVID-recognizer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/COVID-recognizer.yaml -------------------------------------------------------------------------------- /registry/dataset/CatDog.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/CatDog.yaml -------------------------------------------------------------------------------- /registry/dataset/ClothoAQA.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/ClothoAQA.yaml -------------------------------------------------------------------------------- /registry/dataset/CommonVoice.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/CommonVoice.yaml -------------------------------------------------------------------------------- /registry/dataset/DESEDpublic_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/DESEDpublic_eval.yaml -------------------------------------------------------------------------------- /registry/dataset/GTZAN.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/GTZAN.yaml -------------------------------------------------------------------------------- /registry/dataset/GigaSpeech.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/GigaSpeech.yaml -------------------------------------------------------------------------------- /registry/dataset/KeSpeech.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/KeSpeech.yaml -------------------------------------------------------------------------------- /registry/dataset/MELD.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/MELD.yaml -------------------------------------------------------------------------------- /registry/dataset/MMAU.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/MMAU.yaml -------------------------------------------------------------------------------- /registry/dataset/Nsynth.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/Nsynth.yaml -------------------------------------------------------------------------------- /registry/dataset/RAVDESS.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/RAVDESS.yaml -------------------------------------------------------------------------------- /registry/dataset/RespiratorySound.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/RespiratorySound.yaml -------------------------------------------------------------------------------- /registry/dataset/TESS.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/TESS.yaml -------------------------------------------------------------------------------- /registry/dataset/VSC.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/VSC.yaml -------------------------------------------------------------------------------- /registry/dataset/VoxCeleb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/VoxCeleb.yaml -------------------------------------------------------------------------------- /registry/dataset/WavCaps.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/WavCaps.yaml -------------------------------------------------------------------------------- /registry/dataset/WenetSpeech.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/WenetSpeech.yaml -------------------------------------------------------------------------------- /registry/dataset/air.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/air.yaml -------------------------------------------------------------------------------- /registry/dataset/aishell.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/aishell.yaml -------------------------------------------------------------------------------- /registry/dataset/alpaca_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/alpaca_eval.yaml -------------------------------------------------------------------------------- /registry/dataset/audio-MNIST.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/audio-MNIST.yaml -------------------------------------------------------------------------------- /registry/dataset/chord_recoganition.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/chord_recoganition.yaml -------------------------------------------------------------------------------- /registry/dataset/covost2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/covost2.yaml -------------------------------------------------------------------------------- /registry/dataset/cv3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/cv3.yaml -------------------------------------------------------------------------------- /registry/dataset/fleurs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/fleurs.yaml -------------------------------------------------------------------------------- /registry/dataset/heart_beat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/heart_beat.yaml -------------------------------------------------------------------------------- /registry/dataset/librispeech.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/librispeech.yaml -------------------------------------------------------------------------------- /registry/dataset/llama_questions.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/llama_questions.yaml -------------------------------------------------------------------------------- /registry/dataset/long-tts-eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/long-tts-eval.yaml -------------------------------------------------------------------------------- /registry/dataset/multilingual_librispeech.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/multilingual_librispeech.yaml -------------------------------------------------------------------------------- /registry/dataset/peoples_speech.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/peoples_speech.yaml -------------------------------------------------------------------------------- /registry/dataset/sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/sample.yaml -------------------------------------------------------------------------------- /registry/dataset/seed-tts-eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/seed-tts-eval.yaml -------------------------------------------------------------------------------- /registry/dataset/tedlium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/tedlium.yaml -------------------------------------------------------------------------------- /registry/dataset/triviaqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/triviaqa.yaml -------------------------------------------------------------------------------- /registry/dataset/voicebench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/voicebench.yaml -------------------------------------------------------------------------------- /registry/dataset/voxpopuli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/voxpopuli.yaml -------------------------------------------------------------------------------- /registry/dataset/webQ.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/dataset/webQ.yaml -------------------------------------------------------------------------------- /registry/eval_task/acoustics.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/acoustics.yaml -------------------------------------------------------------------------------- /registry/eval_task/air.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/air.yaml -------------------------------------------------------------------------------- /registry/eval_task/alpaca.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/alpaca.yaml -------------------------------------------------------------------------------- /registry/eval_task/aqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/aqa.yaml -------------------------------------------------------------------------------- /registry/eval_task/asr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/asr.yaml -------------------------------------------------------------------------------- /registry/eval_task/caption.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/caption.yaml -------------------------------------------------------------------------------- /registry/eval_task/cv3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/cv3.yaml -------------------------------------------------------------------------------- /registry/eval_task/digit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/digit.yaml -------------------------------------------------------------------------------- /registry/eval_task/emo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/emo.yaml -------------------------------------------------------------------------------- /registry/eval_task/gender.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/gender.yaml -------------------------------------------------------------------------------- /registry/eval_task/inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/inference.yaml -------------------------------------------------------------------------------- /registry/eval_task/medicine.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/medicine.yaml -------------------------------------------------------------------------------- /registry/eval_task/music.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/music.yaml -------------------------------------------------------------------------------- /registry/eval_task/seed_tts_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/seed_tts_eval.yaml -------------------------------------------------------------------------------- /registry/eval_task/sound_identify.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/sound_identify.yaml -------------------------------------------------------------------------------- /registry/eval_task/stt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/stt.yaml -------------------------------------------------------------------------------- /registry/eval_task/tts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/tts.yaml -------------------------------------------------------------------------------- /registry/eval_task/voicebench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/voicebench.yaml -------------------------------------------------------------------------------- /registry/eval_task/vsc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/eval_task/vsc.yaml -------------------------------------------------------------------------------- /registry/evaluator/air-bench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/air-bench.yaml -------------------------------------------------------------------------------- /registry/evaluator/alpaca.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/alpaca.yaml -------------------------------------------------------------------------------- /registry/evaluator/choice-with-ans.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/choice-with-ans.yaml -------------------------------------------------------------------------------- /registry/evaluator/common.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/common.yaml -------------------------------------------------------------------------------- /registry/evaluator/cv3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/cv3.yaml -------------------------------------------------------------------------------- /registry/evaluator/dnsmos.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/dnsmos.yaml -------------------------------------------------------------------------------- /registry/evaluator/llama-speech.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/llama-speech.yaml -------------------------------------------------------------------------------- /registry/evaluator/long_tts_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/long_tts_eval.yaml -------------------------------------------------------------------------------- /registry/evaluator/naive_wer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/naive_wer.yaml -------------------------------------------------------------------------------- /registry/evaluator/qa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/qa.yaml -------------------------------------------------------------------------------- /registry/evaluator/seed_tts_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/seed_tts_eval.yaml -------------------------------------------------------------------------------- /registry/evaluator/simo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/simo.yaml -------------------------------------------------------------------------------- /registry/evaluator/speech_qulity.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/speech_qulity.yaml -------------------------------------------------------------------------------- /registry/evaluator/utmos.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/utmos.yaml -------------------------------------------------------------------------------- /registry/evaluator/voicebench.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/evaluator/voicebench.yaml -------------------------------------------------------------------------------- /registry/model/ali.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/ali.yaml -------------------------------------------------------------------------------- /registry/model/cv3_speaker_sim.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/cv3_speaker_sim.yaml -------------------------------------------------------------------------------- /registry/model/dnsmos.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/dnsmos.yaml -------------------------------------------------------------------------------- /registry/model/gemini.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/gemini.yaml -------------------------------------------------------------------------------- /registry/model/gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/gpt.yaml -------------------------------------------------------------------------------- /registry/model/indextts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/indextts.yaml -------------------------------------------------------------------------------- /registry/model/kyutai.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/kyutai.yaml -------------------------------------------------------------------------------- /registry/model/mgm_omni.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/mgm_omni.yaml -------------------------------------------------------------------------------- /registry/model/minicpmo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/minicpmo.yaml -------------------------------------------------------------------------------- /registry/model/moonshot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/moonshot.yaml -------------------------------------------------------------------------------- /registry/model/offline.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/offline.yaml -------------------------------------------------------------------------------- /registry/model/ola.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/ola.yaml -------------------------------------------------------------------------------- /registry/model/paraformer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/paraformer.yaml -------------------------------------------------------------------------------- /registry/model/paraformer_ms.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/paraformer_ms.yaml -------------------------------------------------------------------------------- /registry/model/qwen2.5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/qwen2.5.yaml -------------------------------------------------------------------------------- /registry/model/spark.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/spark.yaml -------------------------------------------------------------------------------- /registry/model/speechLLM.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/speechLLM.yaml -------------------------------------------------------------------------------- /registry/model/step.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/step.yaml -------------------------------------------------------------------------------- /registry/model/tencent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/tencent.yaml -------------------------------------------------------------------------------- /registry/model/ultravox.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/ultravox.yaml -------------------------------------------------------------------------------- /registry/model/utmos.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/utmos.yaml -------------------------------------------------------------------------------- /registry/model/voxcpm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/voxcpm.yaml -------------------------------------------------------------------------------- /registry/model/wavlm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/wavlm.yaml -------------------------------------------------------------------------------- /registry/model/whisper.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/model/whisper.yaml -------------------------------------------------------------------------------- /registry/process/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/process/base.yaml -------------------------------------------------------------------------------- /registry/process/choice.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/process/choice.yaml -------------------------------------------------------------------------------- /registry/process/speech_model_output.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/process/speech_model_output.yaml -------------------------------------------------------------------------------- /registry/prompt/3o.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/3o.yaml -------------------------------------------------------------------------------- /registry/prompt/aqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/aqa.yaml -------------------------------------------------------------------------------- /registry/prompt/asr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/asr.yaml -------------------------------------------------------------------------------- /registry/prompt/caption.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/caption.yaml -------------------------------------------------------------------------------- /registry/prompt/chatbot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/chatbot.yaml -------------------------------------------------------------------------------- /registry/prompt/choice.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/choice.yaml -------------------------------------------------------------------------------- /registry/prompt/digit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/digit.yaml -------------------------------------------------------------------------------- /registry/prompt/emotion_anlysis.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/emotion_anlysis.yaml -------------------------------------------------------------------------------- /registry/prompt/gender_anlysis.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/gender_anlysis.yaml -------------------------------------------------------------------------------- /registry/prompt/geval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/geval.yaml -------------------------------------------------------------------------------- /registry/prompt/kimi-audio.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/kimi-audio.yaml -------------------------------------------------------------------------------- /registry/prompt/medicine.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/medicine.yaml -------------------------------------------------------------------------------- /registry/prompt/mini-cpm-omni.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/mini-cpm-omni.yaml -------------------------------------------------------------------------------- /registry/prompt/music.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/music.yaml -------------------------------------------------------------------------------- /registry/prompt/ola.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/ola.yaml -------------------------------------------------------------------------------- /registry/prompt/qa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/qa.yaml -------------------------------------------------------------------------------- /registry/prompt/qwen-audio-pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/qwen-audio-pretrain.yaml -------------------------------------------------------------------------------- /registry/prompt/qwen-omni.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/qwen-omni.yaml -------------------------------------------------------------------------------- /registry/prompt/qwen2-audio-pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/qwen2-audio-pretrain.yaml -------------------------------------------------------------------------------- /registry/prompt/sound_identify.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/sound_identify.yaml -------------------------------------------------------------------------------- /registry/prompt/stt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/stt.yaml -------------------------------------------------------------------------------- /registry/prompt/tts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/tts.yaml -------------------------------------------------------------------------------- /registry/prompt/whisper-pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/registry/prompt/whisper-pretrain.yaml -------------------------------------------------------------------------------- /registry/recorder/local.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /replication/Long-TTS-Eval.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/replication/Long-TTS-Eval.md -------------------------------------------------------------------------------- /replication/MGM-Omni.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/replication/MGM-Omni.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirments/minicpm_o2_6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/requirments/minicpm_o2_6.txt -------------------------------------------------------------------------------- /tests/test_audio_evals_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/tests/test_audio_evals_registry.py -------------------------------------------------------------------------------- /tests/test_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenBMB/UltraEval-Audio/HEAD/tests/test_dataset.py --------------------------------------------------------------------------------